diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 2 | ||||
-rw-r--r-- | block/as-iosched.c | 144 | ||||
-rw-r--r-- | block/cfq-iosched.c | 16 | ||||
-rw-r--r-- | block/deadline-iosched.c | 8 | ||||
-rw-r--r-- | block/elevator.c | 100 | ||||
-rw-r--r-- | block/genhd.c | 154 | ||||
-rw-r--r-- | block/ioctl.c | 24 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 699 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 63 |
9 files changed, 742 insertions, 468 deletions
diff --git a/block/Kconfig b/block/Kconfig index eb48edb80c1d..377f6dd20e17 100644 --- a/block/Kconfig +++ b/block/Kconfig | |||
@@ -5,7 +5,7 @@ | |||
5 | #for instance. | 5 | #for instance. |
6 | config LBD | 6 | config LBD |
7 | bool "Support for Large Block Devices" | 7 | bool "Support for Large Block Devices" |
8 | depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML | 8 | depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML |
9 | help | 9 | help |
10 | Say Y here if you want to attach large (bigger than 2TB) discs to | 10 | Say Y here if you want to attach large (bigger than 2TB) discs to |
11 | your machine, or if you want to have a raid or loopback device | 11 | your machine, or if you want to have a raid or loopback device |
diff --git a/block/as-iosched.c b/block/as-iosched.c index 43fa20495688..8da3cf66894c 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c | |||
@@ -182,6 +182,9 @@ struct as_rq { | |||
182 | 182 | ||
183 | static kmem_cache_t *arq_pool; | 183 | static kmem_cache_t *arq_pool; |
184 | 184 | ||
185 | static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); | ||
186 | static void as_antic_stop(struct as_data *ad); | ||
187 | |||
185 | /* | 188 | /* |
186 | * IO Context helper functions | 189 | * IO Context helper functions |
187 | */ | 190 | */ |
@@ -370,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) | |||
370 | * existing request against the same sector), which can happen when using | 373 | * existing request against the same sector), which can happen when using |
371 | * direct IO, then return the alias. | 374 | * direct IO, then return the alias. |
372 | */ | 375 | */ |
373 | static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) | 376 | static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) |
374 | { | 377 | { |
375 | struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; | 378 | struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; |
376 | struct rb_node *parent = NULL; | 379 | struct rb_node *parent = NULL; |
@@ -397,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) | |||
397 | return NULL; | 400 | return NULL; |
398 | } | 401 | } |
399 | 402 | ||
403 | static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) | ||
404 | { | ||
405 | struct as_rq *alias; | ||
406 | |||
407 | while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { | ||
408 | as_move_to_dispatch(ad, alias); | ||
409 | as_antic_stop(ad); | ||
410 | } | ||
411 | } | ||
412 | |||
400 | static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) | 413 | static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) |
401 | { | 414 | { |
402 | if (!ON_RB(&arq->rb_node)) { | 415 | if (!ON_RB(&arq->rb_node)) { |
@@ -1133,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) | |||
1133 | /* | 1146 | /* |
1134 | * take it off the sort and fifo list, add to dispatch queue | 1147 | * take it off the sort and fifo list, add to dispatch queue |
1135 | */ | 1148 | */ |
1136 | while (!list_empty(&rq->queuelist)) { | ||
1137 | struct request *__rq = list_entry_rq(rq->queuelist.next); | ||
1138 | struct as_rq *__arq = RQ_DATA(__rq); | ||
1139 | |||
1140 | list_del(&__rq->queuelist); | ||
1141 | |||
1142 | elv_dispatch_add_tail(ad->q, __rq); | ||
1143 | |||
1144 | if (__arq->io_context && __arq->io_context->aic) | ||
1145 | atomic_inc(&__arq->io_context->aic->nr_dispatched); | ||
1146 | |||
1147 | WARN_ON(__arq->state != AS_RQ_QUEUED); | ||
1148 | __arq->state = AS_RQ_DISPATCHED; | ||
1149 | |||
1150 | ad->nr_dispatched++; | ||
1151 | } | ||
1152 | |||
1153 | as_remove_queued_request(ad->q, rq); | 1149 | as_remove_queued_request(ad->q, rq); |
1154 | WARN_ON(arq->state != AS_RQ_QUEUED); | 1150 | WARN_ON(arq->state != AS_RQ_QUEUED); |
1155 | 1151 | ||
@@ -1326,49 +1322,12 @@ fifo_expired: | |||
1326 | } | 1322 | } |
1327 | 1323 | ||
1328 | /* | 1324 | /* |
1329 | * Add arq to a list behind alias | ||
1330 | */ | ||
1331 | static inline void | ||
1332 | as_add_aliased_request(struct as_data *ad, struct as_rq *arq, | ||
1333 | struct as_rq *alias) | ||
1334 | { | ||
1335 | struct request *req = arq->request; | ||
1336 | struct list_head *insert = alias->request->queuelist.prev; | ||
1337 | |||
1338 | /* | ||
1339 | * Transfer list of aliases | ||
1340 | */ | ||
1341 | while (!list_empty(&req->queuelist)) { | ||
1342 | struct request *__rq = list_entry_rq(req->queuelist.next); | ||
1343 | struct as_rq *__arq = RQ_DATA(__rq); | ||
1344 | |||
1345 | list_move_tail(&__rq->queuelist, &alias->request->queuelist); | ||
1346 | |||
1347 | WARN_ON(__arq->state != AS_RQ_QUEUED); | ||
1348 | } | ||
1349 | |||
1350 | /* | ||
1351 | * Another request with the same start sector on the rbtree. | ||
1352 | * Link this request to that sector. They are untangled in | ||
1353 | * as_move_to_dispatch | ||
1354 | */ | ||
1355 | list_add(&arq->request->queuelist, insert); | ||
1356 | |||
1357 | /* | ||
1358 | * Don't want to have to handle merges. | ||
1359 | */ | ||
1360 | as_del_arq_hash(arq); | ||
1361 | arq->request->flags |= REQ_NOMERGE; | ||
1362 | } | ||
1363 | |||
1364 | /* | ||
1365 | * add arq to rbtree and fifo | 1325 | * add arq to rbtree and fifo |
1366 | */ | 1326 | */ |
1367 | static void as_add_request(request_queue_t *q, struct request *rq) | 1327 | static void as_add_request(request_queue_t *q, struct request *rq) |
1368 | { | 1328 | { |
1369 | struct as_data *ad = q->elevator->elevator_data; | 1329 | struct as_data *ad = q->elevator->elevator_data; |
1370 | struct as_rq *arq = RQ_DATA(rq); | 1330 | struct as_rq *arq = RQ_DATA(rq); |
1371 | struct as_rq *alias; | ||
1372 | int data_dir; | 1331 | int data_dir; |
1373 | 1332 | ||
1374 | arq->state = AS_RQ_NEW; | 1333 | arq->state = AS_RQ_NEW; |
@@ -1387,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq) | |||
1387 | atomic_inc(&arq->io_context->aic->nr_queued); | 1346 | atomic_inc(&arq->io_context->aic->nr_queued); |
1388 | } | 1347 | } |
1389 | 1348 | ||
1390 | alias = as_add_arq_rb(ad, arq); | 1349 | as_add_arq_rb(ad, arq); |
1391 | if (!alias) { | 1350 | if (rq_mergeable(arq->request)) |
1392 | /* | 1351 | as_add_arq_hash(ad, arq); |
1393 | * set expire time (only used for reads) and add to fifo list | ||
1394 | */ | ||
1395 | arq->expires = jiffies + ad->fifo_expire[data_dir]; | ||
1396 | list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); | ||
1397 | 1352 | ||
1398 | if (rq_mergeable(arq->request)) | 1353 | /* |
1399 | as_add_arq_hash(ad, arq); | 1354 | * set expire time (only used for reads) and add to fifo list |
1400 | as_update_arq(ad, arq); /* keep state machine up to date */ | 1355 | */ |
1401 | 1356 | arq->expires = jiffies + ad->fifo_expire[data_dir]; | |
1402 | } else { | 1357 | list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); |
1403 | as_add_aliased_request(ad, arq, alias); | ||
1404 | |||
1405 | /* | ||
1406 | * have we been anticipating this request? | ||
1407 | * or does it come from the same process as the one we are | ||
1408 | * anticipating for? | ||
1409 | */ | ||
1410 | if (ad->antic_status == ANTIC_WAIT_REQ | ||
1411 | || ad->antic_status == ANTIC_WAIT_NEXT) { | ||
1412 | if (as_can_break_anticipation(ad, arq)) | ||
1413 | as_antic_stop(ad); | ||
1414 | } | ||
1415 | } | ||
1416 | 1358 | ||
1359 | as_update_arq(ad, arq); /* keep state machine up to date */ | ||
1417 | arq->state = AS_RQ_QUEUED; | 1360 | arq->state = AS_RQ_QUEUED; |
1418 | } | 1361 | } |
1419 | 1362 | ||
@@ -1536,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req) | |||
1536 | * if the merge was a front merge, we need to reposition request | 1479 | * if the merge was a front merge, we need to reposition request |
1537 | */ | 1480 | */ |
1538 | if (rq_rb_key(req) != arq->rb_key) { | 1481 | if (rq_rb_key(req) != arq->rb_key) { |
1539 | struct as_rq *alias, *next_arq = NULL; | ||
1540 | |||
1541 | if (ad->next_arq[arq->is_sync] == arq) | ||
1542 | next_arq = as_find_next_arq(ad, arq); | ||
1543 | |||
1544 | /* | ||
1545 | * Note! We should really be moving any old aliased requests | ||
1546 | * off this request and try to insert them into the rbtree. We | ||
1547 | * currently don't bother. Ditto the next function. | ||
1548 | */ | ||
1549 | as_del_arq_rb(ad, arq); | 1482 | as_del_arq_rb(ad, arq); |
1550 | if ((alias = as_add_arq_rb(ad, arq))) { | 1483 | as_add_arq_rb(ad, arq); |
1551 | list_del_init(&arq->fifo); | ||
1552 | as_add_aliased_request(ad, arq, alias); | ||
1553 | if (next_arq) | ||
1554 | ad->next_arq[arq->is_sync] = next_arq; | ||
1555 | } | ||
1556 | /* | 1484 | /* |
1557 | * Note! At this stage of this and the next function, our next | 1485 | * Note! At this stage of this and the next function, our next |
1558 | * request may not be optimal - eg the request may have "grown" | 1486 | * request may not be optimal - eg the request may have "grown" |
@@ -1579,18 +1507,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req, | |||
1579 | as_add_arq_hash(ad, arq); | 1507 | as_add_arq_hash(ad, arq); |
1580 | 1508 | ||
1581 | if (rq_rb_key(req) != arq->rb_key) { | 1509 | if (rq_rb_key(req) != arq->rb_key) { |
1582 | struct as_rq *alias, *next_arq = NULL; | ||
1583 | |||
1584 | if (ad->next_arq[arq->is_sync] == arq) | ||
1585 | next_arq = as_find_next_arq(ad, arq); | ||
1586 | |||
1587 | as_del_arq_rb(ad, arq); | 1510 | as_del_arq_rb(ad, arq); |
1588 | if ((alias = as_add_arq_rb(ad, arq))) { | 1511 | as_add_arq_rb(ad, arq); |
1589 | list_del_init(&arq->fifo); | ||
1590 | as_add_aliased_request(ad, arq, alias); | ||
1591 | if (next_arq) | ||
1592 | ad->next_arq[arq->is_sync] = next_arq; | ||
1593 | } | ||
1594 | } | 1512 | } |
1595 | 1513 | ||
1596 | /* | 1514 | /* |
@@ -1610,18 +1528,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req, | |||
1610 | } | 1528 | } |
1611 | 1529 | ||
1612 | /* | 1530 | /* |
1613 | * Transfer list of aliases | ||
1614 | */ | ||
1615 | while (!list_empty(&next->queuelist)) { | ||
1616 | struct request *__rq = list_entry_rq(next->queuelist.next); | ||
1617 | struct as_rq *__arq = RQ_DATA(__rq); | ||
1618 | |||
1619 | list_move_tail(&__rq->queuelist, &req->queuelist); | ||
1620 | |||
1621 | WARN_ON(__arq->state != AS_RQ_QUEUED); | ||
1622 | } | ||
1623 | |||
1624 | /* | ||
1625 | * kill knowledge of next, this one is a goner | 1531 | * kill knowledge of next, this one is a goner |
1626 | */ | 1532 | */ |
1627 | as_remove_queued_request(q, next); | 1533 | as_remove_queued_request(q, next); |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ee0bb41694b0..74fae2daf87e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -25,15 +25,15 @@ | |||
25 | /* | 25 | /* |
26 | * tunables | 26 | * tunables |
27 | */ | 27 | */ |
28 | static int cfq_quantum = 4; /* max queue in one round of service */ | 28 | static const int cfq_quantum = 4; /* max queue in one round of service */ |
29 | static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ | 29 | static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ |
30 | static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; | 30 | static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; |
31 | static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ | 31 | static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ |
32 | static int cfq_back_penalty = 2; /* penalty of a backwards seek */ | 32 | static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ |
33 | 33 | ||
34 | static int cfq_slice_sync = HZ / 10; | 34 | static const int cfq_slice_sync = HZ / 10; |
35 | static int cfq_slice_async = HZ / 25; | 35 | static int cfq_slice_async = HZ / 25; |
36 | static int cfq_slice_async_rq = 2; | 36 | static const int cfq_slice_async_rq = 2; |
37 | static int cfq_slice_idle = HZ / 100; | 37 | static int cfq_slice_idle = HZ / 100; |
38 | 38 | ||
39 | #define CFQ_IDLE_GRACE (HZ / 10) | 39 | #define CFQ_IDLE_GRACE (HZ / 10) |
@@ -45,7 +45,7 @@ static int cfq_slice_idle = HZ / 100; | |||
45 | /* | 45 | /* |
46 | * disable queueing at the driver/hardware level | 46 | * disable queueing at the driver/hardware level |
47 | */ | 47 | */ |
48 | static int cfq_max_depth = 2; | 48 | static const int cfq_max_depth = 2; |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * for the hash of cfqq inside the cfqd | 51 | * for the hash of cfqq inside the cfqd |
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 9cbec09e8415..27e494b1bf97 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c | |||
@@ -19,10 +19,10 @@ | |||
19 | /* | 19 | /* |
20 | * See Documentation/block/deadline-iosched.txt | 20 | * See Documentation/block/deadline-iosched.txt |
21 | */ | 21 | */ |
22 | static int read_expire = HZ / 2; /* max time before a read is submitted. */ | 22 | static const int read_expire = HZ / 2; /* max time before a read is submitted. */ |
23 | static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ | 23 | static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ |
24 | static int writes_starved = 2; /* max times reads can starve a write */ | 24 | static const int writes_starved = 2; /* max times reads can starve a write */ |
25 | static int fifo_batch = 16; /* # of sequential requests treated as one | 25 | static const int fifo_batch = 16; /* # of sequential requests treated as one |
26 | by the above parameters. For throughput. */ | 26 | by the above parameters. For throughput. */ |
27 | 27 | ||
28 | static const int deadline_hash_shift = 5; | 28 | static const int deadline_hash_shift = 5; |
diff --git a/block/elevator.c b/block/elevator.c index 6c3fc8a10bf2..c9f424d5399c 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -64,7 +64,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
64 | } | 64 | } |
65 | EXPORT_SYMBOL(elv_rq_merge_ok); | 65 | EXPORT_SYMBOL(elv_rq_merge_ok); |
66 | 66 | ||
67 | inline int elv_try_merge(struct request *__rq, struct bio *bio) | 67 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) |
68 | { | 68 | { |
69 | int ret = ELEVATOR_NO_MERGE; | 69 | int ret = ELEVATOR_NO_MERGE; |
70 | 70 | ||
@@ -80,7 +80,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio) | |||
80 | 80 | ||
81 | return ret; | 81 | return ret; |
82 | } | 82 | } |
83 | EXPORT_SYMBOL(elv_try_merge); | ||
84 | 83 | ||
85 | static struct elevator_type *elevator_find(const char *name) | 84 | static struct elevator_type *elevator_find(const char *name) |
86 | { | 85 | { |
@@ -150,13 +149,20 @@ static void elevator_setup_default(void) | |||
150 | if (!chosen_elevator[0]) | 149 | if (!chosen_elevator[0]) |
151 | strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); | 150 | strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); |
152 | 151 | ||
152 | /* | ||
153 | * Be backwards-compatible with previous kernels, so users | ||
154 | * won't get the wrong elevator. | ||
155 | */ | ||
156 | if (!strcmp(chosen_elevator, "as")) | ||
157 | strcpy(chosen_elevator, "anticipatory"); | ||
158 | |||
153 | /* | 159 | /* |
154 | * If the given scheduler is not available, fall back to no-op. | 160 | * If the given scheduler is not available, fall back to the default |
155 | */ | 161 | */ |
156 | if ((e = elevator_find(chosen_elevator))) | 162 | if ((e = elevator_find(chosen_elevator))) |
157 | elevator_put(e); | 163 | elevator_put(e); |
158 | else | 164 | else |
159 | strcpy(chosen_elevator, "noop"); | 165 | strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); |
160 | } | 166 | } |
161 | 167 | ||
162 | static int __init elevator_setup(char *str) | 168 | static int __init elevator_setup(char *str) |
@@ -304,15 +310,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) | |||
304 | 310 | ||
305 | rq->flags &= ~REQ_STARTED; | 311 | rq->flags &= ~REQ_STARTED; |
306 | 312 | ||
307 | /* | 313 | __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0); |
308 | * if this is the flush, requeue the original instead and drop the flush | ||
309 | */ | ||
310 | if (rq->flags & REQ_BAR_FLUSH) { | ||
311 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | ||
312 | rq = rq->end_io_data; | ||
313 | } | ||
314 | |||
315 | __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); | ||
316 | } | 314 | } |
317 | 315 | ||
318 | static void elv_drain_elevator(request_queue_t *q) | 316 | static void elv_drain_elevator(request_queue_t *q) |
@@ -332,8 +330,19 @@ static void elv_drain_elevator(request_queue_t *q) | |||
332 | void __elv_add_request(request_queue_t *q, struct request *rq, int where, | 330 | void __elv_add_request(request_queue_t *q, struct request *rq, int where, |
333 | int plug) | 331 | int plug) |
334 | { | 332 | { |
333 | struct list_head *pos; | ||
334 | unsigned ordseq; | ||
335 | |||
336 | if (q->ordcolor) | ||
337 | rq->flags |= REQ_ORDERED_COLOR; | ||
338 | |||
335 | if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { | 339 | if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { |
336 | /* | 340 | /* |
341 | * toggle ordered color | ||
342 | */ | ||
343 | q->ordcolor ^= 1; | ||
344 | |||
345 | /* | ||
337 | * barriers implicitly indicate back insertion | 346 | * barriers implicitly indicate back insertion |
338 | */ | 347 | */ |
339 | if (where == ELEVATOR_INSERT_SORT) | 348 | if (where == ELEVATOR_INSERT_SORT) |
@@ -393,6 +402,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, | |||
393 | q->elevator->ops->elevator_add_req_fn(q, rq); | 402 | q->elevator->ops->elevator_add_req_fn(q, rq); |
394 | break; | 403 | break; |
395 | 404 | ||
405 | case ELEVATOR_INSERT_REQUEUE: | ||
406 | /* | ||
407 | * If ordered flush isn't in progress, we do front | ||
408 | * insertion; otherwise, requests should be requeued | ||
409 | * in ordseq order. | ||
410 | */ | ||
411 | rq->flags |= REQ_SOFTBARRIER; | ||
412 | |||
413 | if (q->ordseq == 0) { | ||
414 | list_add(&rq->queuelist, &q->queue_head); | ||
415 | break; | ||
416 | } | ||
417 | |||
418 | ordseq = blk_ordered_req_seq(rq); | ||
419 | |||
420 | list_for_each(pos, &q->queue_head) { | ||
421 | struct request *pos_rq = list_entry_rq(pos); | ||
422 | if (ordseq <= blk_ordered_req_seq(pos_rq)) | ||
423 | break; | ||
424 | } | ||
425 | |||
426 | list_add_tail(&rq->queuelist, pos); | ||
427 | break; | ||
428 | |||
396 | default: | 429 | default: |
397 | printk(KERN_ERR "%s: bad insertion point %d\n", | 430 | printk(KERN_ERR "%s: bad insertion point %d\n", |
398 | __FUNCTION__, where); | 431 | __FUNCTION__, where); |
@@ -422,25 +455,16 @@ static inline struct request *__elv_next_request(request_queue_t *q) | |||
422 | { | 455 | { |
423 | struct request *rq; | 456 | struct request *rq; |
424 | 457 | ||
425 | if (unlikely(list_empty(&q->queue_head) && | 458 | while (1) { |
426 | !q->elevator->ops->elevator_dispatch_fn(q, 0))) | 459 | while (!list_empty(&q->queue_head)) { |
427 | return NULL; | 460 | rq = list_entry_rq(q->queue_head.next); |
428 | 461 | if (blk_do_ordered(q, &rq)) | |
429 | rq = list_entry_rq(q->queue_head.next); | 462 | return rq; |
430 | 463 | } | |
431 | /* | ||
432 | * if this is a barrier write and the device has to issue a | ||
433 | * flush sequence to support it, check how far we are | ||
434 | */ | ||
435 | if (blk_fs_request(rq) && blk_barrier_rq(rq)) { | ||
436 | BUG_ON(q->ordered == QUEUE_ORDERED_NONE); | ||
437 | 464 | ||
438 | if (q->ordered == QUEUE_ORDERED_FLUSH && | 465 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
439 | !blk_barrier_preflush(rq)) | 466 | return NULL; |
440 | rq = blk_start_pre_flush(q, rq); | ||
441 | } | 467 | } |
442 | |||
443 | return rq; | ||
444 | } | 468 | } |
445 | 469 | ||
446 | struct request *elv_next_request(request_queue_t *q) | 470 | struct request *elv_next_request(request_queue_t *q) |
@@ -498,7 +522,7 @@ struct request *elv_next_request(request_queue_t *q) | |||
498 | blkdev_dequeue_request(rq); | 522 | blkdev_dequeue_request(rq); |
499 | rq->flags |= REQ_QUIET; | 523 | rq->flags |= REQ_QUIET; |
500 | end_that_request_chunk(rq, 0, nr_bytes); | 524 | end_that_request_chunk(rq, 0, nr_bytes); |
501 | end_that_request_last(rq); | 525 | end_that_request_last(rq, 0); |
502 | } else { | 526 | } else { |
503 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, | 527 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, |
504 | ret); | 528 | ret); |
@@ -597,6 +621,20 @@ void elv_completed_request(request_queue_t *q, struct request *rq) | |||
597 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) | 621 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) |
598 | e->ops->elevator_completed_req_fn(q, rq); | 622 | e->ops->elevator_completed_req_fn(q, rq); |
599 | } | 623 | } |
624 | |||
625 | /* | ||
626 | * Check if the queue is waiting for fs requests to be | ||
627 | * drained for flush sequence. | ||
628 | */ | ||
629 | if (unlikely(q->ordseq)) { | ||
630 | struct request *first_rq = list_entry_rq(q->queue_head.next); | ||
631 | if (q->in_flight == 0 && | ||
632 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && | ||
633 | blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { | ||
634 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); | ||
635 | q->request_fn(q); | ||
636 | } | ||
637 | } | ||
600 | } | 638 | } |
601 | 639 | ||
602 | int elv_register_queue(struct request_queue *q) | 640 | int elv_register_queue(struct request_queue *q) |
diff --git a/block/genhd.c b/block/genhd.c index f04609d553b8..db57546a709d 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -38,34 +38,100 @@ static inline int major_to_index(int major) | |||
38 | return major % MAX_PROBE_HASH; | 38 | return major % MAX_PROBE_HASH; |
39 | } | 39 | } |
40 | 40 | ||
41 | #ifdef CONFIG_PROC_FS | 41 | struct blkdev_info { |
42 | /* get block device names in somewhat random order */ | 42 | int index; |
43 | int get_blkdev_list(char *p, int used) | 43 | struct blk_major_name *bd; |
44 | }; | ||
45 | |||
46 | /* | ||
47 | * iterate over a list of blkdev_info structures. allows | ||
48 | * the major_names array to be iterated over from outside this file | ||
49 | * must be called with the block_subsys_sem held | ||
50 | */ | ||
51 | void *get_next_blkdev(void *dev) | ||
52 | { | ||
53 | struct blkdev_info *info; | ||
54 | |||
55 | if (dev == NULL) { | ||
56 | info = kmalloc(sizeof(*info), GFP_KERNEL); | ||
57 | if (!info) | ||
58 | goto out; | ||
59 | info->index=0; | ||
60 | info->bd = major_names[info->index]; | ||
61 | if (info->bd) | ||
62 | goto out; | ||
63 | } else { | ||
64 | info = dev; | ||
65 | } | ||
66 | |||
67 | while (info->index < ARRAY_SIZE(major_names)) { | ||
68 | if (info->bd) | ||
69 | info->bd = info->bd->next; | ||
70 | if (info->bd) | ||
71 | goto out; | ||
72 | /* | ||
73 | * No devices on this chain, move to the next | ||
74 | */ | ||
75 | info->index++; | ||
76 | info->bd = (info->index < ARRAY_SIZE(major_names)) ? | ||
77 | major_names[info->index] : NULL; | ||
78 | if (info->bd) | ||
79 | goto out; | ||
80 | } | ||
81 | |||
82 | out: | ||
83 | return info; | ||
84 | } | ||
85 | |||
86 | void *acquire_blkdev_list(void) | ||
87 | { | ||
88 | down(&block_subsys_sem); | ||
89 | return get_next_blkdev(NULL); | ||
90 | } | ||
91 | |||
92 | void release_blkdev_list(void *dev) | ||
93 | { | ||
94 | up(&block_subsys_sem); | ||
95 | kfree(dev); | ||
96 | } | ||
97 | |||
98 | |||
99 | /* | ||
100 | * Count the number of records in the blkdev_list. | ||
101 | * must be called with the block_subsys_sem held | ||
102 | */ | ||
103 | int count_blkdev_list(void) | ||
44 | { | 104 | { |
45 | struct blk_major_name *n; | 105 | struct blk_major_name *n; |
46 | int i, len; | 106 | int i, count; |
47 | 107 | ||
48 | len = snprintf(p, (PAGE_SIZE-used), "\nBlock devices:\n"); | 108 | count = 0; |
49 | 109 | ||
50 | down(&block_subsys_sem); | ||
51 | for (i = 0; i < ARRAY_SIZE(major_names); i++) { | 110 | for (i = 0; i < ARRAY_SIZE(major_names); i++) { |
52 | for (n = major_names[i]; n; n = n->next) { | 111 | for (n = major_names[i]; n; n = n->next) |
53 | /* | 112 | count++; |
54 | * If the curent string plus the 5 extra characters | ||
55 | * in the line would run us off the page, then we're done | ||
56 | */ | ||
57 | if ((len + used + strlen(n->name) + 5) >= PAGE_SIZE) | ||
58 | goto page_full; | ||
59 | len += sprintf(p+len, "%3d %s\n", | ||
60 | n->major, n->name); | ||
61 | } | ||
62 | } | 113 | } |
63 | page_full: | ||
64 | up(&block_subsys_sem); | ||
65 | 114 | ||
66 | return len; | 115 | return count; |
67 | } | 116 | } |
68 | #endif | 117 | |
118 | /* | ||
119 | * extract the major and name values from a blkdev_info struct | ||
120 | * passed in as a void to *dev. Must be called with | ||
121 | * block_subsys_sem held | ||
122 | */ | ||
123 | int get_blkdev_info(void *dev, int *major, char **name) | ||
124 | { | ||
125 | struct blkdev_info *info = dev; | ||
126 | |||
127 | if (info->bd == NULL) | ||
128 | return 1; | ||
129 | |||
130 | *major = info->bd->major; | ||
131 | *name = info->bd->name; | ||
132 | return 0; | ||
133 | } | ||
134 | |||
69 | 135 | ||
70 | int register_blkdev(unsigned int major, const char *name) | 136 | int register_blkdev(unsigned int major, const char *name) |
71 | { | 137 | { |
@@ -358,7 +424,7 @@ static struct sysfs_ops disk_sysfs_ops = { | |||
358 | static ssize_t disk_uevent_store(struct gendisk * disk, | 424 | static ssize_t disk_uevent_store(struct gendisk * disk, |
359 | const char *buf, size_t count) | 425 | const char *buf, size_t count) |
360 | { | 426 | { |
361 | kobject_hotplug(&disk->kobj, KOBJ_ADD); | 427 | kobject_uevent(&disk->kobj, KOBJ_ADD); |
362 | return count; | 428 | return count; |
363 | } | 429 | } |
364 | static ssize_t disk_dev_read(struct gendisk * disk, char *page) | 430 | static ssize_t disk_dev_read(struct gendisk * disk, char *page) |
@@ -455,14 +521,14 @@ static struct kobj_type ktype_block = { | |||
455 | 521 | ||
456 | extern struct kobj_type ktype_part; | 522 | extern struct kobj_type ktype_part; |
457 | 523 | ||
458 | static int block_hotplug_filter(struct kset *kset, struct kobject *kobj) | 524 | static int block_uevent_filter(struct kset *kset, struct kobject *kobj) |
459 | { | 525 | { |
460 | struct kobj_type *ktype = get_ktype(kobj); | 526 | struct kobj_type *ktype = get_ktype(kobj); |
461 | 527 | ||
462 | return ((ktype == &ktype_block) || (ktype == &ktype_part)); | 528 | return ((ktype == &ktype_block) || (ktype == &ktype_part)); |
463 | } | 529 | } |
464 | 530 | ||
465 | static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, | 531 | static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp, |
466 | int num_envp, char *buffer, int buffer_size) | 532 | int num_envp, char *buffer, int buffer_size) |
467 | { | 533 | { |
468 | struct kobj_type *ktype = get_ktype(kobj); | 534 | struct kobj_type *ktype = get_ktype(kobj); |
@@ -474,40 +540,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, | |||
474 | 540 | ||
475 | if (ktype == &ktype_block) { | 541 | if (ktype == &ktype_block) { |
476 | disk = container_of(kobj, struct gendisk, kobj); | 542 | disk = container_of(kobj, struct gendisk, kobj); |
477 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, | 543 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, |
478 | &length, "MINOR=%u", disk->first_minor); | 544 | &length, "MINOR=%u", disk->first_minor); |
479 | } else if (ktype == &ktype_part) { | 545 | } else if (ktype == &ktype_part) { |
480 | disk = container_of(kobj->parent, struct gendisk, kobj); | 546 | disk = container_of(kobj->parent, struct gendisk, kobj); |
481 | part = container_of(kobj, struct hd_struct, kobj); | 547 | part = container_of(kobj, struct hd_struct, kobj); |
482 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, | 548 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, |
483 | &length, "MINOR=%u", | 549 | &length, "MINOR=%u", |
484 | disk->first_minor + part->partno); | 550 | disk->first_minor + part->partno); |
485 | } else | 551 | } else |
486 | return 0; | 552 | return 0; |
487 | 553 | ||
488 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, | 554 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, |
489 | "MAJOR=%u", disk->major); | 555 | "MAJOR=%u", disk->major); |
490 | 556 | ||
491 | /* add physical device, backing this device */ | 557 | /* add physical device, backing this device */ |
492 | physdev = disk->driverfs_dev; | 558 | physdev = disk->driverfs_dev; |
493 | if (physdev) { | 559 | if (physdev) { |
494 | char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); | 560 | char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); |
495 | 561 | ||
496 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, | 562 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, |
497 | &length, "PHYSDEVPATH=%s", path); | 563 | &length, "PHYSDEVPATH=%s", path); |
498 | kfree(path); | 564 | kfree(path); |
499 | 565 | ||
500 | if (physdev->bus) | 566 | if (physdev->bus) |
501 | add_hotplug_env_var(envp, num_envp, &i, | 567 | add_uevent_var(envp, num_envp, &i, |
502 | buffer, buffer_size, &length, | 568 | buffer, buffer_size, &length, |
503 | "PHYSDEVBUS=%s", | 569 | "PHYSDEVBUS=%s", |
504 | physdev->bus->name); | 570 | physdev->bus->name); |
505 | 571 | ||
506 | if (physdev->driver) | 572 | if (physdev->driver) |
507 | add_hotplug_env_var(envp, num_envp, &i, | 573 | add_uevent_var(envp, num_envp, &i, |
508 | buffer, buffer_size, &length, | 574 | buffer, buffer_size, &length, |
509 | "PHYSDEVDRIVER=%s", | 575 | "PHYSDEVDRIVER=%s", |
510 | physdev->driver->name); | 576 | physdev->driver->name); |
511 | } | 577 | } |
512 | 578 | ||
513 | /* terminate, set to next free slot, shrink available space */ | 579 | /* terminate, set to next free slot, shrink available space */ |
@@ -520,13 +586,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, | |||
520 | return 0; | 586 | return 0; |
521 | } | 587 | } |
522 | 588 | ||
523 | static struct kset_hotplug_ops block_hotplug_ops = { | 589 | static struct kset_uevent_ops block_uevent_ops = { |
524 | .filter = block_hotplug_filter, | 590 | .filter = block_uevent_filter, |
525 | .hotplug = block_hotplug, | 591 | .uevent = block_uevent, |
526 | }; | 592 | }; |
527 | 593 | ||
528 | /* declare block_subsys. */ | 594 | /* declare block_subsys. */ |
529 | static decl_subsys(block, &ktype_block, &block_hotplug_ops); | 595 | static decl_subsys(block, &ktype_block, &block_uevent_ops); |
530 | 596 | ||
531 | 597 | ||
532 | /* | 598 | /* |
diff --git a/block/ioctl.c b/block/ioctl.c index 6e278474f9a8..e1109491c234 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/sched.h> /* for capable() */ | 1 | #include <linux/capability.h> |
2 | #include <linux/blkdev.h> | 2 | #include <linux/blkdev.h> |
3 | #include <linux/blkpg.h> | 3 | #include <linux/blkpg.h> |
4 | #include <linux/hdreg.h> | ||
4 | #include <linux/backing-dev.h> | 5 | #include <linux/backing-dev.h> |
5 | #include <linux/buffer_head.h> | 6 | #include <linux/buffer_head.h> |
6 | #include <linux/smp_lock.h> | 7 | #include <linux/smp_lock.h> |
@@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, | |||
245 | set_device_ro(bdev, n); | 246 | set_device_ro(bdev, n); |
246 | unlock_kernel(); | 247 | unlock_kernel(); |
247 | return 0; | 248 | return 0; |
249 | case HDIO_GETGEO: { | ||
250 | struct hd_geometry geo; | ||
251 | |||
252 | if (!arg) | ||
253 | return -EINVAL; | ||
254 | if (!disk->fops->getgeo) | ||
255 | return -ENOTTY; | ||
256 | |||
257 | /* | ||
258 | * We need to set the startsect first, the driver may | ||
259 | * want to override it. | ||
260 | */ | ||
261 | geo.start = get_start_sect(bdev); | ||
262 | ret = disk->fops->getgeo(bdev, &geo); | ||
263 | if (ret) | ||
264 | return ret; | ||
265 | if (copy_to_user((struct hd_geometry __user *)arg, &geo, | ||
266 | sizeof(geo))) | ||
267 | return -EFAULT; | ||
268 | return 0; | ||
269 | } | ||
248 | } | 270 | } |
249 | 271 | ||
250 | lock_kernel(); | 272 | lock_kernel(); |
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 99c9ca6d5992..8e27d0ab0d7c 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -26,7 +26,8 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/blkdev.h> | 29 | #include <linux/interrupt.h> |
30 | #include <linux/cpu.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * for max sense size | 33 | * for max sense size |
@@ -36,6 +37,8 @@ | |||
36 | static void blk_unplug_work(void *data); | 37 | static void blk_unplug_work(void *data); |
37 | static void blk_unplug_timeout(unsigned long data); | 38 | static void blk_unplug_timeout(unsigned long data); |
38 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); | 39 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); |
40 | static void init_request_from_bio(struct request *req, struct bio *bio); | ||
41 | static int __make_request(request_queue_t *q, struct bio *bio); | ||
39 | 42 | ||
40 | /* | 43 | /* |
41 | * For the allocated request tables | 44 | * For the allocated request tables |
@@ -60,13 +63,15 @@ static wait_queue_head_t congestion_wqh[2] = { | |||
60 | /* | 63 | /* |
61 | * Controlling structure to kblockd | 64 | * Controlling structure to kblockd |
62 | */ | 65 | */ |
63 | static struct workqueue_struct *kblockd_workqueue; | 66 | static struct workqueue_struct *kblockd_workqueue; |
64 | 67 | ||
65 | unsigned long blk_max_low_pfn, blk_max_pfn; | 68 | unsigned long blk_max_low_pfn, blk_max_pfn; |
66 | 69 | ||
67 | EXPORT_SYMBOL(blk_max_low_pfn); | 70 | EXPORT_SYMBOL(blk_max_low_pfn); |
68 | EXPORT_SYMBOL(blk_max_pfn); | 71 | EXPORT_SYMBOL(blk_max_pfn); |
69 | 72 | ||
73 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | ||
74 | |||
70 | /* Amount of time in which a process may batch requests */ | 75 | /* Amount of time in which a process may batch requests */ |
71 | #define BLK_BATCH_TIME (HZ/50UL) | 76 | #define BLK_BATCH_TIME (HZ/50UL) |
72 | 77 | ||
@@ -205,6 +210,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) | |||
205 | 210 | ||
206 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 211 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
207 | 212 | ||
213 | void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) | ||
214 | { | ||
215 | q->softirq_done_fn = fn; | ||
216 | } | ||
217 | |||
218 | EXPORT_SYMBOL(blk_queue_softirq_done); | ||
219 | |||
208 | /** | 220 | /** |
209 | * blk_queue_make_request - define an alternate make_request function for a device | 221 | * blk_queue_make_request - define an alternate make_request function for a device |
210 | * @q: the request queue for the device to be affected | 222 | * @q: the request queue for the device to be affected |
@@ -239,7 +251,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) | |||
239 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 251 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
240 | q->backing_dev_info.state = 0; | 252 | q->backing_dev_info.state = 0; |
241 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 253 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
242 | blk_queue_max_sectors(q, MAX_SECTORS); | 254 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); |
243 | blk_queue_hardsect_size(q, 512); | 255 | blk_queue_hardsect_size(q, 512); |
244 | blk_queue_dma_alignment(q, 511); | 256 | blk_queue_dma_alignment(q, 511); |
245 | blk_queue_congestion_threshold(q); | 257 | blk_queue_congestion_threshold(q); |
@@ -268,6 +280,7 @@ EXPORT_SYMBOL(blk_queue_make_request); | |||
268 | static inline void rq_init(request_queue_t *q, struct request *rq) | 280 | static inline void rq_init(request_queue_t *q, struct request *rq) |
269 | { | 281 | { |
270 | INIT_LIST_HEAD(&rq->queuelist); | 282 | INIT_LIST_HEAD(&rq->queuelist); |
283 | INIT_LIST_HEAD(&rq->donelist); | ||
271 | 284 | ||
272 | rq->errors = 0; | 285 | rq->errors = 0; |
273 | rq->rq_status = RQ_ACTIVE; | 286 | rq->rq_status = RQ_ACTIVE; |
@@ -284,12 +297,13 @@ static inline void rq_init(request_queue_t *q, struct request *rq) | |||
284 | rq->sense = NULL; | 297 | rq->sense = NULL; |
285 | rq->end_io = NULL; | 298 | rq->end_io = NULL; |
286 | rq->end_io_data = NULL; | 299 | rq->end_io_data = NULL; |
300 | rq->completion_data = NULL; | ||
287 | } | 301 | } |
288 | 302 | ||
289 | /** | 303 | /** |
290 | * blk_queue_ordered - does this queue support ordered writes | 304 | * blk_queue_ordered - does this queue support ordered writes |
291 | * @q: the request queue | 305 | * @q: the request queue |
292 | * @flag: see below | 306 | * @ordered: one of QUEUE_ORDERED_* |
293 | * | 307 | * |
294 | * Description: | 308 | * Description: |
295 | * For journalled file systems, doing ordered writes on a commit | 309 | * For journalled file systems, doing ordered writes on a commit |
@@ -298,28 +312,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq) | |||
298 | * feature should call this function and indicate so. | 312 | * feature should call this function and indicate so. |
299 | * | 313 | * |
300 | **/ | 314 | **/ |
301 | void blk_queue_ordered(request_queue_t *q, int flag) | 315 | int blk_queue_ordered(request_queue_t *q, unsigned ordered, |
302 | { | 316 | prepare_flush_fn *prepare_flush_fn) |
303 | switch (flag) { | 317 | { |
304 | case QUEUE_ORDERED_NONE: | 318 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && |
305 | if (q->flush_rq) | 319 | prepare_flush_fn == NULL) { |
306 | kmem_cache_free(request_cachep, q->flush_rq); | 320 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); |
307 | q->flush_rq = NULL; | 321 | return -EINVAL; |
308 | q->ordered = flag; | ||
309 | break; | ||
310 | case QUEUE_ORDERED_TAG: | ||
311 | q->ordered = flag; | ||
312 | break; | ||
313 | case QUEUE_ORDERED_FLUSH: | ||
314 | q->ordered = flag; | ||
315 | if (!q->flush_rq) | ||
316 | q->flush_rq = kmem_cache_alloc(request_cachep, | ||
317 | GFP_KERNEL); | ||
318 | break; | ||
319 | default: | ||
320 | printk("blk_queue_ordered: bad value %d\n", flag); | ||
321 | break; | ||
322 | } | 322 | } |
323 | |||
324 | if (ordered != QUEUE_ORDERED_NONE && | ||
325 | ordered != QUEUE_ORDERED_DRAIN && | ||
326 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | ||
327 | ordered != QUEUE_ORDERED_DRAIN_FUA && | ||
328 | ordered != QUEUE_ORDERED_TAG && | ||
329 | ordered != QUEUE_ORDERED_TAG_FLUSH && | ||
330 | ordered != QUEUE_ORDERED_TAG_FUA) { | ||
331 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | ||
332 | return -EINVAL; | ||
333 | } | ||
334 | |||
335 | q->next_ordered = ordered; | ||
336 | q->prepare_flush_fn = prepare_flush_fn; | ||
337 | |||
338 | return 0; | ||
323 | } | 339 | } |
324 | 340 | ||
325 | EXPORT_SYMBOL(blk_queue_ordered); | 341 | EXPORT_SYMBOL(blk_queue_ordered); |
@@ -344,167 +360,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); | |||
344 | /* | 360 | /* |
345 | * Cache flushing for ordered writes handling | 361 | * Cache flushing for ordered writes handling |
346 | */ | 362 | */ |
347 | static void blk_pre_flush_end_io(struct request *flush_rq) | 363 | inline unsigned blk_ordered_cur_seq(request_queue_t *q) |
348 | { | 364 | { |
349 | struct request *rq = flush_rq->end_io_data; | 365 | if (!q->ordseq) |
350 | request_queue_t *q = rq->q; | 366 | return 0; |
351 | 367 | return 1 << ffz(q->ordseq); | |
352 | elv_completed_request(q, flush_rq); | ||
353 | |||
354 | rq->flags |= REQ_BAR_PREFLUSH; | ||
355 | |||
356 | if (!flush_rq->errors) | ||
357 | elv_requeue_request(q, rq); | ||
358 | else { | ||
359 | q->end_flush_fn(q, flush_rq); | ||
360 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | ||
361 | q->request_fn(q); | ||
362 | } | ||
363 | } | 368 | } |
364 | 369 | ||
365 | static void blk_post_flush_end_io(struct request *flush_rq) | 370 | unsigned blk_ordered_req_seq(struct request *rq) |
366 | { | 371 | { |
367 | struct request *rq = flush_rq->end_io_data; | ||
368 | request_queue_t *q = rq->q; | 372 | request_queue_t *q = rq->q; |
369 | 373 | ||
370 | elv_completed_request(q, flush_rq); | 374 | BUG_ON(q->ordseq == 0); |
371 | 375 | ||
372 | rq->flags |= REQ_BAR_POSTFLUSH; | 376 | if (rq == &q->pre_flush_rq) |
377 | return QUEUE_ORDSEQ_PREFLUSH; | ||
378 | if (rq == &q->bar_rq) | ||
379 | return QUEUE_ORDSEQ_BAR; | ||
380 | if (rq == &q->post_flush_rq) | ||
381 | return QUEUE_ORDSEQ_POSTFLUSH; | ||
373 | 382 | ||
374 | q->end_flush_fn(q, flush_rq); | 383 | if ((rq->flags & REQ_ORDERED_COLOR) == |
375 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | 384 | (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) |
376 | q->request_fn(q); | 385 | return QUEUE_ORDSEQ_DRAIN; |
386 | else | ||
387 | return QUEUE_ORDSEQ_DONE; | ||
377 | } | 388 | } |
378 | 389 | ||
379 | struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) | 390 | void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) |
380 | { | 391 | { |
381 | struct request *flush_rq = q->flush_rq; | 392 | struct request *rq; |
382 | 393 | int uptodate; | |
383 | BUG_ON(!blk_barrier_rq(rq)); | ||
384 | 394 | ||
385 | if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) | 395 | if (error && !q->orderr) |
386 | return NULL; | 396 | q->orderr = error; |
387 | 397 | ||
388 | rq_init(q, flush_rq); | 398 | BUG_ON(q->ordseq & seq); |
389 | flush_rq->elevator_private = NULL; | 399 | q->ordseq |= seq; |
390 | flush_rq->flags = REQ_BAR_FLUSH; | ||
391 | flush_rq->rq_disk = rq->rq_disk; | ||
392 | flush_rq->rl = NULL; | ||
393 | 400 | ||
394 | /* | 401 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) |
395 | * prepare_flush returns 0 if no flush is needed, just mark both | 402 | return; |
396 | * pre and post flush as done in that case | ||
397 | */ | ||
398 | if (!q->prepare_flush_fn(q, flush_rq)) { | ||
399 | rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; | ||
400 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | ||
401 | return rq; | ||
402 | } | ||
403 | 403 | ||
404 | /* | 404 | /* |
405 | * some drivers dequeue requests right away, some only after io | 405 | * Okay, sequence complete. |
406 | * completion. make sure the request is dequeued. | ||
407 | */ | 406 | */ |
408 | if (!list_empty(&rq->queuelist)) | 407 | rq = q->orig_bar_rq; |
409 | blkdev_dequeue_request(rq); | 408 | uptodate = q->orderr ? q->orderr : 1; |
410 | 409 | ||
411 | flush_rq->end_io_data = rq; | 410 | q->ordseq = 0; |
412 | flush_rq->end_io = blk_pre_flush_end_io; | ||
413 | 411 | ||
414 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); | 412 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); |
415 | return flush_rq; | 413 | end_that_request_last(rq, uptodate); |
416 | } | 414 | } |
417 | 415 | ||
418 | static void blk_start_post_flush(request_queue_t *q, struct request *rq) | 416 | static void pre_flush_end_io(struct request *rq, int error) |
419 | { | 417 | { |
420 | struct request *flush_rq = q->flush_rq; | 418 | elv_completed_request(rq->q, rq); |
419 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | ||
420 | } | ||
421 | 421 | ||
422 | BUG_ON(!blk_barrier_rq(rq)); | 422 | static void bar_end_io(struct request *rq, int error) |
423 | { | ||
424 | elv_completed_request(rq->q, rq); | ||
425 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | ||
426 | } | ||
423 | 427 | ||
424 | rq_init(q, flush_rq); | 428 | static void post_flush_end_io(struct request *rq, int error) |
425 | flush_rq->elevator_private = NULL; | 429 | { |
426 | flush_rq->flags = REQ_BAR_FLUSH; | 430 | elv_completed_request(rq->q, rq); |
427 | flush_rq->rq_disk = rq->rq_disk; | 431 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); |
428 | flush_rq->rl = NULL; | 432 | } |
429 | 433 | ||
430 | if (q->prepare_flush_fn(q, flush_rq)) { | 434 | static void queue_flush(request_queue_t *q, unsigned which) |
431 | flush_rq->end_io_data = rq; | 435 | { |
432 | flush_rq->end_io = blk_post_flush_end_io; | 436 | struct request *rq; |
437 | rq_end_io_fn *end_io; | ||
433 | 438 | ||
434 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); | 439 | if (which == QUEUE_ORDERED_PREFLUSH) { |
435 | q->request_fn(q); | 440 | rq = &q->pre_flush_rq; |
441 | end_io = pre_flush_end_io; | ||
442 | } else { | ||
443 | rq = &q->post_flush_rq; | ||
444 | end_io = post_flush_end_io; | ||
436 | } | 445 | } |
446 | |||
447 | rq_init(q, rq); | ||
448 | rq->flags = REQ_HARDBARRIER; | ||
449 | rq->elevator_private = NULL; | ||
450 | rq->rq_disk = q->bar_rq.rq_disk; | ||
451 | rq->rl = NULL; | ||
452 | rq->end_io = end_io; | ||
453 | q->prepare_flush_fn(q, rq); | ||
454 | |||
455 | __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); | ||
437 | } | 456 | } |
438 | 457 | ||
439 | static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, | 458 | static inline struct request *start_ordered(request_queue_t *q, |
440 | int sectors) | 459 | struct request *rq) |
441 | { | 460 | { |
442 | if (sectors > rq->nr_sectors) | 461 | q->bi_size = 0; |
443 | sectors = rq->nr_sectors; | 462 | q->orderr = 0; |
463 | q->ordered = q->next_ordered; | ||
464 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | ||
465 | |||
466 | /* | ||
467 | * Prep proxy barrier request. | ||
468 | */ | ||
469 | blkdev_dequeue_request(rq); | ||
470 | q->orig_bar_rq = rq; | ||
471 | rq = &q->bar_rq; | ||
472 | rq_init(q, rq); | ||
473 | rq->flags = bio_data_dir(q->orig_bar_rq->bio); | ||
474 | rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; | ||
475 | rq->elevator_private = NULL; | ||
476 | rq->rl = NULL; | ||
477 | init_request_from_bio(rq, q->orig_bar_rq->bio); | ||
478 | rq->end_io = bar_end_io; | ||
479 | |||
480 | /* | ||
481 | * Queue ordered sequence. As we stack them at the head, we | ||
482 | * need to queue in reverse order. Note that we rely on that | ||
483 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | ||
484 | * request gets inbetween ordered sequence. | ||
485 | */ | ||
486 | if (q->ordered & QUEUE_ORDERED_POSTFLUSH) | ||
487 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); | ||
488 | else | ||
489 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; | ||
490 | |||
491 | __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); | ||
444 | 492 | ||
445 | rq->nr_sectors -= sectors; | 493 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { |
446 | return rq->nr_sectors; | 494 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); |
495 | rq = &q->pre_flush_rq; | ||
496 | } else | ||
497 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; | ||
498 | |||
499 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) | ||
500 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; | ||
501 | else | ||
502 | rq = NULL; | ||
503 | |||
504 | return rq; | ||
447 | } | 505 | } |
448 | 506 | ||
449 | static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, | 507 | int blk_do_ordered(request_queue_t *q, struct request **rqp) |
450 | int sectors, int queue_locked) | ||
451 | { | 508 | { |
452 | if (q->ordered != QUEUE_ORDERED_FLUSH) | 509 | struct request *rq = *rqp, *allowed_rq; |
453 | return 0; | 510 | int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); |
454 | if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) | ||
455 | return 0; | ||
456 | if (blk_barrier_postflush(rq)) | ||
457 | return 0; | ||
458 | 511 | ||
459 | if (!blk_check_end_barrier(q, rq, sectors)) { | 512 | if (!q->ordseq) { |
460 | unsigned long flags = 0; | 513 | if (!is_barrier) |
514 | return 1; | ||
461 | 515 | ||
462 | if (!queue_locked) | 516 | if (q->next_ordered != QUEUE_ORDERED_NONE) { |
463 | spin_lock_irqsave(q->queue_lock, flags); | 517 | *rqp = start_ordered(q, rq); |
518 | return 1; | ||
519 | } else { | ||
520 | /* | ||
521 | * This can happen when the queue switches to | ||
522 | * ORDERED_NONE while this request is on it. | ||
523 | */ | ||
524 | blkdev_dequeue_request(rq); | ||
525 | end_that_request_first(rq, -EOPNOTSUPP, | ||
526 | rq->hard_nr_sectors); | ||
527 | end_that_request_last(rq, -EOPNOTSUPP); | ||
528 | *rqp = NULL; | ||
529 | return 0; | ||
530 | } | ||
531 | } | ||
464 | 532 | ||
465 | blk_start_post_flush(q, rq); | 533 | if (q->ordered & QUEUE_ORDERED_TAG) { |
534 | if (is_barrier && rq != &q->bar_rq) | ||
535 | *rqp = NULL; | ||
536 | return 1; | ||
537 | } | ||
466 | 538 | ||
467 | if (!queue_locked) | 539 | switch (blk_ordered_cur_seq(q)) { |
468 | spin_unlock_irqrestore(q->queue_lock, flags); | 540 | case QUEUE_ORDSEQ_PREFLUSH: |
541 | allowed_rq = &q->pre_flush_rq; | ||
542 | break; | ||
543 | case QUEUE_ORDSEQ_BAR: | ||
544 | allowed_rq = &q->bar_rq; | ||
545 | break; | ||
546 | case QUEUE_ORDSEQ_POSTFLUSH: | ||
547 | allowed_rq = &q->post_flush_rq; | ||
548 | break; | ||
549 | default: | ||
550 | allowed_rq = NULL; | ||
551 | break; | ||
469 | } | 552 | } |
470 | 553 | ||
554 | if (rq != allowed_rq && | ||
555 | (blk_fs_request(rq) || rq == &q->pre_flush_rq || | ||
556 | rq == &q->post_flush_rq)) | ||
557 | *rqp = NULL; | ||
558 | |||
471 | return 1; | 559 | return 1; |
472 | } | 560 | } |
473 | 561 | ||
474 | /** | 562 | static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) |
475 | * blk_complete_barrier_rq - complete possible barrier request | ||
476 | * @q: the request queue for the device | ||
477 | * @rq: the request | ||
478 | * @sectors: number of sectors to complete | ||
479 | * | ||
480 | * Description: | ||
481 | * Used in driver end_io handling to determine whether to postpone | ||
482 | * completion of a barrier request until a post flush has been done. This | ||
483 | * is the unlocked variant, used if the caller doesn't already hold the | ||
484 | * queue lock. | ||
485 | **/ | ||
486 | int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) | ||
487 | { | 563 | { |
488 | return __blk_complete_barrier_rq(q, rq, sectors, 0); | 564 | request_queue_t *q = bio->bi_private; |
565 | struct bio_vec *bvec; | ||
566 | int i; | ||
567 | |||
568 | /* | ||
569 | * This is dry run, restore bio_sector and size. We'll finish | ||
570 | * this request again with the original bi_end_io after an | ||
571 | * error occurs or post flush is complete. | ||
572 | */ | ||
573 | q->bi_size += bytes; | ||
574 | |||
575 | if (bio->bi_size) | ||
576 | return 1; | ||
577 | |||
578 | /* Rewind bvec's */ | ||
579 | bio->bi_idx = 0; | ||
580 | bio_for_each_segment(bvec, bio, i) { | ||
581 | bvec->bv_len += bvec->bv_offset; | ||
582 | bvec->bv_offset = 0; | ||
583 | } | ||
584 | |||
585 | /* Reset bio */ | ||
586 | set_bit(BIO_UPTODATE, &bio->bi_flags); | ||
587 | bio->bi_size = q->bi_size; | ||
588 | bio->bi_sector -= (q->bi_size >> 9); | ||
589 | q->bi_size = 0; | ||
590 | |||
591 | return 0; | ||
489 | } | 592 | } |
490 | EXPORT_SYMBOL(blk_complete_barrier_rq); | ||
491 | 593 | ||
492 | /** | 594 | static inline int ordered_bio_endio(struct request *rq, struct bio *bio, |
493 | * blk_complete_barrier_rq_locked - complete possible barrier request | 595 | unsigned int nbytes, int error) |
494 | * @q: the request queue for the device | ||
495 | * @rq: the request | ||
496 | * @sectors: number of sectors to complete | ||
497 | * | ||
498 | * Description: | ||
499 | * See blk_complete_barrier_rq(). This variant must be used if the caller | ||
500 | * holds the queue lock. | ||
501 | **/ | ||
502 | int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, | ||
503 | int sectors) | ||
504 | { | 596 | { |
505 | return __blk_complete_barrier_rq(q, rq, sectors, 1); | 597 | request_queue_t *q = rq->q; |
598 | bio_end_io_t *endio; | ||
599 | void *private; | ||
600 | |||
601 | if (&q->bar_rq != rq) | ||
602 | return 0; | ||
603 | |||
604 | /* | ||
605 | * Okay, this is the barrier request in progress, dry finish it. | ||
606 | */ | ||
607 | if (error && !q->orderr) | ||
608 | q->orderr = error; | ||
609 | |||
610 | endio = bio->bi_end_io; | ||
611 | private = bio->bi_private; | ||
612 | bio->bi_end_io = flush_dry_bio_endio; | ||
613 | bio->bi_private = q; | ||
614 | |||
615 | bio_endio(bio, nbytes, error); | ||
616 | |||
617 | bio->bi_end_io = endio; | ||
618 | bio->bi_private = private; | ||
619 | |||
620 | return 1; | ||
506 | } | 621 | } |
507 | EXPORT_SYMBOL(blk_complete_barrier_rq_locked); | ||
508 | 622 | ||
509 | /** | 623 | /** |
510 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 624 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
@@ -555,7 +669,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) | |||
555 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | 669 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); |
556 | } | 670 | } |
557 | 671 | ||
558 | q->max_sectors = q->max_hw_sectors = max_sectors; | 672 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
673 | q->max_hw_sectors = q->max_sectors = max_sectors; | ||
674 | else { | ||
675 | q->max_sectors = BLK_DEF_MAX_SECTORS; | ||
676 | q->max_hw_sectors = max_sectors; | ||
677 | } | ||
559 | } | 678 | } |
560 | 679 | ||
561 | EXPORT_SYMBOL(blk_queue_max_sectors); | 680 | EXPORT_SYMBOL(blk_queue_max_sectors); |
@@ -657,8 +776,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size); | |||
657 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) | 776 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) |
658 | { | 777 | { |
659 | /* zero is "infinity" */ | 778 | /* zero is "infinity" */ |
660 | t->max_sectors = t->max_hw_sectors = | 779 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); |
661 | min_not_zero(t->max_sectors,b->max_sectors); | 780 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); |
662 | 781 | ||
663 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); | 782 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); |
664 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); | 783 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); |
@@ -1034,12 +1153,13 @@ void blk_queue_invalidate_tags(request_queue_t *q) | |||
1034 | 1153 | ||
1035 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | 1154 | EXPORT_SYMBOL(blk_queue_invalidate_tags); |
1036 | 1155 | ||
1037 | static char *rq_flags[] = { | 1156 | static const char * const rq_flags[] = { |
1038 | "REQ_RW", | 1157 | "REQ_RW", |
1039 | "REQ_FAILFAST", | 1158 | "REQ_FAILFAST", |
1040 | "REQ_SORTED", | 1159 | "REQ_SORTED", |
1041 | "REQ_SOFTBARRIER", | 1160 | "REQ_SOFTBARRIER", |
1042 | "REQ_HARDBARRIER", | 1161 | "REQ_HARDBARRIER", |
1162 | "REQ_FUA", | ||
1043 | "REQ_CMD", | 1163 | "REQ_CMD", |
1044 | "REQ_NOMERGE", | 1164 | "REQ_NOMERGE", |
1045 | "REQ_STARTED", | 1165 | "REQ_STARTED", |
@@ -1059,6 +1179,7 @@ static char *rq_flags[] = { | |||
1059 | "REQ_PM_SUSPEND", | 1179 | "REQ_PM_SUSPEND", |
1060 | "REQ_PM_RESUME", | 1180 | "REQ_PM_RESUME", |
1061 | "REQ_PM_SHUTDOWN", | 1181 | "REQ_PM_SHUTDOWN", |
1182 | "REQ_ORDERED_COLOR", | ||
1062 | }; | 1183 | }; |
1063 | 1184 | ||
1064 | void blk_dump_rq_flags(struct request *rq, char *msg) | 1185 | void blk_dump_rq_flags(struct request *rq, char *msg) |
@@ -1293,9 +1414,15 @@ static inline int ll_new_hw_segment(request_queue_t *q, | |||
1293 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, | 1414 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, |
1294 | struct bio *bio) | 1415 | struct bio *bio) |
1295 | { | 1416 | { |
1417 | unsigned short max_sectors; | ||
1296 | int len; | 1418 | int len; |
1297 | 1419 | ||
1298 | if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { | 1420 | if (unlikely(blk_pc_request(req))) |
1421 | max_sectors = q->max_hw_sectors; | ||
1422 | else | ||
1423 | max_sectors = q->max_sectors; | ||
1424 | |||
1425 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | ||
1299 | req->flags |= REQ_NOMERGE; | 1426 | req->flags |= REQ_NOMERGE; |
1300 | if (req == q->last_merge) | 1427 | if (req == q->last_merge) |
1301 | q->last_merge = NULL; | 1428 | q->last_merge = NULL; |
@@ -1325,9 +1452,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, | |||
1325 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, | 1452 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, |
1326 | struct bio *bio) | 1453 | struct bio *bio) |
1327 | { | 1454 | { |
1455 | unsigned short max_sectors; | ||
1328 | int len; | 1456 | int len; |
1329 | 1457 | ||
1330 | if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { | 1458 | if (unlikely(blk_pc_request(req))) |
1459 | max_sectors = q->max_hw_sectors; | ||
1460 | else | ||
1461 | max_sectors = q->max_sectors; | ||
1462 | |||
1463 | |||
1464 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | ||
1331 | req->flags |= REQ_NOMERGE; | 1465 | req->flags |= REQ_NOMERGE; |
1332 | if (req == q->last_merge) | 1466 | if (req == q->last_merge) |
1333 | q->last_merge = NULL; | 1467 | q->last_merge = NULL; |
@@ -1623,8 +1757,6 @@ void blk_cleanup_queue(request_queue_t * q) | |||
1623 | if (q->queue_tags) | 1757 | if (q->queue_tags) |
1624 | __blk_queue_free_tags(q); | 1758 | __blk_queue_free_tags(q); |
1625 | 1759 | ||
1626 | blk_queue_ordered(q, QUEUE_ORDERED_NONE); | ||
1627 | |||
1628 | kmem_cache_free(requestq_cachep, q); | 1760 | kmem_cache_free(requestq_cachep, q); |
1629 | } | 1761 | } |
1630 | 1762 | ||
@@ -1649,8 +1781,6 @@ static int blk_init_free_list(request_queue_t *q) | |||
1649 | return 0; | 1781 | return 0; |
1650 | } | 1782 | } |
1651 | 1783 | ||
1652 | static int __make_request(request_queue_t *, struct bio *); | ||
1653 | |||
1654 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) | 1784 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) |
1655 | { | 1785 | { |
1656 | return blk_alloc_queue_node(gfp_mask, -1); | 1786 | return blk_alloc_queue_node(gfp_mask, -1); |
@@ -1890,40 +2020,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, | |||
1890 | { | 2020 | { |
1891 | struct request *rq = NULL; | 2021 | struct request *rq = NULL; |
1892 | struct request_list *rl = &q->rq; | 2022 | struct request_list *rl = &q->rq; |
1893 | struct io_context *ioc = current_io_context(GFP_ATOMIC); | 2023 | struct io_context *ioc = NULL; |
1894 | int priv; | 2024 | int may_queue, priv; |
1895 | |||
1896 | if (rl->count[rw]+1 >= q->nr_requests) { | ||
1897 | /* | ||
1898 | * The queue will fill after this allocation, so set it as | ||
1899 | * full, and mark this process as "batching". This process | ||
1900 | * will be allowed to complete a batch of requests, others | ||
1901 | * will be blocked. | ||
1902 | */ | ||
1903 | if (!blk_queue_full(q, rw)) { | ||
1904 | ioc_set_batching(q, ioc); | ||
1905 | blk_set_queue_full(q, rw); | ||
1906 | } | ||
1907 | } | ||
1908 | 2025 | ||
1909 | switch (elv_may_queue(q, rw, bio)) { | 2026 | may_queue = elv_may_queue(q, rw, bio); |
1910 | case ELV_MQUEUE_NO: | 2027 | if (may_queue == ELV_MQUEUE_NO) |
1911 | goto rq_starved; | 2028 | goto rq_starved; |
1912 | case ELV_MQUEUE_MAY: | ||
1913 | break; | ||
1914 | case ELV_MQUEUE_MUST: | ||
1915 | goto get_rq; | ||
1916 | } | ||
1917 | 2029 | ||
1918 | if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { | 2030 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { |
1919 | /* | 2031 | if (rl->count[rw]+1 >= q->nr_requests) { |
1920 | * The queue is full and the allocating process is not a | 2032 | ioc = current_io_context(GFP_ATOMIC); |
1921 | * "batcher", and not exempted by the IO scheduler | 2033 | /* |
1922 | */ | 2034 | * The queue will fill after this allocation, so set |
1923 | goto out; | 2035 | * it as full, and mark this process as "batching". |
2036 | * This process will be allowed to complete a batch of | ||
2037 | * requests, others will be blocked. | ||
2038 | */ | ||
2039 | if (!blk_queue_full(q, rw)) { | ||
2040 | ioc_set_batching(q, ioc); | ||
2041 | blk_set_queue_full(q, rw); | ||
2042 | } else { | ||
2043 | if (may_queue != ELV_MQUEUE_MUST | ||
2044 | && !ioc_batching(q, ioc)) { | ||
2045 | /* | ||
2046 | * The queue is full and the allocating | ||
2047 | * process is not a "batcher", and not | ||
2048 | * exempted by the IO scheduler | ||
2049 | */ | ||
2050 | goto out; | ||
2051 | } | ||
2052 | } | ||
2053 | } | ||
2054 | set_queue_congested(q, rw); | ||
1924 | } | 2055 | } |
1925 | 2056 | ||
1926 | get_rq: | ||
1927 | /* | 2057 | /* |
1928 | * Only allow batching queuers to allocate up to 50% over the defined | 2058 | * Only allow batching queuers to allocate up to 50% over the defined |
1929 | * limit of requests, otherwise we could have thousands of requests | 2059 | * limit of requests, otherwise we could have thousands of requests |
@@ -1934,8 +2064,6 @@ get_rq: | |||
1934 | 2064 | ||
1935 | rl->count[rw]++; | 2065 | rl->count[rw]++; |
1936 | rl->starved[rw] = 0; | 2066 | rl->starved[rw] = 0; |
1937 | if (rl->count[rw] >= queue_congestion_on_threshold(q)) | ||
1938 | set_queue_congested(q, rw); | ||
1939 | 2067 | ||
1940 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 2068 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
1941 | if (priv) | 2069 | if (priv) |
@@ -1944,7 +2072,7 @@ get_rq: | |||
1944 | spin_unlock_irq(q->queue_lock); | 2072 | spin_unlock_irq(q->queue_lock); |
1945 | 2073 | ||
1946 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); | 2074 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); |
1947 | if (!rq) { | 2075 | if (unlikely(!rq)) { |
1948 | /* | 2076 | /* |
1949 | * Allocation failed presumably due to memory. Undo anything | 2077 | * Allocation failed presumably due to memory. Undo anything |
1950 | * we might have messed up. | 2078 | * we might have messed up. |
@@ -1969,6 +2097,12 @@ rq_starved: | |||
1969 | goto out; | 2097 | goto out; |
1970 | } | 2098 | } |
1971 | 2099 | ||
2100 | /* | ||
2101 | * ioc may be NULL here, and ioc_batching will be false. That's | ||
2102 | * OK, if the queue is under the request limit then requests need | ||
2103 | * not count toward the nr_batch_requests limit. There will always | ||
2104 | * be some limit enforced by BLK_BATCH_TIME. | ||
2105 | */ | ||
1972 | if (ioc_batching(q, ioc)) | 2106 | if (ioc_batching(q, ioc)) |
1973 | ioc->nr_batch_requests--; | 2107 | ioc->nr_batch_requests--; |
1974 | 2108 | ||
@@ -2144,7 +2278,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, | |||
2144 | struct bio *bio; | 2278 | struct bio *bio; |
2145 | int reading; | 2279 | int reading; |
2146 | 2280 | ||
2147 | if (len > (q->max_sectors << 9)) | 2281 | if (len > (q->max_hw_sectors << 9)) |
2148 | return -EINVAL; | 2282 | return -EINVAL; |
2149 | if (!len || !ubuf) | 2283 | if (!len || !ubuf) |
2150 | return -EINVAL; | 2284 | return -EINVAL; |
@@ -2259,7 +2393,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, | |||
2259 | { | 2393 | { |
2260 | struct bio *bio; | 2394 | struct bio *bio; |
2261 | 2395 | ||
2262 | if (len > (q->max_sectors << 9)) | 2396 | if (len > (q->max_hw_sectors << 9)) |
2263 | return -EINVAL; | 2397 | return -EINVAL; |
2264 | if (!len || !kbuf) | 2398 | if (!len || !kbuf) |
2265 | return -EINVAL; | 2399 | return -EINVAL; |
@@ -2295,7 +2429,7 @@ EXPORT_SYMBOL(blk_rq_map_kern); | |||
2295 | */ | 2429 | */ |
2296 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | 2430 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, |
2297 | struct request *rq, int at_head, | 2431 | struct request *rq, int at_head, |
2298 | void (*done)(struct request *)) | 2432 | rq_end_io_fn *done) |
2299 | { | 2433 | { |
2300 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2434 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2301 | 2435 | ||
@@ -2306,6 +2440,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | |||
2306 | generic_unplug_device(q); | 2440 | generic_unplug_device(q); |
2307 | } | 2441 | } |
2308 | 2442 | ||
2443 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | ||
2444 | |||
2309 | /** | 2445 | /** |
2310 | * blk_execute_rq - insert a request into queue for execution | 2446 | * blk_execute_rq - insert a request into queue for execution |
2311 | * @q: queue to insert the request in | 2447 | * @q: queue to insert the request in |
@@ -2444,7 +2580,7 @@ void disk_round_stats(struct gendisk *disk) | |||
2444 | /* | 2580 | /* |
2445 | * queue lock must be held | 2581 | * queue lock must be held |
2446 | */ | 2582 | */ |
2447 | static void __blk_put_request(request_queue_t *q, struct request *req) | 2583 | void __blk_put_request(request_queue_t *q, struct request *req) |
2448 | { | 2584 | { |
2449 | struct request_list *rl = req->rl; | 2585 | struct request_list *rl = req->rl; |
2450 | 2586 | ||
@@ -2473,6 +2609,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req) | |||
2473 | } | 2609 | } |
2474 | } | 2610 | } |
2475 | 2611 | ||
2612 | EXPORT_SYMBOL_GPL(__blk_put_request); | ||
2613 | |||
2476 | void blk_put_request(struct request *req) | 2614 | void blk_put_request(struct request *req) |
2477 | { | 2615 | { |
2478 | unsigned long flags; | 2616 | unsigned long flags; |
@@ -2495,7 +2633,7 @@ EXPORT_SYMBOL(blk_put_request); | |||
2495 | * blk_end_sync_rq - executes a completion event on a request | 2633 | * blk_end_sync_rq - executes a completion event on a request |
2496 | * @rq: request to complete | 2634 | * @rq: request to complete |
2497 | */ | 2635 | */ |
2498 | void blk_end_sync_rq(struct request *rq) | 2636 | void blk_end_sync_rq(struct request *rq, int error) |
2499 | { | 2637 | { |
2500 | struct completion *waiting = rq->waiting; | 2638 | struct completion *waiting = rq->waiting; |
2501 | 2639 | ||
@@ -2609,29 +2747,35 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq) | |||
2609 | return 0; | 2747 | return 0; |
2610 | } | 2748 | } |
2611 | 2749 | ||
2612 | /** | 2750 | static void init_request_from_bio(struct request *req, struct bio *bio) |
2613 | * blk_attempt_remerge - attempt to remerge active head with next request | ||
2614 | * @q: The &request_queue_t belonging to the device | ||
2615 | * @rq: The head request (usually) | ||
2616 | * | ||
2617 | * Description: | ||
2618 | * For head-active devices, the queue can easily be unplugged so quickly | ||
2619 | * that proper merging is not done on the front request. This may hurt | ||
2620 | * performance greatly for some devices. The block layer cannot safely | ||
2621 | * do merging on that first request for these queues, but the driver can | ||
2622 | * call this function and make it happen any way. Only the driver knows | ||
2623 | * when it is safe to do so. | ||
2624 | **/ | ||
2625 | void blk_attempt_remerge(request_queue_t *q, struct request *rq) | ||
2626 | { | 2751 | { |
2627 | unsigned long flags; | 2752 | req->flags |= REQ_CMD; |
2628 | 2753 | ||
2629 | spin_lock_irqsave(q->queue_lock, flags); | 2754 | /* |
2630 | attempt_back_merge(q, rq); | 2755 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
2631 | spin_unlock_irqrestore(q->queue_lock, flags); | 2756 | */ |
2632 | } | 2757 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
2758 | req->flags |= REQ_FAILFAST; | ||
2633 | 2759 | ||
2634 | EXPORT_SYMBOL(blk_attempt_remerge); | 2760 | /* |
2761 | * REQ_BARRIER implies no merging, but lets make it explicit | ||
2762 | */ | ||
2763 | if (unlikely(bio_barrier(bio))) | ||
2764 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | ||
2765 | |||
2766 | req->errors = 0; | ||
2767 | req->hard_sector = req->sector = bio->bi_sector; | ||
2768 | req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); | ||
2769 | req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); | ||
2770 | req->nr_phys_segments = bio_phys_segments(req->q, bio); | ||
2771 | req->nr_hw_segments = bio_hw_segments(req->q, bio); | ||
2772 | req->buffer = bio_data(bio); /* see ->buffer comment above */ | ||
2773 | req->waiting = NULL; | ||
2774 | req->bio = req->biotail = bio; | ||
2775 | req->ioprio = bio_prio(bio); | ||
2776 | req->rq_disk = bio->bi_bdev->bd_disk; | ||
2777 | req->start_time = jiffies; | ||
2778 | } | ||
2635 | 2779 | ||
2636 | static int __make_request(request_queue_t *q, struct bio *bio) | 2780 | static int __make_request(request_queue_t *q, struct bio *bio) |
2637 | { | 2781 | { |
@@ -2658,7 +2802,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) | |||
2658 | spin_lock_prefetch(q->queue_lock); | 2802 | spin_lock_prefetch(q->queue_lock); |
2659 | 2803 | ||
2660 | barrier = bio_barrier(bio); | 2804 | barrier = bio_barrier(bio); |
2661 | if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { | 2805 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { |
2662 | err = -EOPNOTSUPP; | 2806 | err = -EOPNOTSUPP; |
2663 | goto end_io; | 2807 | goto end_io; |
2664 | } | 2808 | } |
@@ -2728,33 +2872,7 @@ get_rq: | |||
2728 | * We don't worry about that case for efficiency. It won't happen | 2872 | * We don't worry about that case for efficiency. It won't happen |
2729 | * often, and the elevators are able to handle it. | 2873 | * often, and the elevators are able to handle it. |
2730 | */ | 2874 | */ |
2731 | 2875 | init_request_from_bio(req, bio); | |
2732 | req->flags |= REQ_CMD; | ||
2733 | |||
2734 | /* | ||
2735 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | ||
2736 | */ | ||
2737 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | ||
2738 | req->flags |= REQ_FAILFAST; | ||
2739 | |||
2740 | /* | ||
2741 | * REQ_BARRIER implies no merging, but lets make it explicit | ||
2742 | */ | ||
2743 | if (unlikely(barrier)) | ||
2744 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | ||
2745 | |||
2746 | req->errors = 0; | ||
2747 | req->hard_sector = req->sector = sector; | ||
2748 | req->hard_nr_sectors = req->nr_sectors = nr_sectors; | ||
2749 | req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; | ||
2750 | req->nr_phys_segments = bio_phys_segments(q, bio); | ||
2751 | req->nr_hw_segments = bio_hw_segments(q, bio); | ||
2752 | req->buffer = bio_data(bio); /* see ->buffer comment above */ | ||
2753 | req->waiting = NULL; | ||
2754 | req->bio = req->biotail = bio; | ||
2755 | req->ioprio = prio; | ||
2756 | req->rq_disk = bio->bi_bdev->bd_disk; | ||
2757 | req->start_time = jiffies; | ||
2758 | 2876 | ||
2759 | spin_lock_irq(q->queue_lock); | 2877 | spin_lock_irq(q->queue_lock); |
2760 | if (elv_queue_empty(q)) | 2878 | if (elv_queue_empty(q)) |
@@ -3045,7 +3163,8 @@ static int __end_that_request_first(struct request *req, int uptodate, | |||
3045 | if (nr_bytes >= bio->bi_size) { | 3163 | if (nr_bytes >= bio->bi_size) { |
3046 | req->bio = bio->bi_next; | 3164 | req->bio = bio->bi_next; |
3047 | nbytes = bio->bi_size; | 3165 | nbytes = bio->bi_size; |
3048 | bio_endio(bio, nbytes, error); | 3166 | if (!ordered_bio_endio(req, bio, nbytes, error)) |
3167 | bio_endio(bio, nbytes, error); | ||
3049 | next_idx = 0; | 3168 | next_idx = 0; |
3050 | bio_nbytes = 0; | 3169 | bio_nbytes = 0; |
3051 | } else { | 3170 | } else { |
@@ -3100,7 +3219,8 @@ static int __end_that_request_first(struct request *req, int uptodate, | |||
3100 | * if the request wasn't completed, update state | 3219 | * if the request wasn't completed, update state |
3101 | */ | 3220 | */ |
3102 | if (bio_nbytes) { | 3221 | if (bio_nbytes) { |
3103 | bio_endio(bio, bio_nbytes, error); | 3222 | if (!ordered_bio_endio(req, bio, bio_nbytes, error)) |
3223 | bio_endio(bio, bio_nbytes, error); | ||
3104 | bio->bi_idx += next_idx; | 3224 | bio->bi_idx += next_idx; |
3105 | bio_iovec(bio)->bv_offset += nr_bytes; | 3225 | bio_iovec(bio)->bv_offset += nr_bytes; |
3106 | bio_iovec(bio)->bv_len -= nr_bytes; | 3226 | bio_iovec(bio)->bv_len -= nr_bytes; |
@@ -3155,11 +3275,100 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) | |||
3155 | EXPORT_SYMBOL(end_that_request_chunk); | 3275 | EXPORT_SYMBOL(end_that_request_chunk); |
3156 | 3276 | ||
3157 | /* | 3277 | /* |
3278 | * splice the completion data to a local structure and hand off to | ||
3279 | * process_completion_queue() to complete the requests | ||
3280 | */ | ||
3281 | static void blk_done_softirq(struct softirq_action *h) | ||
3282 | { | ||
3283 | struct list_head *cpu_list; | ||
3284 | LIST_HEAD(local_list); | ||
3285 | |||
3286 | local_irq_disable(); | ||
3287 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
3288 | list_splice_init(cpu_list, &local_list); | ||
3289 | local_irq_enable(); | ||
3290 | |||
3291 | while (!list_empty(&local_list)) { | ||
3292 | struct request *rq = list_entry(local_list.next, struct request, donelist); | ||
3293 | |||
3294 | list_del_init(&rq->donelist); | ||
3295 | rq->q->softirq_done_fn(rq); | ||
3296 | } | ||
3297 | } | ||
3298 | |||
3299 | #ifdef CONFIG_HOTPLUG_CPU | ||
3300 | |||
3301 | static int blk_cpu_notify(struct notifier_block *self, unsigned long action, | ||
3302 | void *hcpu) | ||
3303 | { | ||
3304 | /* | ||
3305 | * If a CPU goes away, splice its entries to the current CPU | ||
3306 | * and trigger a run of the softirq | ||
3307 | */ | ||
3308 | if (action == CPU_DEAD) { | ||
3309 | int cpu = (unsigned long) hcpu; | ||
3310 | |||
3311 | local_irq_disable(); | ||
3312 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | ||
3313 | &__get_cpu_var(blk_cpu_done)); | ||
3314 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
3315 | local_irq_enable(); | ||
3316 | } | ||
3317 | |||
3318 | return NOTIFY_OK; | ||
3319 | } | ||
3320 | |||
3321 | |||
3322 | static struct notifier_block __devinitdata blk_cpu_notifier = { | ||
3323 | .notifier_call = blk_cpu_notify, | ||
3324 | }; | ||
3325 | |||
3326 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
3327 | |||
3328 | /** | ||
3329 | * blk_complete_request - end I/O on a request | ||
3330 | * @req: the request being processed | ||
3331 | * | ||
3332 | * Description: | ||
3333 | * Ends all I/O on a request. It does not handle partial completions, | ||
3334 | * unless the driver actually implements this in its completion callback | ||
3335 | * through requeueing. The actual completion happens out-of-order, | ||
3336 | * through a softirq handler. The user must have registered a completion | ||
3337 | * callback through blk_queue_softirq_done(). | ||
3338 | **/ | ||
3339 | |||
3340 | void blk_complete_request(struct request *req) | ||
3341 | { | ||
3342 | struct list_head *cpu_list; | ||
3343 | unsigned long flags; | ||
3344 | |||
3345 | BUG_ON(!req->q->softirq_done_fn); | ||
3346 | |||
3347 | local_irq_save(flags); | ||
3348 | |||
3349 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
3350 | list_add_tail(&req->donelist, cpu_list); | ||
3351 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
3352 | |||
3353 | local_irq_restore(flags); | ||
3354 | } | ||
3355 | |||
3356 | EXPORT_SYMBOL(blk_complete_request); | ||
3357 | |||
3358 | /* | ||
3158 | * queue lock must be held | 3359 | * queue lock must be held |
3159 | */ | 3360 | */ |
3160 | void end_that_request_last(struct request *req) | 3361 | void end_that_request_last(struct request *req, int uptodate) |
3161 | { | 3362 | { |
3162 | struct gendisk *disk = req->rq_disk; | 3363 | struct gendisk *disk = req->rq_disk; |
3364 | int error; | ||
3365 | |||
3366 | /* | ||
3367 | * extend uptodate bool to allow < 0 value to be direct io error | ||
3368 | */ | ||
3369 | error = 0; | ||
3370 | if (end_io_error(uptodate)) | ||
3371 | error = !uptodate ? -EIO : uptodate; | ||
3163 | 3372 | ||
3164 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 3373 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
3165 | laptop_io_completion(); | 3374 | laptop_io_completion(); |
@@ -3174,7 +3383,7 @@ void end_that_request_last(struct request *req) | |||
3174 | disk->in_flight--; | 3383 | disk->in_flight--; |
3175 | } | 3384 | } |
3176 | if (req->end_io) | 3385 | if (req->end_io) |
3177 | req->end_io(req); | 3386 | req->end_io(req, error); |
3178 | else | 3387 | else |
3179 | __blk_put_request(req->q, req); | 3388 | __blk_put_request(req->q, req); |
3180 | } | 3389 | } |
@@ -3186,7 +3395,7 @@ void end_request(struct request *req, int uptodate) | |||
3186 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { | 3395 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { |
3187 | add_disk_randomness(req->rq_disk); | 3396 | add_disk_randomness(req->rq_disk); |
3188 | blkdev_dequeue_request(req); | 3397 | blkdev_dequeue_request(req); |
3189 | end_that_request_last(req); | 3398 | end_that_request_last(req, uptodate); |
3190 | } | 3399 | } |
3191 | } | 3400 | } |
3192 | 3401 | ||
@@ -3224,6 +3433,8 @@ EXPORT_SYMBOL(kblockd_flush); | |||
3224 | 3433 | ||
3225 | int __init blk_dev_init(void) | 3434 | int __init blk_dev_init(void) |
3226 | { | 3435 | { |
3436 | int i; | ||
3437 | |||
3227 | kblockd_workqueue = create_workqueue("kblockd"); | 3438 | kblockd_workqueue = create_workqueue("kblockd"); |
3228 | if (!kblockd_workqueue) | 3439 | if (!kblockd_workqueue) |
3229 | panic("Failed to create kblockd\n"); | 3440 | panic("Failed to create kblockd\n"); |
@@ -3237,6 +3448,14 @@ int __init blk_dev_init(void) | |||
3237 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 3448 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
3238 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); | 3449 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); |
3239 | 3450 | ||
3451 | for (i = 0; i < NR_CPUS; i++) | ||
3452 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | ||
3453 | |||
3454 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); | ||
3455 | #ifdef CONFIG_HOTPLUG_CPU | ||
3456 | register_cpu_notifier(&blk_cpu_notifier); | ||
3457 | #endif | ||
3458 | |||
3240 | blk_max_low_pfn = max_low_pfn; | 3459 | blk_max_low_pfn = max_low_pfn; |
3241 | blk_max_pfn = max_pfn; | 3460 | blk_max_pfn = max_pfn; |
3242 | 3461 | ||
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 382dea7b224c..cc72210687eb 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <linux/capability.h> | ||
24 | #include <linux/completion.h> | 25 | #include <linux/completion.h> |
25 | #include <linux/cdrom.h> | 26 | #include <linux/cdrom.h> |
26 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
@@ -46,7 +47,7 @@ EXPORT_SYMBOL(scsi_command_size); | |||
46 | 47 | ||
47 | static int sg_get_version(int __user *p) | 48 | static int sg_get_version(int __user *p) |
48 | { | 49 | { |
49 | static int sg_version_num = 30527; | 50 | static const int sg_version_num = 30527; |
50 | return put_user(sg_version_num, p); | 51 | return put_user(sg_version_num, p); |
51 | } | 52 | } |
52 | 53 | ||
@@ -190,16 +191,21 @@ static int verify_command(struct file *file, unsigned char *cmd) | |||
190 | safe_for_write(GPCMD_SET_STREAMING), | 191 | safe_for_write(GPCMD_SET_STREAMING), |
191 | }; | 192 | }; |
192 | unsigned char type = cmd_type[cmd[0]]; | 193 | unsigned char type = cmd_type[cmd[0]]; |
194 | int has_write_perm = 0; | ||
193 | 195 | ||
194 | /* Anybody who can open the device can do a read-safe command */ | 196 | /* Anybody who can open the device can do a read-safe command */ |
195 | if (type & CMD_READ_SAFE) | 197 | if (type & CMD_READ_SAFE) |
196 | return 0; | 198 | return 0; |
197 | 199 | ||
200 | /* | ||
201 | * file can be NULL from ioctl_by_bdev()... | ||
202 | */ | ||
203 | if (file) | ||
204 | has_write_perm = file->f_mode & FMODE_WRITE; | ||
205 | |||
198 | /* Write-safe commands just require a writable open.. */ | 206 | /* Write-safe commands just require a writable open.. */ |
199 | if (type & CMD_WRITE_SAFE) { | 207 | if ((type & CMD_WRITE_SAFE) && has_write_perm) |
200 | if (file->f_mode & FMODE_WRITE) | 208 | return 0; |
201 | return 0; | ||
202 | } | ||
203 | 209 | ||
204 | /* And root can do any command.. */ | 210 | /* And root can do any command.. */ |
205 | if (capable(CAP_SYS_RAWIO)) | 211 | if (capable(CAP_SYS_RAWIO)) |
@@ -233,7 +239,7 @@ static int sg_io(struct file *file, request_queue_t *q, | |||
233 | if (verify_command(file, cmd)) | 239 | if (verify_command(file, cmd)) |
234 | return -EPERM; | 240 | return -EPERM; |
235 | 241 | ||
236 | if (hdr->dxfer_len > (q->max_sectors << 9)) | 242 | if (hdr->dxfer_len > (q->max_hw_sectors << 9)) |
237 | return -EIO; | 243 | return -EIO; |
238 | 244 | ||
239 | if (hdr->dxfer_len) | 245 | if (hdr->dxfer_len) |
@@ -442,11 +448,37 @@ error: | |||
442 | return err; | 448 | return err; |
443 | } | 449 | } |
444 | 450 | ||
451 | |||
452 | /* Send basic block requests */ | ||
453 | static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) | ||
454 | { | ||
455 | struct request *rq; | ||
456 | int err; | ||
457 | |||
458 | rq = blk_get_request(q, WRITE, __GFP_WAIT); | ||
459 | rq->flags |= REQ_BLOCK_PC; | ||
460 | rq->data = NULL; | ||
461 | rq->data_len = 0; | ||
462 | rq->timeout = BLK_DEFAULT_TIMEOUT; | ||
463 | memset(rq->cmd, 0, sizeof(rq->cmd)); | ||
464 | rq->cmd[0] = cmd; | ||
465 | rq->cmd[4] = data; | ||
466 | rq->cmd_len = 6; | ||
467 | err = blk_execute_rq(q, bd_disk, rq, 0); | ||
468 | blk_put_request(rq); | ||
469 | |||
470 | return err; | ||
471 | } | ||
472 | |||
473 | static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data) | ||
474 | { | ||
475 | return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); | ||
476 | } | ||
477 | |||
445 | int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) | 478 | int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) |
446 | { | 479 | { |
447 | request_queue_t *q; | 480 | request_queue_t *q; |
448 | struct request *rq; | 481 | int err; |
449 | int close = 0, err; | ||
450 | 482 | ||
451 | q = bd_disk->queue; | 483 | q = bd_disk->queue; |
452 | if (!q) | 484 | if (!q) |
@@ -564,19 +596,10 @@ int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, | |||
564 | err = sg_scsi_ioctl(file, q, bd_disk, arg); | 596 | err = sg_scsi_ioctl(file, q, bd_disk, arg); |
565 | break; | 597 | break; |
566 | case CDROMCLOSETRAY: | 598 | case CDROMCLOSETRAY: |
567 | close = 1; | 599 | err = blk_send_start_stop(q, bd_disk, 0x03); |
600 | break; | ||
568 | case CDROMEJECT: | 601 | case CDROMEJECT: |
569 | rq = blk_get_request(q, WRITE, __GFP_WAIT); | 602 | err = blk_send_start_stop(q, bd_disk, 0x02); |
570 | rq->flags |= REQ_BLOCK_PC; | ||
571 | rq->data = NULL; | ||
572 | rq->data_len = 0; | ||
573 | rq->timeout = BLK_DEFAULT_TIMEOUT; | ||
574 | memset(rq->cmd, 0, sizeof(rq->cmd)); | ||
575 | rq->cmd[0] = GPCMD_START_STOP_UNIT; | ||
576 | rq->cmd[4] = 0x02 + (close != 0); | ||
577 | rq->cmd_len = 6; | ||
578 | err = blk_execute_rq(q, bd_disk, rq, 0); | ||
579 | blk_put_request(rq); | ||
580 | break; | 603 | break; |
581 | default: | 604 | default: |
582 | err = -ENOTTY; | 605 | err = -ENOTTY; |