author     Tejun Heo <htejun@gmail.com>              2006-01-06 03:51:03 -0500
committer  Jens Axboe <axboe@suse.de>                2006-01-06 03:51:03 -0500
commit     797e7dbbee0a91fa1349192f18ad5c454997d876 (patch)
tree       c0d5974f469dd2d3d4f9b15d87d201b61e248f54
parent     52d9e675361261a1eb1716b02222ec6177ec342b (diff)
[BLOCK] reimplement handling of barrier request
Reimplement handling of barrier requests.

* Flexible handling to deal with various capabilities of
  target devices.
* Retry support for falling back.
* Tagged queues which don't support ordered tag can do ordered.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
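With the reworked interface a driver advertises its ordering capability once at
init time through blk_queue_ordered().  As a rough sketch only (the example_*
names below are hypothetical and not part of this patch; the constants and the
prepare_flush_fn signature are the ones introduced here), a driver with a
writeback cache and no FUA support might do:

/* Illustrative sketch -- example_* names are hypothetical. */
static void example_prepare_flush(request_queue_t *q, struct request *rq)
{
	/*
	 * Fill in a driver-specific cache-flush command for rq here
	 * (e.g. an ATA FLUSH CACHE taskfile).  The new prepare_flush_fn
	 * returns void; rq has already been set up as a REQ_HARDBARRIER
	 * request by queue_flush().
	 */
}

static int example_init_queue(request_queue_t *q)
{
	/*
	 * Writeback cache, no FUA: drain the queue around the barrier
	 * and issue pre/post flushes.  Returns -EINVAL on bad arguments.
	 */
	return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
				 example_prepare_flush);
}

A device with a FUA write command would pass QUEUE_ORDERED_DRAIN_FUA instead,
and a queue that cannot order at all passes QUEUE_ORDERED_NONE, in which case
__make_request() fails barrier bios with -EOPNOTSUPP.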
-rw-r--r--  block/elevator.c             84
-rw-r--r--  block/ll_rw_blk.c           384
-rw-r--r--  include/linux/blkdev.h       82
-rw-r--r--  include/linux/elevator.h      1
4 files changed, 359 insertions, 192 deletions
diff --git a/block/elevator.c b/block/elevator.c
index 85a11cee7d1c..39dcccc82ada 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -304,15 +304,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 
 	rq->flags &= ~REQ_STARTED;
 
-	/*
-	 * if this is the flush, requeue the original instead and drop the flush
-	 */
-	if (rq->flags & REQ_BAR_FLUSH) {
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		rq = rq->end_io_data;
-	}
-
-	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
 }
 
 static void elv_drain_elevator(request_queue_t *q)
@@ -332,8 +324,19 @@ static void elv_drain_elevator(request_queue_t *q)
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
+	struct list_head *pos;
+	unsigned ordseq;
+
+	if (q->ordcolor)
+		rq->flags |= REQ_ORDERED_COLOR;
+
 	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
 		/*
+		 * toggle ordered color
+		 */
+		q->ordcolor ^= 1;
+
+		/*
 		 * barriers implicitly indicate back insertion
 		 */
 		if (where == ELEVATOR_INSERT_SORT)
@@ -393,6 +396,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
+	case ELEVATOR_INSERT_REQUEUE:
+		/*
+		 * If ordered flush isn't in progress, we do front
+		 * insertion; otherwise, requests should be requeued
+		 * in ordseq order.
+		 */
+		rq->flags |= REQ_SOFTBARRIER;
+
+		if (q->ordseq == 0) {
+			list_add(&rq->queuelist, &q->queue_head);
+			break;
+		}
+
+		ordseq = blk_ordered_req_seq(rq);
+
+		list_for_each(pos, &q->queue_head) {
+			struct request *pos_rq = list_entry_rq(pos);
+			if (ordseq <= blk_ordered_req_seq(pos_rq))
+				break;
+		}
+
+		list_add_tail(&rq->queuelist, pos);
+		break;
+
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __FUNCTION__, where);
@@ -422,25 +449,16 @@ static inline struct request *__elv_next_request(request_queue_t *q)
 {
 	struct request *rq;
 
-	if (unlikely(list_empty(&q->queue_head) &&
-		     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
-		return NULL;
-
-	rq = list_entry_rq(q->queue_head.next);
-
-	/*
-	 * if this is a barrier write and the device has to issue a
-	 * flush sequence to support it, check how far we are
-	 */
-	if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
-		BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
+	while (1) {
+		while (!list_empty(&q->queue_head)) {
+			rq = list_entry_rq(q->queue_head.next);
+			if (blk_do_ordered(q, &rq))
+				return rq;
+		}
 
-		if (q->ordered == QUEUE_ORDERED_FLUSH &&
-		    !blk_barrier_preflush(rq))
-			rq = blk_start_pre_flush(q, rq);
+		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+			return NULL;
 	}
-
-	return rq;
 }
 
 struct request *elv_next_request(request_queue_t *q)
@@ -593,7 +611,21 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
 	 * request is released from the driver, io must be done
 	 */
 	if (blk_account_rq(rq)) {
+		struct request *first_rq = list_entry_rq(q->queue_head.next);
+
 		q->in_flight--;
+
+		/*
+		 * Check if the queue is waiting for fs requests to be
+		 * drained for flush sequence.
+		 */
+		if (q->ordseq && q->in_flight == 0 &&
+		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
+		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
+			q->request_fn(q);
+		}
+
 		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 65c4efc02adf..91d3b4828c49 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -290,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 
 /**
  * blk_queue_ordered - does this queue support ordered writes
  * @q: the request queue
- * @flag: see below
+ * @ordered: one of QUEUE_ORDERED_*
  *
  * Description:
  *   For journalled file systems, doing ordered writes on a commit
@@ -300,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
  *   feature should call this function and indicate so.
  *
  **/
-void blk_queue_ordered(request_queue_t *q, int flag)
-{
-	switch (flag) {
-		case QUEUE_ORDERED_NONE:
-			if (q->flush_rq)
-				kmem_cache_free(request_cachep, q->flush_rq);
-			q->flush_rq = NULL;
-			q->ordered = flag;
-			break;
-		case QUEUE_ORDERED_TAG:
-			q->ordered = flag;
-			break;
-		case QUEUE_ORDERED_FLUSH:
-			q->ordered = flag;
-			if (!q->flush_rq)
-				q->flush_rq = kmem_cache_alloc(request_cachep,
-								GFP_KERNEL);
-			break;
-		default:
-			printk("blk_queue_ordered: bad value %d\n", flag);
-			break;
+int blk_queue_ordered(request_queue_t *q, unsigned ordered,
+		      prepare_flush_fn *prepare_flush_fn)
+{
+	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
+	    prepare_flush_fn == NULL) {
+		printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
+		return -EINVAL;
+	}
+
+	if (ordered != QUEUE_ORDERED_NONE &&
+	    ordered != QUEUE_ORDERED_DRAIN &&
+	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
+	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
+	    ordered != QUEUE_ORDERED_TAG &&
+	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
+	    ordered != QUEUE_ORDERED_TAG_FUA) {
+		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
+		return -EINVAL;
 	}
+
+	q->next_ordered = ordered;
+	q->prepare_flush_fn = prepare_flush_fn;
+
+	return 0;
 }
 
 EXPORT_SYMBOL(blk_queue_ordered);
@@ -346,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
 /*
  * Cache flushing for ordered writes handling
  */
-static void blk_pre_flush_end_io(struct request *flush_rq, int error)
-{
-	struct request *rq = flush_rq->end_io_data;
-	request_queue_t *q = rq->q;
-
-	elv_completed_request(q, flush_rq);
-
-	rq->flags |= REQ_BAR_PREFLUSH;
-
-	if (!flush_rq->errors)
-		elv_requeue_request(q, rq);
-	else {
-		q->end_flush_fn(q, flush_rq);
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		q->request_fn(q);
-	}
-}
-
-static void blk_post_flush_end_io(struct request *flush_rq, int error)
-{
-	struct request *rq = flush_rq->end_io_data;
-	request_queue_t *q = rq->q;
-
-	elv_completed_request(q, flush_rq);
-
-	rq->flags |= REQ_BAR_POSTFLUSH;
-
-	q->end_flush_fn(q, flush_rq);
-	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-	q->request_fn(q);
-}
-
-struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
-{
-	struct request *flush_rq = q->flush_rq;
-
-	BUG_ON(!blk_barrier_rq(rq));
-
-	if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
-		return NULL;
-
-	rq_init(q, flush_rq);
-	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
-	flush_rq->rq_disk = rq->rq_disk;
-	flush_rq->rl = NULL;
-
-	/*
-	 * prepare_flush returns 0 if no flush is needed, just mark both
-	 * pre and post flush as done in that case
-	 */
-	if (!q->prepare_flush_fn(q, flush_rq)) {
-		rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		return rq;
-	}
-
-	/*
-	 * some drivers dequeue requests right away, some only after io
-	 * completion. make sure the request is dequeued.
-	 */
-	if (!list_empty(&rq->queuelist))
-		blkdev_dequeue_request(rq);
-
-	flush_rq->end_io_data = rq;
-	flush_rq->end_io = blk_pre_flush_end_io;
-
-	__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-	return flush_rq;
-}
-
-static void blk_start_post_flush(request_queue_t *q, struct request *rq)
-{
-	struct request *flush_rq = q->flush_rq;
-
-	BUG_ON(!blk_barrier_rq(rq));
-
-	rq_init(q, flush_rq);
-	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
-	flush_rq->rq_disk = rq->rq_disk;
-	flush_rq->rl = NULL;
-
-	if (q->prepare_flush_fn(q, flush_rq)) {
-		flush_rq->end_io_data = rq;
-		flush_rq->end_io = blk_post_flush_end_io;
-
-		__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-		q->request_fn(q);
-	}
-}
-
-static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
-					int sectors)
-{
-	if (sectors > rq->nr_sectors)
-		sectors = rq->nr_sectors;
-
-	rq->nr_sectors -= sectors;
-	return rq->nr_sectors;
-}
-
-static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
-				     int sectors, int queue_locked)
-{
-	if (q->ordered != QUEUE_ORDERED_FLUSH)
-		return 0;
-	if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
-		return 0;
-	if (blk_barrier_postflush(rq))
-		return 0;
-
-	if (!blk_check_end_barrier(q, rq, sectors)) {
-		unsigned long flags = 0;
-
-		if (!queue_locked)
-			spin_lock_irqsave(q->queue_lock, flags);
-
-		blk_start_post_flush(q, rq);
-
-		if (!queue_locked)
-			spin_unlock_irqrestore(q->queue_lock, flags);
-	}
-
-	return 1;
-}
-
-/**
- * blk_complete_barrier_rq - complete possible barrier request
- * @q: the request queue for the device
- * @rq: the request
- * @sectors: number of sectors to complete
- *
- * Description:
- *   Used in driver end_io handling to determine whether to postpone
- *   completion of a barrier request until a post flush has been done. This
- *   is the unlocked variant, used if the caller doesn't already hold the
- *   queue lock.
- **/
-int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
-{
-	return __blk_complete_barrier_rq(q, rq, sectors, 0);
-}
-EXPORT_SYMBOL(blk_complete_barrier_rq);
-
-/**
- * blk_complete_barrier_rq_locked - complete possible barrier request
- * @q: the request queue for the device
- * @rq: the request
- * @sectors: number of sectors to complete
- *
- * Description:
- *   See blk_complete_barrier_rq(). This variant must be used if the caller
- *   holds the queue lock.
- **/
-int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
-				   int sectors)
-{
-	return __blk_complete_barrier_rq(q, rq, sectors, 1);
-}
-EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
+inline unsigned blk_ordered_cur_seq(request_queue_t *q)
+{
+	if (!q->ordseq)
+		return 0;
+	return 1 << ffz(q->ordseq);
+}
+
+unsigned blk_ordered_req_seq(struct request *rq)
+{
+	request_queue_t *q = rq->q;
+
+	BUG_ON(q->ordseq == 0);
+
+	if (rq == &q->pre_flush_rq)
+		return QUEUE_ORDSEQ_PREFLUSH;
+	if (rq == &q->bar_rq)
+		return QUEUE_ORDSEQ_BAR;
+	if (rq == &q->post_flush_rq)
+		return QUEUE_ORDSEQ_POSTFLUSH;
+
+	if ((rq->flags & REQ_ORDERED_COLOR) ==
+	    (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+		return QUEUE_ORDSEQ_DRAIN;
+	else
+		return QUEUE_ORDSEQ_DONE;
+}
+
+void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
+{
+	struct request *rq;
+	int uptodate;
+
+	if (error && !q->orderr)
+		q->orderr = error;
+
+	BUG_ON(q->ordseq & seq);
+	q->ordseq |= seq;
+
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
+		return;
+
+	/*
+	 * Okay, sequence complete.
+	 */
+	rq = q->orig_bar_rq;
+	uptodate = q->orderr ? q->orderr : 1;
+
+	q->ordseq = 0;
+
+	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
+	end_that_request_last(rq, uptodate);
+}
+
+static void pre_flush_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
+}
+
+static void bar_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
+}
+
+static void post_flush_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
+}
+
+static void queue_flush(request_queue_t *q, unsigned which)
+{
+	struct request *rq;
+	rq_end_io_fn *end_io;
+
+	if (which == QUEUE_ORDERED_PREFLUSH) {
+		rq = &q->pre_flush_rq;
+		end_io = pre_flush_end_io;
+	} else {
+		rq = &q->post_flush_rq;
+		end_io = post_flush_end_io;
+	}
+
+	rq_init(q, rq);
+	rq->flags = REQ_HARDBARRIER;
+	rq->elevator_private = NULL;
+	rq->rq_disk = q->bar_rq.rq_disk;
+	rq->rl = NULL;
+	rq->end_io = end_io;
+	q->prepare_flush_fn(q, rq);
+
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+}
+
+static inline struct request *start_ordered(request_queue_t *q,
+					    struct request *rq)
+{
+	q->bi_size = 0;
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
+
+	/*
+	 * Prep proxy barrier request.
+	 */
+	blkdev_dequeue_request(rq);
+	q->orig_bar_rq = rq;
+	rq = &q->bar_rq;
+	rq_init(q, rq);
+	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
+	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	rq->elevator_private = NULL;
+	rq->rl = NULL;
+	init_request_from_bio(rq, q->orig_bar_rq->bio);
+	rq->end_io = bar_end_io;
+
+	/*
+	 * Queue ordered sequence.  As we stack them at the head, we
+	 * need to queue in reverse order.  Note that we rely on that
+	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
+	 * request gets inbetween ordered sequence.
+	 */
+	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
+	else
+		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+
+	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+		rq = &q->pre_flush_rq;
+	} else
+		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+
+	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
+		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
+	else
+		rq = NULL;
+
+	return rq;
+}
+
+int blk_do_ordered(request_queue_t *q, struct request **rqp)
+{
+	struct request *rq = *rqp, *allowed_rq;
+	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
+
+	if (!q->ordseq) {
+		if (!is_barrier)
+			return 1;
+
+		if (q->next_ordered != QUEUE_ORDERED_NONE) {
+			*rqp = start_ordered(q, rq);
+			return 1;
+		} else {
+			/*
+			 * This can happen when the queue switches to
+			 * ORDERED_NONE while this request is on it.
+			 */
+			blkdev_dequeue_request(rq);
+			end_that_request_first(rq, -EOPNOTSUPP,
+					       rq->hard_nr_sectors);
+			end_that_request_last(rq, -EOPNOTSUPP);
+			*rqp = NULL;
+			return 0;
+		}
+	}
+
+	if (q->ordered & QUEUE_ORDERED_TAG) {
+		if (is_barrier && rq != &q->bar_rq)
+			*rqp = NULL;
+		return 1;
+	}
+
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		allowed_rq = &q->pre_flush_rq;
+		break;
+	case QUEUE_ORDSEQ_BAR:
+		allowed_rq = &q->bar_rq;
+		break;
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		allowed_rq = &q->post_flush_rq;
+		break;
+	default:
+		allowed_rq = NULL;
+		break;
+	}
+
+	if (rq != allowed_rq &&
+	    (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
+	     rq == &q->post_flush_rq))
+		*rqp = NULL;
+
+	return 1;
+}
+
+static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
+{
+	request_queue_t *q = bio->bi_private;
+	struct bio_vec *bvec;
+	int i;
+
+	/*
+	 * This is dry run, restore bio_sector and size.  We'll finish
+	 * this request again with the original bi_end_io after an
+	 * error occurs or post flush is complete.
+	 */
+	q->bi_size += bytes;
+
+	if (bio->bi_size)
+		return 1;
+
+	/* Rewind bvec's */
+	bio->bi_idx = 0;
+	bio_for_each_segment(bvec, bio, i) {
+		bvec->bv_len += bvec->bv_offset;
+		bvec->bv_offset = 0;
+	}
+
+	/* Reset bio */
+	set_bit(BIO_UPTODATE, &bio->bi_flags);
+	bio->bi_size = q->bi_size;
+	bio->bi_sector -= (q->bi_size >> 9);
+	q->bi_size = 0;
+
+	return 0;
+}
+
+static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
+				    unsigned int nbytes, int error)
+{
+	request_queue_t *q = rq->q;
+	bio_end_io_t *endio;
+	void *private;
+
+	if (&q->bar_rq != rq)
+		return 0;
+
+	/*
+	 * Okay, this is the barrier request in progress, dry finish it.
+	 */
+	if (error && !q->orderr)
+		q->orderr = error;
+
+	endio = bio->bi_end_io;
+	private = bio->bi_private;
+	bio->bi_end_io = flush_dry_bio_endio;
+	bio->bi_private = q;
+
+	bio_endio(bio, nbytes, error);
+
+	bio->bi_end_io = endio;
+	bio->bi_private = private;
+
+	return 1;
+}
 
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
@@ -1047,6 +1147,7 @@ static const char * const rq_flags[] = {
 	"REQ_SORTED",
 	"REQ_SOFTBARRIER",
 	"REQ_HARDBARRIER",
+	"REQ_FUA",
 	"REQ_CMD",
 	"REQ_NOMERGE",
 	"REQ_STARTED",
@@ -1066,6 +1167,7 @@ static const char * const rq_flags[] = {
 	"REQ_PM_SUSPEND",
 	"REQ_PM_RESUME",
 	"REQ_PM_SHUTDOWN",
+	"REQ_ORDERED_COLOR",
 };
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1643,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	blk_queue_ordered(q, QUEUE_ORDERED_NONE);
-
 	kmem_cache_free(requestq_cachep, q);
 }
 
@@ -2714,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	spin_lock_prefetch(q->queue_lock);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -3075,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
-			bio_endio(bio, nbytes, error);
+			if (!ordered_bio_endio(req, bio, nbytes, error))
+				bio_endio(bio, nbytes, error);
 			next_idx = 0;
 			bio_nbytes = 0;
 		} else {
@@ -3130,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
 	 * if the request wasn't completed, update state
 	 */
 	if (bio_nbytes) {
-		bio_endio(bio, bio_nbytes, error);
+		if (!ordered_bio_endio(req, bio, bio_nbytes, error))
+			bio_endio(bio, bio_nbytes, error);
 		bio->bi_idx += next_idx;
 		bio_iovec(bio)->bv_offset += nr_bytes;
 		bio_iovec(bio)->bv_len -= nr_bytes;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a0ce8c585165..15db0f112d0a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -207,6 +207,7 @@ enum rq_flag_bits {
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
+	__REQ_FUA,		/* forced unit access */
 	__REQ_CMD,		/* is a regular fs rw request */
 	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_STARTED,		/* drive already may have started this one */
@@ -230,9 +231,7 @@ enum rq_flag_bits {
 	__REQ_PM_SUSPEND,	/* suspend request */
 	__REQ_PM_RESUME,	/* resume request */
 	__REQ_PM_SHUTDOWN,	/* shutdown request */
-	__REQ_BAR_PREFLUSH,	/* barrier pre-flush done */
-	__REQ_BAR_POSTFLUSH,	/* barrier post-flush */
-	__REQ_BAR_FLUSH,	/* rq is the flush request */
+	__REQ_ORDERED_COLOR,	/* is before or after barrier */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -241,6 +240,7 @@ enum rq_flag_bits {
 #define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
 #define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
+#define REQ_FUA		(1 << __REQ_FUA)
 #define REQ_CMD		(1 << __REQ_CMD)
 #define REQ_NOMERGE	(1 << __REQ_NOMERGE)
 #define REQ_STARTED	(1 << __REQ_STARTED)
@@ -260,9 +260,7 @@ enum rq_flag_bits {
 #define REQ_PM_SUSPEND	(1 << __REQ_PM_SUSPEND)
 #define REQ_PM_RESUME	(1 << __REQ_PM_RESUME)
 #define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
-#define REQ_BAR_PREFLUSH	(1 << __REQ_BAR_PREFLUSH)
-#define REQ_BAR_POSTFLUSH	(1 << __REQ_BAR_POSTFLUSH)
-#define REQ_BAR_FLUSH	(1 << __REQ_BAR_FLUSH)
+#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 
 /*
  * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
@@ -292,8 +290,7 @@ struct bio_vec;
 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
 typedef void (activity_fn) (void *data, int rw);
 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
-typedef int (prepare_flush_fn) (request_queue_t *, struct request *);
-typedef void (end_flush_fn) (request_queue_t *, struct request *);
+typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
 
 enum blk_queue_state {
 	Queue_down,
@@ -335,7 +332,6 @@ struct request_queue
 	activity_fn		*activity_fn;
 	issue_flush_fn		*issue_flush_fn;
 	prepare_flush_fn	*prepare_flush_fn;
-	end_flush_fn		*end_flush_fn;
 
 	/*
 	 * Dispatch queue sorting
@@ -420,14 +416,11 @@ struct request_queue
 	/*
 	 * reserved for flush operations
 	 */
-	struct request		*flush_rq;
-	unsigned char		ordered;
-};
-
-enum {
-	QUEUE_ORDERED_NONE,
-	QUEUE_ORDERED_TAG,
-	QUEUE_ORDERED_FLUSH,
+	unsigned int		ordered, next_ordered, ordseq;
+	int			orderr, ordcolor;
+	struct request		pre_flush_rq, bar_rq, post_flush_rq;
+	struct request		*orig_bar_rq;
+	unsigned int		bi_size;
 };
 
 #define RQ_INACTIVE		(-1)
@@ -445,12 +438,51 @@ enum {
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_FLUSH	9	/* doing barrier flush sequence */
+
+enum {
+	/*
+	 * Hardbarrier is supported with one of the following methods.
+	 *
+	 * NONE		: hardbarrier unsupported
+	 * DRAIN	: ordering by draining is enough
+	 * DRAIN_FLUSH	: ordering by draining w/ pre and post flushes
+	 * DRAIN_FUA	: ordering by draining w/ pre flush and FUA write
	 * TAG		: ordering by tag is enough
+	 * TAG_FLUSH	: ordering by tag w/ pre and post flushes
+	 * TAG_FUA	: ordering by tag w/ pre flush and FUA write
+	 */
+	QUEUE_ORDERED_NONE	= 0x00,
+	QUEUE_ORDERED_DRAIN	= 0x01,
+	QUEUE_ORDERED_TAG	= 0x02,
+
+	QUEUE_ORDERED_PREFLUSH	= 0x10,
+	QUEUE_ORDERED_POSTFLUSH	= 0x20,
+	QUEUE_ORDERED_FUA	= 0x40,
+
+	QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
+	QUEUE_ORDERED_DRAIN_FUA	= QUEUE_ORDERED_DRAIN |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+	QUEUE_ORDERED_TAG_FLUSH	= QUEUE_ORDERED_TAG |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
+	QUEUE_ORDERED_TAG_FUA	= QUEUE_ORDERED_TAG |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+
+	/*
+	 * Ordered operation sequence
+	 */
+	QUEUE_ORDSEQ_STARTED	= 0x01, /* flushing in progress */
+	QUEUE_ORDSEQ_DRAIN	= 0x02, /* waiting for the queue to be drained */
+	QUEUE_ORDSEQ_PREFLUSH	= 0x04, /* pre-flushing in progress */
+	QUEUE_ORDSEQ_BAR	= 0x08, /* original barrier req in progress */
+	QUEUE_ORDSEQ_POSTFLUSH	= 0x10, /* post-flushing in progress */
+	QUEUE_ORDSEQ_DONE	= 0x20,
+};
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
-#define blk_queue_flushing(q)	test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags)
+#define blk_queue_flushing(q)	((q)->ordseq)
 
 #define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
 #define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
@@ -466,8 +498,7 @@ enum {
 
 #define blk_sorted_rq(rq)	((rq)->flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
-#define blk_barrier_preflush(rq)	((rq)->flags & REQ_BAR_PREFLUSH)
-#define blk_barrier_postflush(rq)	((rq)->flags & REQ_BAR_POSTFLUSH)
+#define blk_fua_rq(rq)		((rq)->flags & REQ_FUA)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
@@ -665,11 +696,12 @@ extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(request_queue_t *, int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-extern void blk_queue_ordered(request_queue_t *, int);
+extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
-extern struct request *blk_start_pre_flush(request_queue_t *,struct request *);
-extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int);
-extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int);
+extern int blk_do_ordered(request_queue_t *, struct request **);
+extern unsigned blk_ordered_cur_seq(request_queue_t *);
+extern unsigned blk_ordered_req_seq(struct request *);
+extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int);
 
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index a74c27e460ba..fb80fa44c4dd 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -130,6 +130,7 @@ extern int elv_try_last_merge(request_queue_t *, struct bio *);
 #define ELEVATOR_INSERT_FRONT	1
 #define ELEVATOR_INSERT_BACK	2
 #define ELEVATOR_INSERT_SORT	3
+#define ELEVATOR_INSERT_REQUEUE	4
 
 /*
  * return values from elevator_may_queue_fn
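
As an aside for readers tracing the new code: the ordered-sequence bookkeeping
boils down to a bitmask walk, where blk_ordered_cur_seq() reports the lowest
QUEUE_ORDSEQ_* bit not yet completed.  The small userspace sketch below is
illustrative only (not part of the patch); it reuses the constant values from
the blkdev.h hunk and stands in for the kernel's ffz() with a GCC builtin.

/* ordseq_demo.c -- illustrative walk-through of the ordseq progression. */
#include <stdio.h>

/* Values copied from the QUEUE_ORDSEQ_* enum added in blkdev.h above. */
enum {
	QUEUE_ORDSEQ_STARTED	= 0x01,
	QUEUE_ORDSEQ_DRAIN	= 0x02,
	QUEUE_ORDSEQ_PREFLUSH	= 0x04,
	QUEUE_ORDSEQ_BAR	= 0x08,
	QUEUE_ORDSEQ_POSTFLUSH	= 0x10,
	QUEUE_ORDSEQ_DONE	= 0x20,
};

/* Same idea as blk_ordered_cur_seq(): the lowest bit not yet completed. */
static unsigned cur_seq(unsigned ordseq)
{
	return 1 << __builtin_ctz(~ordseq);	/* userspace stand-in for ffz() */
}

int main(void)
{
	unsigned ordseq = QUEUE_ORDSEQ_STARTED;
	unsigned stages[] = { QUEUE_ORDSEQ_DRAIN, QUEUE_ORDSEQ_PREFLUSH,
			      QUEUE_ORDSEQ_BAR, QUEUE_ORDSEQ_POSTFLUSH };

	for (int i = 0; i < 4; i++) {
		printf("current stage: 0x%02x\n", cur_seq(ordseq));
		ordseq |= stages[i];		/* this stage completes */
	}
	printf("current stage: 0x%02x (QUEUE_ORDSEQ_DONE)\n", cur_seq(ordseq));
	return 0;
}

Because the current stage is derived from the set of completed bits rather
than from a counter, stages that were skipped at setup time (e.g. no
post-flush for a FUA-capable device) can simply be marked done up front, which
is exactly what start_ordered() does.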