Diffstat (limited to 'block/ll_rw_blk.c')
-rw-r--r--  block/ll_rw_blk.c  384
1 file changed, 243 insertions(+), 141 deletions(-)
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 65c4efc02adf..91d3b4828c49 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -290,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 
 /**
  * blk_queue_ordered - does this queue support ordered writes
  * @q: the request queue
- * @flag: see below
+ * @ordered: one of QUEUE_ORDERED_*
  *
  * Description:
  *   For journalled file systems, doing ordered writes on a commit
@@ -300,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
  *   feature should call this function and indicate so.
  *
  **/
-void blk_queue_ordered(request_queue_t *q, int flag)
-{
-        switch (flag) {
-                case QUEUE_ORDERED_NONE:
-                        if (q->flush_rq)
-                                kmem_cache_free(request_cachep, q->flush_rq);
-                        q->flush_rq = NULL;
-                        q->ordered = flag;
-                        break;
-                case QUEUE_ORDERED_TAG:
-                        q->ordered = flag;
-                        break;
-                case QUEUE_ORDERED_FLUSH:
-                        q->ordered = flag;
-                        if (!q->flush_rq)
-                                q->flush_rq = kmem_cache_alloc(request_cachep,
-                                                                GFP_KERNEL);
-                        break;
-                default:
-                        printk("blk_queue_ordered: bad value %d\n", flag);
-                        break;
-        }
+int blk_queue_ordered(request_queue_t *q, unsigned ordered,
+                      prepare_flush_fn *prepare_flush_fn)
+{
+        if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
+            prepare_flush_fn == NULL) {
+                printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
+                return -EINVAL;
+        }
+
+        if (ordered != QUEUE_ORDERED_NONE &&
+            ordered != QUEUE_ORDERED_DRAIN &&
+            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
+            ordered != QUEUE_ORDERED_DRAIN_FUA &&
+            ordered != QUEUE_ORDERED_TAG &&
+            ordered != QUEUE_ORDERED_TAG_FLUSH &&
+            ordered != QUEUE_ORDERED_TAG_FUA) {
+                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
+                return -EINVAL;
+        }
+
+        q->next_ordered = ordered;
+        q->prepare_flush_fn = prepare_flush_fn;
+
+        return 0;
 }
 
 EXPORT_SYMBOL(blk_queue_ordered);
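
For orientation, a minimal sketch of how a driver might adopt the reworked interface. The driver name, the flush opcode and the init hook are hypothetical; only the blk_queue_ordered() signature, the QUEUE_ORDERED_* modes and the prepare_flush_fn callback come from this patch:

    /* Hypothetical driver setup against the interface added above. */
    static void mydrv_prepare_flush(request_queue_t *q, struct request *rq)
    {
            /* Turn the proxy request into a cache-flush command for the
             * hardware; the opcode below is purely illustrative. */
            memset(rq->cmd, 0, sizeof(rq->cmd));
            rq->cmd[0] = MYDRV_CMD_FLUSH_CACHE;     /* hypothetical opcode */
            rq->cmd_len = 1;
            rq->flags |= REQ_BLOCK_PC;
    }

    static int mydrv_init_queue(request_queue_t *q)
    {
            /* Drain the queue, pre-flush, do the barrier write, post-flush. */
            return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                     mydrv_prepare_flush);
    }

Unlike the old void interface, the call can now fail: it returns -EINVAL when a flush mode is requested without a prepare_flush_fn, or when an unknown mode is passed, so callers should check the result.
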
@@ -346,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
 /*
  * Cache flushing for ordered writes handling
  */
-static void blk_pre_flush_end_io(struct request *flush_rq, int error)
-{
-        struct request *rq = flush_rq->end_io_data;
-        request_queue_t *q = rq->q;
-
-        elv_completed_request(q, flush_rq);
-
-        rq->flags |= REQ_BAR_PREFLUSH;
-
-        if (!flush_rq->errors)
-                elv_requeue_request(q, rq);
-        else {
-                q->end_flush_fn(q, flush_rq);
-                clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-                q->request_fn(q);
-        }
-}
-
-static void blk_post_flush_end_io(struct request *flush_rq, int error)
-{
-        struct request *rq = flush_rq->end_io_data;
-        request_queue_t *q = rq->q;
-
-        elv_completed_request(q, flush_rq);
-
-        rq->flags |= REQ_BAR_POSTFLUSH;
-
-        q->end_flush_fn(q, flush_rq);
-        clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-        q->request_fn(q);
-}
-
-struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
-{
-        struct request *flush_rq = q->flush_rq;
-
-        BUG_ON(!blk_barrier_rq(rq));
-
-        if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
-                return NULL;
-
-        rq_init(q, flush_rq);
-        flush_rq->elevator_private = NULL;
-        flush_rq->flags = REQ_BAR_FLUSH;
-        flush_rq->rq_disk = rq->rq_disk;
-        flush_rq->rl = NULL;
-
-        /*
-         * prepare_flush returns 0 if no flush is needed, just mark both
-         * pre and post flush as done in that case
-         */
-        if (!q->prepare_flush_fn(q, flush_rq)) {
-                rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
-                clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-                return rq;
-        }
-
-        /*
-         * some drivers dequeue requests right away, some only after io
-         * completion. make sure the request is dequeued.
-         */
-        if (!list_empty(&rq->queuelist))
-                blkdev_dequeue_request(rq);
-
-        flush_rq->end_io_data = rq;
-        flush_rq->end_io = blk_pre_flush_end_io;
-
-        __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-        return flush_rq;
-}
-
-static void blk_start_post_flush(request_queue_t *q, struct request *rq)
-{
-        struct request *flush_rq = q->flush_rq;
-
-        BUG_ON(!blk_barrier_rq(rq));
-
-        rq_init(q, flush_rq);
-        flush_rq->elevator_private = NULL;
-        flush_rq->flags = REQ_BAR_FLUSH;
-        flush_rq->rq_disk = rq->rq_disk;
-        flush_rq->rl = NULL;
-
-        if (q->prepare_flush_fn(q, flush_rq)) {
-                flush_rq->end_io_data = rq;
-                flush_rq->end_io = blk_post_flush_end_io;
-
-                __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-                q->request_fn(q);
-        }
-}
-
-static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
-                                        int sectors)
-{
-        if (sectors > rq->nr_sectors)
-                sectors = rq->nr_sectors;
-
-        rq->nr_sectors -= sectors;
-        return rq->nr_sectors;
-}
-
-static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
-                                     int sectors, int queue_locked)
-{
-        if (q->ordered != QUEUE_ORDERED_FLUSH)
-                return 0;
-        if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
-                return 0;
-        if (blk_barrier_postflush(rq))
-                return 0;
-
-        if (!blk_check_end_barrier(q, rq, sectors)) {
-                unsigned long flags = 0;
-
-                if (!queue_locked)
-                        spin_lock_irqsave(q->queue_lock, flags);
-
-                blk_start_post_flush(q, rq);
-
-                if (!queue_locked)
-                        spin_unlock_irqrestore(q->queue_lock, flags);
-        }
-
-        return 1;
-}
-
-/**
- * blk_complete_barrier_rq - complete possible barrier request
- * @q: the request queue for the device
- * @rq: the request
- * @sectors: number of sectors to complete
- *
- * Description:
- *   Used in driver end_io handling to determine whether to postpone
- *   completion of a barrier request until a post flush has been done. This
- *   is the unlocked variant, used if the caller doesn't already hold the
- *   queue lock.
- **/
-int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
-{
-        return __blk_complete_barrier_rq(q, rq, sectors, 0);
-}
-EXPORT_SYMBOL(blk_complete_barrier_rq);
-
-/**
- * blk_complete_barrier_rq_locked - complete possible barrier request
- * @q: the request queue for the device
- * @rq: the request
- * @sectors: number of sectors to complete
- *
- * Description:
- *   See blk_complete_barrier_rq(). This variant must be used if the caller
- *   holds the queue lock.
- **/
-int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
-                                   int sectors)
-{
-        return __blk_complete_barrier_rq(q, rq, sectors, 1);
-}
-EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
+inline unsigned blk_ordered_cur_seq(request_queue_t *q)
+{
+        if (!q->ordseq)
+                return 0;
+        return 1 << ffz(q->ordseq);
+}
+
+unsigned blk_ordered_req_seq(struct request *rq)
+{
+        request_queue_t *q = rq->q;
+
+        BUG_ON(q->ordseq == 0);
+
+        if (rq == &q->pre_flush_rq)
+                return QUEUE_ORDSEQ_PREFLUSH;
+        if (rq == &q->bar_rq)
+                return QUEUE_ORDSEQ_BAR;
+        if (rq == &q->post_flush_rq)
+                return QUEUE_ORDSEQ_POSTFLUSH;
+
+        if ((rq->flags & REQ_ORDERED_COLOR) ==
+            (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+                return QUEUE_ORDSEQ_DRAIN;
+        else
+                return QUEUE_ORDSEQ_DONE;
+}
+
+void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
+{
+        struct request *rq;
+        int uptodate;
+
+        if (error && !q->orderr)
+                q->orderr = error;
+
+        BUG_ON(q->ordseq & seq);
+        q->ordseq |= seq;
+
+        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
+                return;
+
+        /*
+         * Okay, sequence complete.
+         */
+        rq = q->orig_bar_rq;
+        uptodate = q->orderr ? q->orderr : 1;
+
+        q->ordseq = 0;
+
+        end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
+        end_that_request_last(rq, uptodate);
+}
+
+static void pre_flush_end_io(struct request *rq, int error)
+{
+        elv_completed_request(rq->q, rq);
+        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
+}
+
+static void bar_end_io(struct request *rq, int error)
+{
+        elv_completed_request(rq->q, rq);
+        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
+}
+
+static void post_flush_end_io(struct request *rq, int error)
+{
+        elv_completed_request(rq->q, rq);
+        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
+}
+
+static void queue_flush(request_queue_t *q, unsigned which)
+{
+        struct request *rq;
+        rq_end_io_fn *end_io;
+
+        if (which == QUEUE_ORDERED_PREFLUSH) {
+                rq = &q->pre_flush_rq;
+                end_io = pre_flush_end_io;
+        } else {
+                rq = &q->post_flush_rq;
+                end_io = post_flush_end_io;
+        }
+
+        rq_init(q, rq);
+        rq->flags = REQ_HARDBARRIER;
+        rq->elevator_private = NULL;
+        rq->rq_disk = q->bar_rq.rq_disk;
+        rq->rl = NULL;
+        rq->end_io = end_io;
+        q->prepare_flush_fn(q, rq);
+
+        __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+}
+
+static inline struct request *start_ordered(request_queue_t *q,
+                                            struct request *rq)
+{
+        q->bi_size = 0;
+        q->orderr = 0;
+        q->ordered = q->next_ordered;
+        q->ordseq |= QUEUE_ORDSEQ_STARTED;
+
+        /*
+         * Prep proxy barrier request.
+         */
+        blkdev_dequeue_request(rq);
+        q->orig_bar_rq = rq;
+        rq = &q->bar_rq;
+        rq_init(q, rq);
+        rq->flags = bio_data_dir(q->orig_bar_rq->bio);
+        rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+        rq->elevator_private = NULL;
+        rq->rl = NULL;
+        init_request_from_bio(rq, q->orig_bar_rq->bio);
+        rq->end_io = bar_end_io;
+
+        /*
+         * Queue ordered sequence.  As we stack them at the head, we
+         * need to queue in reverse order.  Note that we rely on that
+         * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
+         * request gets inbetween ordered sequence.
+         */
+        if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+                queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
+        else
+                q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+
+        __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+
+        if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
+                queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+                rq = &q->pre_flush_rq;
+        } else
+                q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+
+        if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
+                q->ordseq |= QUEUE_ORDSEQ_DRAIN;
+        else
+                rq = NULL;
+
+        return rq;
+}
+
+int blk_do_ordered(request_queue_t *q, struct request **rqp)
+{
+        struct request *rq = *rqp, *allowed_rq;
+        int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
+
+        if (!q->ordseq) {
+                if (!is_barrier)
+                        return 1;
+
+                if (q->next_ordered != QUEUE_ORDERED_NONE) {
+                        *rqp = start_ordered(q, rq);
+                        return 1;
+                } else {
+                        /*
+                         * This can happen when the queue switches to
+                         * ORDERED_NONE while this request is on it.
+                         */
+                        blkdev_dequeue_request(rq);
+                        end_that_request_first(rq, -EOPNOTSUPP,
+                                               rq->hard_nr_sectors);
+                        end_that_request_last(rq, -EOPNOTSUPP);
+                        *rqp = NULL;
+                        return 0;
+                }
+        }
+
+        if (q->ordered & QUEUE_ORDERED_TAG) {
+                if (is_barrier && rq != &q->bar_rq)
+                        *rqp = NULL;
+                return 1;
+        }
+
+        switch (blk_ordered_cur_seq(q)) {
+        case QUEUE_ORDSEQ_PREFLUSH:
+                allowed_rq = &q->pre_flush_rq;
+                break;
+        case QUEUE_ORDSEQ_BAR:
+                allowed_rq = &q->bar_rq;
+                break;
+        case QUEUE_ORDSEQ_POSTFLUSH:
+                allowed_rq = &q->post_flush_rq;
+                break;
+        default:
+                allowed_rq = NULL;
+                break;
+        }
+
+        if (rq != allowed_rq &&
+            (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
+             rq == &q->post_flush_rq))
+                *rqp = NULL;
+
+        return 1;
+}
+
+static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
+{
+        request_queue_t *q = bio->bi_private;
+        struct bio_vec *bvec;
+        int i;
+
+        /*
+         * This is dry run, restore bio_sector and size.  We'll finish
+         * this request again with the original bi_end_io after an
+         * error occurs or post flush is complete.
+         */
+        q->bi_size += bytes;
+
+        if (bio->bi_size)
+                return 1;
+
+        /* Rewind bvec's */
+        bio->bi_idx = 0;
+        bio_for_each_segment(bvec, bio, i) {
+                bvec->bv_len += bvec->bv_offset;
+                bvec->bv_offset = 0;
+        }
+
+        /* Reset bio */
+        set_bit(BIO_UPTODATE, &bio->bi_flags);
+        bio->bi_size = q->bi_size;
+        bio->bi_sector -= (q->bi_size >> 9);
+        q->bi_size = 0;
+
+        return 0;
+}
+
+static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
+                                    unsigned int nbytes, int error)
+{
+        request_queue_t *q = rq->q;
+        bio_end_io_t *endio;
+        void *private;
+
+        if (&q->bar_rq != rq)
+                return 0;
+
+        /*
+         * Okay, this is the barrier request in progress, dry finish it.
+         */
+        if (error && !q->orderr)
+                q->orderr = error;
+
+        endio = bio->bi_end_io;
+        private = bio->bi_private;
+        bio->bi_end_io = flush_dry_bio_endio;
+        bio->bi_private = q;
+
+        bio_endio(bio, nbytes, error);
+
+        bio->bi_end_io = endio;
+        bio->bi_private = private;
+
+        return 1;
+}
 
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
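
The sequencing above advances q->ordseq one stage bit at a time, and blk_ordered_cur_seq() reports the lowest stage not yet completed via ffz(). A small self-contained sketch of that bit arithmetic; the QUEUE_ORDSEQ_* values used here assume the ascending bit layout from the companion blkdev.h change and are shown only to illustrate the mechanism:

    #include <stdio.h>

    /* Assumed stage bits, mirroring the QUEUE_ORDSEQ_* layout the patch relies on. */
    enum {
            ORDSEQ_STARTED   = 0x01,
            ORDSEQ_DRAIN     = 0x02,
            ORDSEQ_PREFLUSH  = 0x04,
            ORDSEQ_BAR       = 0x08,
            ORDSEQ_POSTFLUSH = 0x10,
            ORDSEQ_DONE      = 0x20,
    };

    /* Find-first-zero, as the kernel's ffz() does for these low bits. */
    static unsigned ffz_bit(unsigned x)
    {
            unsigned bit = 0;

            while (x & 1) {
                    x >>= 1;
                    bit++;
            }
            return bit;
    }

    /* Mirrors blk_ordered_cur_seq(): lowest stage whose bit is still clear. */
    static unsigned cur_seq(unsigned ordseq)
    {
            if (!ordseq)
                    return 0;
            return 1u << ffz_bit(ordseq);
    }

    int main(void)
    {
            unsigned ordseq = ORDSEQ_STARTED | ORDSEQ_PREFLUSH;

            /* Pre-flush is done but the drain is not, so DRAIN (0x02) is current. */
            printf("current stage: 0x%02x\n", cur_seq(ordseq));

            ordseq |= ORDSEQ_DRAIN | ORDSEQ_BAR | ORDSEQ_POSTFLUSH;
            /* Every stage below DONE is now set, so DONE (0x20) is reported. */
            printf("current stage: 0x%02x\n", cur_seq(ordseq));
            return 0;
    }

Because completed stages only ever set bits, blk_ordered_complete_seq() can BUG_ON(q->ordseq & seq) to catch a stage finishing twice, and the whole sequence is finished exactly when blk_ordered_cur_seq() reaches QUEUE_ORDSEQ_DONE.
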
@@ -1047,6 +1147,7 @@ static const char * const rq_flags[] = {
1047 "REQ_SORTED", 1147 "REQ_SORTED",
1048 "REQ_SOFTBARRIER", 1148 "REQ_SOFTBARRIER",
1049 "REQ_HARDBARRIER", 1149 "REQ_HARDBARRIER",
1150 "REQ_FUA",
1050 "REQ_CMD", 1151 "REQ_CMD",
1051 "REQ_NOMERGE", 1152 "REQ_NOMERGE",
1052 "REQ_STARTED", 1153 "REQ_STARTED",
@@ -1066,6 +1167,7 @@ static const char * const rq_flags[] = {
1066 "REQ_PM_SUSPEND", 1167 "REQ_PM_SUSPEND",
1067 "REQ_PM_RESUME", 1168 "REQ_PM_RESUME",
1068 "REQ_PM_SHUTDOWN", 1169 "REQ_PM_SHUTDOWN",
1170 "REQ_ORDERED_COLOR",
1069}; 1171};
1070 1172
1071void blk_dump_rq_flags(struct request *rq, char *msg) 1173void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1643,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q)
         if (q->queue_tags)
                 __blk_queue_free_tags(q);
 
-        blk_queue_ordered(q, QUEUE_ORDERED_NONE);
-
         kmem_cache_free(requestq_cachep, q);
 }
 
@@ -2714,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
         spin_lock_prefetch(q->queue_lock);
 
         barrier = bio_barrier(bio);
-        if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
+        if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
                 err = -EOPNOTSUPP;
                 goto end_io;
         }
@@ -3075,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
                 if (nr_bytes >= bio->bi_size) {
                         req->bio = bio->bi_next;
                         nbytes = bio->bi_size;
-                        bio_endio(bio, nbytes, error);
+                        if (!ordered_bio_endio(req, bio, nbytes, error))
+                                bio_endio(bio, nbytes, error);
                         next_idx = 0;
                         bio_nbytes = 0;
                 } else {
@@ -3130,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
          * if the request wasn't completed, update state
          */
         if (bio_nbytes) {
-                bio_endio(bio, bio_nbytes, error);
+                if (!ordered_bio_endio(req, bio, bio_nbytes, error))
+                        bio_endio(bio, bio_nbytes, error);
                 bio->bi_idx += next_idx;
                 bio_iovec(bio)->bv_offset += nr_bytes;
                 bio_iovec(bio)->bv_len -= nr_bytes;