Diffstat (limited to 'drivers/block/ll_rw_blk.c')
-rw-r--r--  drivers/block/ll_rw_blk.c  206
1 file changed, 105 insertions(+), 101 deletions(-)
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 60e64091de1b..692a5fced76e 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -276,6 +276,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
+	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->q = q;
@@ -1442,11 +1443,7 @@ void __generic_unplug_device(request_queue_t *q)
 	if (!blk_remove_plug(q))
 		return;
 
-	/*
-	 * was plugged, fire request_fn if queue has stuff to do
-	 */
-	if (elv_next_request(q))
-		q->request_fn(q);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__generic_unplug_device);
 
@@ -1776,8 +1773,8 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 	mempool_free(rq, q->rq.rq_pool);
 }
 
-static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
-						int gfp_mask)
+static inline struct request *
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1790,7 +1787,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, gfp_mask))
+	if (!elv_set_request(q, rq, bio, gfp_mask))
 		return rq;
 
 	mempool_free(rq, q->rq.rq_pool);
@@ -1870,18 +1867,20 @@ static void freed_request(request_queue_t *q, int rw)
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
 /*
- * Get a free request, queue_lock must not be held
+ * Get a free request, queue_lock must be held.
+ * Returns NULL on failure, with queue_lock held.
+ * Returns !NULL on success, with queue_lock *not held*.
  */
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
+				   int gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = get_io_context(gfp_mask);
+	struct io_context *ioc = current_io_context(GFP_ATOMIC);
 
 	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
 		goto out;
 
-	spin_lock_irq(q->queue_lock);
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
 		 * The queue will fill after this allocation, so set it as
@@ -1895,7 +1894,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		}
 	}
 
-	switch (elv_may_queue(q, rw)) {
+	switch (elv_may_queue(q, rw, bio)) {
 		case ELV_MQUEUE_NO:
 			goto rq_starved;
 		case ELV_MQUEUE_MAY:
@@ -1909,18 +1908,25 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		 * The queue is full and the allocating process is not a
 		 * "batcher", and not exempted by the IO scheduler
 		 */
-		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
 
 get_rq:
+	/*
+	 * Only allow batching queuers to allocate up to 50% over the defined
+	 * limit of requests, otherwise we could have thousands of requests
+	 * allocated with any setting of ->nr_requests
+	 */
+	if (rl->count[rw] >= (3 * q->nr_requests / 2))
+		goto out;
+
 	rl->count[rw]++;
 	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1943,7 +1949,6 @@ rq_starved:
 		if (unlikely(rl->count[rw] == 0))
 			rl->starved[rw] = 1;
 
-		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
 
@@ -1953,31 +1958,35 @@ rq_starved:
 	rq_init(q, rq);
 	rq->rl = rl;
 out:
-	put_io_context(ioc);
 	return rq;
 }
 
 /*
  * No available requests for this queue, unplug the device and wait for some
  * requests to become available.
+ *
+ * Called with q->queue_lock held, and returns with it unlocked.
  */
-static struct request *get_request_wait(request_queue_t *q, int rw)
+static struct request *get_request_wait(request_queue_t *q, int rw,
+					struct bio *bio)
 {
-	DEFINE_WAIT(wait);
 	struct request *rq;
 
-	do {
+	rq = get_request(q, rw, bio, GFP_NOIO);
+	while (!rq) {
+		DEFINE_WAIT(wait);
 		struct request_list *rl = &q->rq;
 
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw, GFP_NOIO);
+		rq = get_request(q, rw, bio, GFP_NOIO);
 
 		if (!rq) {
 			struct io_context *ioc;
 
-			generic_unplug_device(q);
+			__generic_unplug_device(q);
+			spin_unlock_irq(q->queue_lock);
 			io_schedule();
 
 			/*
@@ -1986,12 +1995,13 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 			 * up to a big batch of them for a small period time.
 			 * See ioc_batching, ioc_set_batching
 			 */
-			ioc = get_io_context(GFP_NOIO);
+			ioc = current_io_context(GFP_NOIO);
 			ioc_set_batching(q, ioc);
-			put_io_context(ioc);
+
+			spin_lock_irq(q->queue_lock);
 		}
 		finish_wait(&rl->wait[rw], &wait);
-	} while (!rq);
+	}
 
 	return rq;
 }
@@ -2002,14 +2012,18 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
 
 	BUG_ON(rw != READ && rw != WRITE);
 
-	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw);
-	else
-		rq = get_request(q, rw, gfp_mask);
+	spin_lock_irq(q->queue_lock);
+	if (gfp_mask & __GFP_WAIT) {
+		rq = get_request_wait(q, rw, NULL);
+	} else {
+		rq = get_request(q, rw, NULL, gfp_mask);
+		if (!rq)
+			spin_unlock_irq(q->queue_lock);
+	}
+	/* q->queue_lock is unlocked at this point */
 
 	return rq;
 }
-
 EXPORT_SYMBOL(blk_get_request);
 
 /**
@@ -2333,7 +2347,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 		return;
 
 	req->rq_status = RQ_INACTIVE;
-	req->q = NULL;
 	req->rl = NULL;
 
 	/*
@@ -2462,6 +2475,8 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 		req->rq_disk->in_flight--;
 	}
 
+	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+
 	__blk_put_request(q, next);
 	return 1;
 }
@@ -2512,13 +2527,15 @@ EXPORT_SYMBOL(blk_attempt_remerge);
 
 static int __make_request(request_queue_t *q, struct bio *bio)
 {
-	struct request *req, *freereq = NULL;
+	struct request *req;
 	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
+	unsigned short prio;
 	sector_t sector;
 
 	sector = bio->bi_sector;
 	nr_sectors = bio_sectors(bio);
 	cur_nr_sectors = bio_cur_sectors(bio);
+	prio = bio_prio(bio);
 
 	rw = bio_data_dir(bio);
 	sync = bio_sync(bio);
@@ -2538,14 +2555,9 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 		goto end_io;
 	}
 
-again:
 	spin_lock_irq(q->queue_lock);
 
-	if (elv_queue_empty(q)) {
-		blk_plug_device(q);
-		goto get_rq;
-	}
-	if (barrier)
+	if (unlikely(barrier) || elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
@@ -2559,6 +2571,7 @@ again:
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req);
@@ -2583,45 +2596,30 @@ again:
 			req->hard_cur_sectors = cur_nr_sectors;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req);
 			goto out;
 
-		/*
-		 * elevator says don't/can't merge. get new request
-		 */
-		case ELEVATOR_NO_MERGE:
-			break;
-
+		/* ELV_NO_MERGE: elevator says don't/can't merge. */
 		default:
-			printk("elevator returned crap (%d)\n", el_ret);
-			BUG();
+			;
 	}
 
+get_rq:
 	/*
-	 * Grab a free request from the freelist - if that is empty, check
-	 * if we are doing read ahead and abort instead of blocking for
-	 * a free slot.
+	 * Grab a free request. This is might sleep but can not fail.
+	 * Returns with the queue unlocked.
+	 */
+	req = get_request_wait(q, rw, bio);
+
+	/*
+	 * After dropping the lock and possibly sleeping here, our request
+	 * may now be mergeable after it had proven unmergeable (above).
+	 * We don't worry about that case for efficiency. It won't happen
+	 * often, and the elevators are able to handle it.
 	 */
-get_rq:
-	if (freereq) {
-		req = freereq;
-		freereq = NULL;
-	} else {
-		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
-			/*
-			 * READA bit set
-			 */
-			err = -EWOULDBLOCK;
-			if (bio_rw_ahead(bio))
-				goto end_io;
-
-			freereq = get_request_wait(q, rw);
-		}
-		goto again;
-	}
 
 	req->flags |= REQ_CMD;
 
@@ -2646,13 +2644,15 @@ get_rq:
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
 	req->waiting = NULL;
 	req->bio = req->biotail = bio;
+	req->ioprio = prio;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
 
+	spin_lock_irq(q->queue_lock);
+	if (elv_queue_empty(q))
+		blk_plug_device(q);
 	add_request(q, req);
 out:
-	if (freereq)
-		__blk_put_request(q, freereq);
 	if (sync)
 		__generic_unplug_device(q);
 
@@ -2674,7 +2674,7 @@ static inline void blk_partition_remap(struct bio *bio)
 	if (bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 
-		switch (bio->bi_rw) {
+		switch (bio_data_dir(bio)) {
 		case READ:
 			p->read_sectors += bio_sectors(bio);
 			p->reads++;
@@ -2693,6 +2693,7 @@ void blk_finish_queue_drain(request_queue_t *q)
 {
 	struct request_list *rl = &q->rq;
 	struct request *rq;
+	int requeued = 0;
 
 	spin_lock_irq(q->queue_lock);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
@@ -2701,9 +2702,13 @@ void blk_finish_queue_drain(request_queue_t *q)
 		rq = list_entry_rq(q->drain_list.next);
 
 		list_del_init(&rq->queuelist);
-		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+		elv_requeue_request(q, rq);
+		requeued++;
 	}
 
+	if (requeued)
+		q->request_fn(q);
+
 	spin_unlock_irq(q->queue_lock);
 
 	wake_up(&rl->wait[0]);
@@ -2900,7 +2905,7 @@ void submit_bio(int rw, struct bio *bio)
 
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_io_vec);
-	bio->bi_rw = rw;
+	bio->bi_rw |= rw;
 	if (rw & WRITE)
 		mod_page_state(pgpgout, count);
 	else
@@ -3257,8 +3262,11 @@ void exit_io_context(void)
 	struct io_context *ioc;
 
 	local_irq_save(flags);
+	task_lock(current);
 	ioc = current->io_context;
 	current->io_context = NULL;
+	ioc->task = NULL;
+	task_unlock(current);
 	local_irq_restore(flags);
 
 	if (ioc->aic && ioc->aic->exit)
@@ -3271,53 +3279,49 @@ void exit_io_context(void)
 
 /*
  * If the current task has no IO context then create one and initialise it.
- * If it does have a context, take a ref on it.
+ * Otherwise, return its existing IO context.
  *
- * This is always called in the context of the task which submitted the I/O.
- * But weird things happen, so we disable local interrupts to ensure exclusive
- * access to *current.
+ * This returned IO context doesn't have a specifically elevated refcount,
+ * but since the current task itself holds a reference, the context can be
+ * used in general code, so long as it stays within `current` context.
  */
-struct io_context *get_io_context(int gfp_flags)
+struct io_context *current_io_context(int gfp_flags)
 {
 	struct task_struct *tsk = current;
-	unsigned long flags;
 	struct io_context *ret;
 
-	local_irq_save(flags);
 	ret = tsk->io_context;
-	if (ret)
-		goto out;
-
-	local_irq_restore(flags);
+	if (likely(ret))
+		return ret;
 
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
-		ret->pid = tsk->pid;
+		ret->task = current;
+		ret->set_ioprio = NULL;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->cic = NULL;
-		spin_lock_init(&ret->lock);
-
-		local_irq_save(flags);
+		tsk->io_context = ret;
+	}
 
-		/*
-		 * very unlikely, someone raced with us in setting up the task
-		 * io context. free new context and just grab a reference.
-		 */
-		if (!tsk->io_context)
-			tsk->io_context = ret;
-		else {
-			kmem_cache_free(iocontext_cachep, ret);
-			ret = tsk->io_context;
-		}
+	return ret;
+}
+EXPORT_SYMBOL(current_io_context);
 
-out:
+/*
+ * If the current task has no IO context then create one and initialise it.
+ * If it does have a context, take a ref on it.
+ *
+ * This is always called in the context of the task which submitted the I/O.
+ */
+struct io_context *get_io_context(int gfp_flags)
+{
+	struct io_context *ret;
+	ret = current_io_context(gfp_flags);
+	if (likely(ret))
 		atomic_inc(&ret->refcount);
-		local_irq_restore(flags);
-	}
-
 	return ret;
 }
 EXPORT_SYMBOL(get_io_context);
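
Note on the locking contract introduced above: get_request() and get_request_wait() are now entered with q->queue_lock held; a successful return hands the request back with the lock already dropped, and a failed non-blocking get_request() is the only path that still holds it. The sketch below is illustrative only and not part of the patch (the helper name is hypothetical); it mirrors what blk_get_request() does in the -2002/+2012 hunk, showing how a caller inside ll_rw_blk.c is expected to drive the new contract.

/*
 * Illustrative sketch, not part of the patch: a hypothetical helper that
 * follows the post-patch locking rules of get_request()/get_request_wait().
 */
static struct request *example_get_one_request(request_queue_t *q, int rw,
					       struct bio *bio, int gfp_mask)
{
	struct request *rq;

	spin_lock_irq(q->queue_lock);	/* both helpers now expect the lock held */

	if (gfp_mask & __GFP_WAIT) {
		/* May sleep; always returns with q->queue_lock released. */
		rq = get_request_wait(q, rw, bio);
	} else {
		rq = get_request(q, rw, bio, gfp_mask);
		if (!rq)
			/* Failure is the only path still holding the lock. */
			spin_unlock_irq(q->queue_lock);
	}

	/* q->queue_lock is not held here, whether or not rq is NULL. */
	return rq;
}

Pushing the lock acquisition out to the callers is what lets get_request_wait() unplug and sleep without the lock while the request-list counters stay serialized under q->queue_lock.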