author     Linus Torvalds <torvalds@linux-foundation.org>  2012-01-15 15:24:45 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-15 15:24:45 -0500
commit     b3c9dd182ed3bdcdaf0e42625a35924b0497afdc
tree       ad48ad4d923fee147c736318d0fad35b3755f4f5  /block
parent     83c2f912b43c3a7babbb6cb7ae2a5276c1ed2a3e
parent     5d381efb3d1f1ef10535a31ca0dd9b22fe1e1922
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
Revert "block: recursive merge requests"
block: Stop using macro stubs for the bio data integrity calls
blockdev: convert some macros to static inlines
fs: remove unneeded plug in mpage_readpages()
block: Add BLKROTATIONAL ioctl
block: Introduce blk_set_stacking_limits function
block: remove WARN_ON_ONCE() in exit_io_context()
block: an exiting task should be allowed to create io_context
block: ioc_cgroup_changed() needs to be exported
block: recursive merge requests
block, cfq: fix empty queue crash caused by request merge
block, cfq: move icq creation and rq->elv.icq association to block core
block, cfq: restructure io_cq creation path for io_context interface cleanup
block, cfq: move io_cq exit/release to blk-ioc.c
block, cfq: move icq cache management to block core
block, cfq: move io_cq lookup to blk-ioc.c
block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
block, cfq: reorganize cfq_io_context into generic and cfq specific parts
block: remove elevator_queue->ops
block: reorder elevator switch sequence
...
Fix up conflicts in:
- block/blk-cgroup.c
Switch from can_attach_task to can_attach
- block/cfq-iosched.c
conflict with now removed cic index changes (we now use q->id instead)
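The core of this series replaces CFQ's private cfq_io_context (previously indexed through a separate cic index) with a generic io_cq object owned by the block core; icq's are now looked up from the io_context keyed by q->id, which is what the cfq-iosched.c conflict note above refers to. As rough orientation before the hunks below, the new linkage looks approximately like this; field names are taken from the blk-ioc.c and blk.h changes, but this is a simplified sketch, not the verbatim kernel definitions:

	/* Simplified sketch of the io_context <-> request_queue linkage
	 * introduced by this series (not the verbatim kernel structs). */
	struct io_cq {
		struct io_context	*ioc;		/* owning io_context */
		struct request_queue	*q;		/* associated queue */
		struct list_head	q_node;		/* on q->icq_list */
		struct hlist_node	ioc_node;	/* on ioc->icq_list */
	};

	struct io_context {
		struct radix_tree_root	icq_tree;	/* icq's keyed by q->id */
		struct io_cq __rcu	*icq_hint;	/* last-used icq cache */
		struct hlist_head	icq_list;	/* all icq's of this ioc */
		struct work_struct	release_work;	/* deferred teardown */
	};

Requests carry a pointer to their icq in rq->elv.icq, and the icq pins the io_context until the request is freed (see blk_alloc_request()/blk_free_request() in the blk-core.c hunks below).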
Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c       |  11
-rw-r--r--  block/blk-core.c         | 203
-rw-r--r--  block/blk-exec.c         |   8
-rw-r--r--  block/blk-ioc.c          | 485
-rw-r--r--  block/blk-settings.c     |  32
-rw-r--r--  block/blk-sysfs.c        |  12
-rw-r--r--  block/blk-throttle.c     |   4
-rw-r--r--  block/blk.h              |  58
-rw-r--r--  block/bsg.c              |   4
-rw-r--r--  block/cfq-iosched.c      | 619
-rw-r--r--  block/compat_ioctl.c     |   3
-rw-r--r--  block/deadline-iosched.c |   4
-rw-r--r--  block/elevator.c         | 217
-rw-r--r--  block/genhd.c            |   2
-rw-r--r--  block/ioctl.c            |   2
-rw-r--r--  block/noop-iosched.c     |   4
16 files changed, 847 insertions(+), 821 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b8c143d68ee0..fa8f26309444 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1655,11 +1655,12 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
1655 | struct io_context *ioc; | 1655 | struct io_context *ioc; |
1656 | 1656 | ||
1657 | cgroup_taskset_for_each(task, cgrp, tset) { | 1657 | cgroup_taskset_for_each(task, cgrp, tset) { |
1658 | task_lock(task); | 1658 | /* we don't lose anything even if ioc allocation fails */ |
1659 | ioc = task->io_context; | 1659 | ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); |
1660 | if (ioc) | 1660 | if (ioc) { |
1661 | ioc->cgroup_changed = 1; | 1661 | ioc_cgroup_changed(ioc); |
1662 | task_unlock(task); | 1662 | put_io_context(ioc, NULL); |
1663 | } | ||
1663 | } | 1664 | } |
1664 | } | 1665 | } |
1665 | 1666 | ||
diff --git a/block/blk-core.c b/block/blk-core.c
index 15de223c7f93..e6c05a97ee2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | |||
39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | 39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
40 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 40 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
41 | 41 | ||
42 | DEFINE_IDA(blk_queue_ida); | ||
43 | |||
42 | /* | 44 | /* |
43 | * For the allocated request tables | 45 | * For the allocated request tables |
44 | */ | 46 | */ |
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue); | |||
358 | void blk_drain_queue(struct request_queue *q, bool drain_all) | 360 | void blk_drain_queue(struct request_queue *q, bool drain_all) |
359 | { | 361 | { |
360 | while (true) { | 362 | while (true) { |
361 | int nr_rqs; | 363 | bool drain = false; |
364 | int i; | ||
362 | 365 | ||
363 | spin_lock_irq(q->queue_lock); | 366 | spin_lock_irq(q->queue_lock); |
364 | 367 | ||
@@ -375,14 +378,25 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
375 | if (!list_empty(&q->queue_head)) | 378 | if (!list_empty(&q->queue_head)) |
376 | __blk_run_queue(q); | 379 | __blk_run_queue(q); |
377 | 380 | ||
378 | if (drain_all) | 381 | drain |= q->rq.elvpriv; |
379 | nr_rqs = q->rq.count[0] + q->rq.count[1]; | 382 | |
380 | else | 383 | /* |
381 | nr_rqs = q->rq.elvpriv; | 384 | * Unfortunately, requests are queued at and tracked from |
385 | * multiple places and there's no single counter which can | ||
386 | * be drained. Check all the queues and counters. | ||
387 | */ | ||
388 | if (drain_all) { | ||
389 | drain |= !list_empty(&q->queue_head); | ||
390 | for (i = 0; i < 2; i++) { | ||
391 | drain |= q->rq.count[i]; | ||
392 | drain |= q->in_flight[i]; | ||
393 | drain |= !list_empty(&q->flush_queue[i]); | ||
394 | } | ||
395 | } | ||
382 | 396 | ||
383 | spin_unlock_irq(q->queue_lock); | 397 | spin_unlock_irq(q->queue_lock); |
384 | 398 | ||
385 | if (!nr_rqs) | 399 | if (!drain) |
386 | break; | 400 | break; |
387 | msleep(10); | 401 | msleep(10); |
388 | } | 402 | } |
@@ -469,6 +483,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
469 | if (!q) | 483 | if (!q) |
470 | return NULL; | 484 | return NULL; |
471 | 485 | ||
486 | q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); | ||
487 | if (q->id < 0) | ||
488 | goto fail_q; | ||
489 | |||
472 | q->backing_dev_info.ra_pages = | 490 | q->backing_dev_info.ra_pages = |
473 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 491 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
474 | q->backing_dev_info.state = 0; | 492 | q->backing_dev_info.state = 0; |
@@ -477,20 +495,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
477 | q->node = node_id; | 495 | q->node = node_id; |
478 | 496 | ||
479 | err = bdi_init(&q->backing_dev_info); | 497 | err = bdi_init(&q->backing_dev_info); |
480 | if (err) { | 498 | if (err) |
481 | kmem_cache_free(blk_requestq_cachep, q); | 499 | goto fail_id; |
482 | return NULL; | ||
483 | } | ||
484 | 500 | ||
485 | if (blk_throtl_init(q)) { | 501 | if (blk_throtl_init(q)) |
486 | kmem_cache_free(blk_requestq_cachep, q); | 502 | goto fail_id; |
487 | return NULL; | ||
488 | } | ||
489 | 503 | ||
490 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | 504 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
491 | laptop_mode_timer_fn, (unsigned long) q); | 505 | laptop_mode_timer_fn, (unsigned long) q); |
492 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 506 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
493 | INIT_LIST_HEAD(&q->timeout_list); | 507 | INIT_LIST_HEAD(&q->timeout_list); |
508 | INIT_LIST_HEAD(&q->icq_list); | ||
494 | INIT_LIST_HEAD(&q->flush_queue[0]); | 509 | INIT_LIST_HEAD(&q->flush_queue[0]); |
495 | INIT_LIST_HEAD(&q->flush_queue[1]); | 510 | INIT_LIST_HEAD(&q->flush_queue[1]); |
496 | INIT_LIST_HEAD(&q->flush_data_in_flight); | 511 | INIT_LIST_HEAD(&q->flush_data_in_flight); |
@@ -508,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
508 | q->queue_lock = &q->__queue_lock; | 523 | q->queue_lock = &q->__queue_lock; |
509 | 524 | ||
510 | return q; | 525 | return q; |
526 | |||
527 | fail_id: | ||
528 | ida_simple_remove(&blk_queue_ida, q->id); | ||
529 | fail_q: | ||
530 | kmem_cache_free(blk_requestq_cachep, q); | ||
531 | return NULL; | ||
511 | } | 532 | } |
512 | EXPORT_SYMBOL(blk_alloc_queue_node); | 533 | EXPORT_SYMBOL(blk_alloc_queue_node); |
513 | 534 | ||
@@ -605,26 +626,31 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
605 | } | 626 | } |
606 | EXPORT_SYMBOL(blk_init_allocated_queue); | 627 | EXPORT_SYMBOL(blk_init_allocated_queue); |
607 | 628 | ||
608 | int blk_get_queue(struct request_queue *q) | 629 | bool blk_get_queue(struct request_queue *q) |
609 | { | 630 | { |
610 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 631 | if (likely(!blk_queue_dead(q))) { |
611 | kobject_get(&q->kobj); | 632 | __blk_get_queue(q); |
612 | return 0; | 633 | return true; |
613 | } | 634 | } |
614 | 635 | ||
615 | return 1; | 636 | return false; |
616 | } | 637 | } |
617 | EXPORT_SYMBOL(blk_get_queue); | 638 | EXPORT_SYMBOL(blk_get_queue); |
618 | 639 | ||
619 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 640 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
620 | { | 641 | { |
621 | if (rq->cmd_flags & REQ_ELVPRIV) | 642 | if (rq->cmd_flags & REQ_ELVPRIV) { |
622 | elv_put_request(q, rq); | 643 | elv_put_request(q, rq); |
644 | if (rq->elv.icq) | ||
645 | put_io_context(rq->elv.icq->ioc, q); | ||
646 | } | ||
647 | |||
623 | mempool_free(rq, q->rq.rq_pool); | 648 | mempool_free(rq, q->rq.rq_pool); |
624 | } | 649 | } |
625 | 650 | ||
626 | static struct request * | 651 | static struct request * |
627 | blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) | 652 | blk_alloc_request(struct request_queue *q, struct io_cq *icq, |
653 | unsigned int flags, gfp_t gfp_mask) | ||
628 | { | 654 | { |
629 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 655 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
630 | 656 | ||
@@ -635,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) | |||
635 | 661 | ||
636 | rq->cmd_flags = flags | REQ_ALLOCED; | 662 | rq->cmd_flags = flags | REQ_ALLOCED; |
637 | 663 | ||
638 | if ((flags & REQ_ELVPRIV) && | 664 | if (flags & REQ_ELVPRIV) { |
639 | unlikely(elv_set_request(q, rq, gfp_mask))) { | 665 | rq->elv.icq = icq; |
640 | mempool_free(rq, q->rq.rq_pool); | 666 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { |
641 | return NULL; | 667 | mempool_free(rq, q->rq.rq_pool); |
668 | return NULL; | ||
669 | } | ||
670 | /* @rq->elv.icq holds on to io_context until @rq is freed */ | ||
671 | if (icq) | ||
672 | get_io_context(icq->ioc); | ||
642 | } | 673 | } |
643 | 674 | ||
644 | return rq; | 675 | return rq; |
@@ -750,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
750 | { | 781 | { |
751 | struct request *rq = NULL; | 782 | struct request *rq = NULL; |
752 | struct request_list *rl = &q->rq; | 783 | struct request_list *rl = &q->rq; |
753 | struct io_context *ioc = NULL; | 784 | struct elevator_type *et; |
785 | struct io_context *ioc; | ||
786 | struct io_cq *icq = NULL; | ||
754 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 787 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
788 | bool retried = false; | ||
755 | int may_queue; | 789 | int may_queue; |
790 | retry: | ||
791 | et = q->elevator->type; | ||
792 | ioc = current->io_context; | ||
756 | 793 | ||
757 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 794 | if (unlikely(blk_queue_dead(q))) |
758 | return NULL; | 795 | return NULL; |
759 | 796 | ||
760 | may_queue = elv_may_queue(q, rw_flags); | 797 | may_queue = elv_may_queue(q, rw_flags); |
@@ -763,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
763 | 800 | ||
764 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { | 801 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
765 | if (rl->count[is_sync]+1 >= q->nr_requests) { | 802 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
766 | ioc = current_io_context(GFP_ATOMIC, q->node); | 803 | /* |
804 | * We want ioc to record batching state. If it's | ||
805 | * not already there, creating a new one requires | ||
806 | * dropping queue_lock, which in turn requires | ||
807 | * retesting conditions to avoid queue hang. | ||
808 | */ | ||
809 | if (!ioc && !retried) { | ||
810 | spin_unlock_irq(q->queue_lock); | ||
811 | create_io_context(current, gfp_mask, q->node); | ||
812 | spin_lock_irq(q->queue_lock); | ||
813 | retried = true; | ||
814 | goto retry; | ||
815 | } | ||
816 | |||
767 | /* | 817 | /* |
768 | * The queue will fill after this allocation, so set | 818 | * The queue will fill after this allocation, so set |
769 | * it as full, and mark this process as "batching". | 819 | * it as full, and mark this process as "batching". |
@@ -799,17 +849,36 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
799 | rl->count[is_sync]++; | 849 | rl->count[is_sync]++; |
800 | rl->starved[is_sync] = 0; | 850 | rl->starved[is_sync] = 0; |
801 | 851 | ||
852 | /* | ||
853 | * Decide whether the new request will be managed by elevator. If | ||
854 | * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will | ||
855 | * prevent the current elevator from being destroyed until the new | ||
856 | * request is freed. This guarantees icq's won't be destroyed and | ||
857 | * makes creating new ones safe. | ||
858 | * | ||
859 | * Also, lookup icq while holding queue_lock. If it doesn't exist, | ||
860 | * it will be created after releasing queue_lock. | ||
861 | */ | ||
802 | if (blk_rq_should_init_elevator(bio) && | 862 | if (blk_rq_should_init_elevator(bio) && |
803 | !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { | 863 | !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { |
804 | rw_flags |= REQ_ELVPRIV; | 864 | rw_flags |= REQ_ELVPRIV; |
805 | rl->elvpriv++; | 865 | rl->elvpriv++; |
866 | if (et->icq_cache && ioc) | ||
867 | icq = ioc_lookup_icq(ioc, q); | ||
806 | } | 868 | } |
807 | 869 | ||
808 | if (blk_queue_io_stat(q)) | 870 | if (blk_queue_io_stat(q)) |
809 | rw_flags |= REQ_IO_STAT; | 871 | rw_flags |= REQ_IO_STAT; |
810 | spin_unlock_irq(q->queue_lock); | 872 | spin_unlock_irq(q->queue_lock); |
811 | 873 | ||
812 | rq = blk_alloc_request(q, rw_flags, gfp_mask); | 874 | /* create icq if missing */ |
875 | if (unlikely(et->icq_cache && !icq)) | ||
876 | icq = ioc_create_icq(q, gfp_mask); | ||
877 | |||
878 | /* rqs are guaranteed to have icq on elv_set_request() if requested */ | ||
879 | if (likely(!et->icq_cache || icq)) | ||
880 | rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); | ||
881 | |||
813 | if (unlikely(!rq)) { | 882 | if (unlikely(!rq)) { |
814 | /* | 883 | /* |
815 | * Allocation failed presumably due to memory. Undo anything | 884 | * Allocation failed presumably due to memory. Undo anything |
@@ -871,10 +940,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, | |||
871 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 940 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
872 | while (!rq) { | 941 | while (!rq) { |
873 | DEFINE_WAIT(wait); | 942 | DEFINE_WAIT(wait); |
874 | struct io_context *ioc; | ||
875 | struct request_list *rl = &q->rq; | 943 | struct request_list *rl = &q->rq; |
876 | 944 | ||
877 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 945 | if (unlikely(blk_queue_dead(q))) |
878 | return NULL; | 946 | return NULL; |
879 | 947 | ||
880 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 948 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
@@ -891,8 +959,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, | |||
891 | * up to a big batch of them for a small period time. | 959 | * up to a big batch of them for a small period time. |
892 | * See ioc_batching, ioc_set_batching | 960 | * See ioc_batching, ioc_set_batching |
893 | */ | 961 | */ |
894 | ioc = current_io_context(GFP_NOIO, q->node); | 962 | create_io_context(current, GFP_NOIO, q->node); |
895 | ioc_set_batching(q, ioc); | 963 | ioc_set_batching(q, current->io_context); |
896 | 964 | ||
897 | spin_lock_irq(q->queue_lock); | 965 | spin_lock_irq(q->queue_lock); |
898 | finish_wait(&rl->wait[is_sync], &wait); | 966 | finish_wait(&rl->wait[is_sync], &wait); |
@@ -1009,54 +1077,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq, | |||
1009 | __elv_add_request(q, rq, where); | 1077 | __elv_add_request(q, rq, where); |
1010 | } | 1078 | } |
1011 | 1079 | ||
1012 | /** | ||
1013 | * blk_insert_request - insert a special request into a request queue | ||
1014 | * @q: request queue where request should be inserted | ||
1015 | * @rq: request to be inserted | ||
1016 | * @at_head: insert request at head or tail of queue | ||
1017 | * @data: private data | ||
1018 | * | ||
1019 | * Description: | ||
1020 | * Many block devices need to execute commands asynchronously, so they don't | ||
1021 | * block the whole kernel from preemption during request execution. This is | ||
1022 | * accomplished normally by inserting aritficial requests tagged as | ||
1023 | * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them | ||
1024 | * be scheduled for actual execution by the request queue. | ||
1025 | * | ||
1026 | * We have the option of inserting the head or the tail of the queue. | ||
1027 | * Typically we use the tail for new ioctls and so forth. We use the head | ||
1028 | * of the queue for things like a QUEUE_FULL message from a device, or a | ||
1029 | * host that is unable to accept a particular command. | ||
1030 | */ | ||
1031 | void blk_insert_request(struct request_queue *q, struct request *rq, | ||
1032 | int at_head, void *data) | ||
1033 | { | ||
1034 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | ||
1035 | unsigned long flags; | ||
1036 | |||
1037 | /* | ||
1038 | * tell I/O scheduler that this isn't a regular read/write (ie it | ||
1039 | * must not attempt merges on this) and that it acts as a soft | ||
1040 | * barrier | ||
1041 | */ | ||
1042 | rq->cmd_type = REQ_TYPE_SPECIAL; | ||
1043 | |||
1044 | rq->special = data; | ||
1045 | |||
1046 | spin_lock_irqsave(q->queue_lock, flags); | ||
1047 | |||
1048 | /* | ||
1049 | * If command is tagged, release the tag | ||
1050 | */ | ||
1051 | if (blk_rq_tagged(rq)) | ||
1052 | blk_queue_end_tag(q, rq); | ||
1053 | |||
1054 | add_acct_request(q, rq, where); | ||
1055 | __blk_run_queue(q); | ||
1056 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
1057 | } | ||
1058 | EXPORT_SYMBOL(blk_insert_request); | ||
1059 | |||
1060 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 1080 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
1061 | unsigned long now) | 1081 | unsigned long now) |
1062 | { | 1082 | { |
@@ -1766,6 +1786,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | |||
1766 | return -EIO; | 1786 | return -EIO; |
1767 | 1787 | ||
1768 | spin_lock_irqsave(q->queue_lock, flags); | 1788 | spin_lock_irqsave(q->queue_lock, flags); |
1789 | if (unlikely(blk_queue_dead(q))) { | ||
1790 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
1791 | return -ENODEV; | ||
1792 | } | ||
1769 | 1793 | ||
1770 | /* | 1794 | /* |
1771 | * Submitting request must be dequeued before calling this function | 1795 | * Submitting request must be dequeued before calling this function |
@@ -2740,6 +2764,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, | |||
2740 | trace_block_unplug(q, depth, !from_schedule); | 2764 | trace_block_unplug(q, depth, !from_schedule); |
2741 | 2765 | ||
2742 | /* | 2766 | /* |
2767 | * Don't mess with dead queue. | ||
2768 | */ | ||
2769 | if (unlikely(blk_queue_dead(q))) { | ||
2770 | spin_unlock(q->queue_lock); | ||
2771 | return; | ||
2772 | } | ||
2773 | |||
2774 | /* | ||
2743 | * If we are punting this to kblockd, then we can safely drop | 2775 | * If we are punting this to kblockd, then we can safely drop |
2744 | * the queue_lock before waking kblockd (which needs to take | 2776 | * the queue_lock before waking kblockd (which needs to take |
2745 | * this lock). | 2777 | * this lock). |
@@ -2815,6 +2847,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
2815 | depth = 0; | 2847 | depth = 0; |
2816 | spin_lock(q->queue_lock); | 2848 | spin_lock(q->queue_lock); |
2817 | } | 2849 | } |
2850 | |||
2851 | /* | ||
2852 | * Short-circuit if @q is dead | ||
2853 | */ | ||
2854 | if (unlikely(blk_queue_dead(q))) { | ||
2855 | __blk_end_request_all(rq, -ENODEV); | ||
2856 | continue; | ||
2857 | } | ||
2858 | |||
2818 | /* | 2859 | /* |
2819 | * rq is already accounted, so use raw insert | 2860 | * rq is already accounted, so use raw insert |
2820 | */ | 2861 | */ |
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a1ebceb332f9..fb2cbd551621 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,7 +50,11 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
50 | { | 50 | { |
51 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 51 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
52 | 52 | ||
53 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 53 | WARN_ON(irqs_disabled()); |
54 | spin_lock_irq(q->queue_lock); | ||
55 | |||
56 | if (unlikely(blk_queue_dead(q))) { | ||
57 | spin_unlock_irq(q->queue_lock); | ||
54 | rq->errors = -ENXIO; | 58 | rq->errors = -ENXIO; |
55 | if (rq->end_io) | 59 | if (rq->end_io) |
56 | rq->end_io(rq, rq->errors); | 60 | rq->end_io(rq, rq->errors); |
@@ -59,8 +63,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
59 | 63 | ||
60 | rq->rq_disk = bd_disk; | 64 | rq->rq_disk = bd_disk; |
61 | rq->end_io = done; | 65 | rq->end_io = done; |
62 | WARN_ON(irqs_disabled()); | ||
63 | spin_lock_irq(q->queue_lock); | ||
64 | __elv_add_request(q, rq, where); | 66 | __elv_add_request(q, rq, where); |
65 | __blk_run_queue(q); | 67 | __blk_run_queue(q); |
66 | /* the queue is stopped so it won't be run */ | 68 | /* the queue is stopped so it won't be run */ |
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 6f9bbd978653..27a06e00eaec 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -16,53 +16,214 @@ | |||
16 | */ | 16 | */ |
17 | static struct kmem_cache *iocontext_cachep; | 17 | static struct kmem_cache *iocontext_cachep; |
18 | 18 | ||
19 | static void cfq_dtor(struct io_context *ioc) | 19 | /** |
20 | * get_io_context - increment reference count to io_context | ||
21 | * @ioc: io_context to get | ||
22 | * | ||
23 | * Increment reference count to @ioc. | ||
24 | */ | ||
25 | void get_io_context(struct io_context *ioc) | ||
26 | { | ||
27 | BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | ||
28 | atomic_long_inc(&ioc->refcount); | ||
29 | } | ||
30 | EXPORT_SYMBOL(get_io_context); | ||
31 | |||
32 | /* | ||
33 | * Releasing ioc may nest into another put_io_context() leading to nested | ||
34 | * fast path release. As the ioc's can't be the same, this is okay but | ||
35 | * makes lockdep whine. Keep track of nesting and use it as subclass. | ||
36 | */ | ||
37 | #ifdef CONFIG_LOCKDEP | ||
38 | #define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0) | ||
39 | #define ioc_release_depth_inc(q) (q)->ioc_release_depth++ | ||
40 | #define ioc_release_depth_dec(q) (q)->ioc_release_depth-- | ||
41 | #else | ||
42 | #define ioc_release_depth(q) 0 | ||
43 | #define ioc_release_depth_inc(q) do { } while (0) | ||
44 | #define ioc_release_depth_dec(q) do { } while (0) | ||
45 | #endif | ||
46 | |||
47 | static void icq_free_icq_rcu(struct rcu_head *head) | ||
48 | { | ||
49 | struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); | ||
50 | |||
51 | kmem_cache_free(icq->__rcu_icq_cache, icq); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * Exit and free an icq. Called with both ioc and q locked. | ||
56 | */ | ||
57 | static void ioc_exit_icq(struct io_cq *icq) | ||
20 | { | 58 | { |
21 | if (!hlist_empty(&ioc->cic_list)) { | 59 | struct io_context *ioc = icq->ioc; |
22 | struct cfq_io_context *cic; | 60 | struct request_queue *q = icq->q; |
61 | struct elevator_type *et = q->elevator->type; | ||
62 | |||
63 | lockdep_assert_held(&ioc->lock); | ||
64 | lockdep_assert_held(q->queue_lock); | ||
65 | |||
66 | radix_tree_delete(&ioc->icq_tree, icq->q->id); | ||
67 | hlist_del_init(&icq->ioc_node); | ||
68 | list_del_init(&icq->q_node); | ||
69 | |||
70 | /* | ||
71 | * Both setting lookup hint to and clearing it from @icq are done | ||
72 | * under queue_lock. If it's not pointing to @icq now, it never | ||
73 | * will. Hint assignment itself can race safely. | ||
74 | */ | ||
75 | if (rcu_dereference_raw(ioc->icq_hint) == icq) | ||
76 | rcu_assign_pointer(ioc->icq_hint, NULL); | ||
23 | 77 | ||
24 | cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, | 78 | if (et->ops.elevator_exit_icq_fn) { |
25 | cic_list); | 79 | ioc_release_depth_inc(q); |
26 | cic->dtor(ioc); | 80 | et->ops.elevator_exit_icq_fn(icq); |
81 | ioc_release_depth_dec(q); | ||
27 | } | 82 | } |
83 | |||
84 | /* | ||
85 | * @icq->q might have gone away by the time RCU callback runs | ||
86 | * making it impossible to determine icq_cache. Record it in @icq. | ||
87 | */ | ||
88 | icq->__rcu_icq_cache = et->icq_cache; | ||
89 | call_rcu(&icq->__rcu_head, icq_free_icq_rcu); | ||
28 | } | 90 | } |
29 | 91 | ||
30 | /* | 92 | /* |
31 | * IO Context helper functions. put_io_context() returns 1 if there are no | 93 | * Slow path for ioc release in put_io_context(). Performs double-lock |
32 | * more users of this io context, 0 otherwise. | 94 | * dancing to unlink all icq's and then frees ioc. |
33 | */ | 95 | */ |
34 | int put_io_context(struct io_context *ioc) | 96 | static void ioc_release_fn(struct work_struct *work) |
35 | { | 97 | { |
36 | if (ioc == NULL) | 98 | struct io_context *ioc = container_of(work, struct io_context, |
37 | return 1; | 99 | release_work); |
100 | struct request_queue *last_q = NULL; | ||
38 | 101 | ||
39 | BUG_ON(atomic_long_read(&ioc->refcount) == 0); | 102 | spin_lock_irq(&ioc->lock); |
40 | 103 | ||
41 | if (atomic_long_dec_and_test(&ioc->refcount)) { | 104 | while (!hlist_empty(&ioc->icq_list)) { |
42 | rcu_read_lock(); | 105 | struct io_cq *icq = hlist_entry(ioc->icq_list.first, |
43 | cfq_dtor(ioc); | 106 | struct io_cq, ioc_node); |
44 | rcu_read_unlock(); | 107 | struct request_queue *this_q = icq->q; |
45 | 108 | ||
46 | kmem_cache_free(iocontext_cachep, ioc); | 109 | if (this_q != last_q) { |
47 | return 1; | 110 | /* |
111 | * Need to switch to @this_q. Once we release | ||
112 | * @ioc->lock, it can go away along with @cic. | ||
113 | * Hold on to it. | ||
114 | */ | ||
115 | __blk_get_queue(this_q); | ||
116 | |||
117 | /* | ||
118 | * blk_put_queue() might sleep thanks to kobject | ||
119 | * idiocy. Always release both locks, put and | ||
120 | * restart. | ||
121 | */ | ||
122 | if (last_q) { | ||
123 | spin_unlock(last_q->queue_lock); | ||
124 | spin_unlock_irq(&ioc->lock); | ||
125 | blk_put_queue(last_q); | ||
126 | } else { | ||
127 | spin_unlock_irq(&ioc->lock); | ||
128 | } | ||
129 | |||
130 | last_q = this_q; | ||
131 | spin_lock_irq(this_q->queue_lock); | ||
132 | spin_lock(&ioc->lock); | ||
133 | continue; | ||
134 | } | ||
135 | ioc_exit_icq(icq); | ||
48 | } | 136 | } |
49 | return 0; | 137 | |
138 | if (last_q) { | ||
139 | spin_unlock(last_q->queue_lock); | ||
140 | spin_unlock_irq(&ioc->lock); | ||
141 | blk_put_queue(last_q); | ||
142 | } else { | ||
143 | spin_unlock_irq(&ioc->lock); | ||
144 | } | ||
145 | |||
146 | kmem_cache_free(iocontext_cachep, ioc); | ||
50 | } | 147 | } |
51 | EXPORT_SYMBOL(put_io_context); | ||
52 | 148 | ||
53 | static void cfq_exit(struct io_context *ioc) | 149 | /** |
150 | * put_io_context - put a reference of io_context | ||
151 | * @ioc: io_context to put | ||
152 | * @locked_q: request_queue the caller is holding queue_lock of (hint) | ||
153 | * | ||
154 | * Decrement reference count of @ioc and release it if the count reaches | ||
155 | * zero. If the caller is holding queue_lock of a queue, it can indicate | ||
156 | * that with @locked_q. This is an optimization hint and the caller is | ||
157 | * allowed to pass in %NULL even when it's holding a queue_lock. | ||
158 | */ | ||
159 | void put_io_context(struct io_context *ioc, struct request_queue *locked_q) | ||
54 | { | 160 | { |
55 | rcu_read_lock(); | 161 | struct request_queue *last_q = locked_q; |
162 | unsigned long flags; | ||
163 | |||
164 | if (ioc == NULL) | ||
165 | return; | ||
166 | |||
167 | BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | ||
168 | if (locked_q) | ||
169 | lockdep_assert_held(locked_q->queue_lock); | ||
56 | 170 | ||
57 | if (!hlist_empty(&ioc->cic_list)) { | 171 | if (!atomic_long_dec_and_test(&ioc->refcount)) |
58 | struct cfq_io_context *cic; | 172 | return; |
173 | |||
174 | /* | ||
175 | * Destroy @ioc. This is a bit messy because icq's are chained | ||
176 | * from both ioc and queue, and ioc->lock nests inside queue_lock. | ||
177 | * The inner ioc->lock should be held to walk our icq_list and then | ||
178 | * for each icq the outer matching queue_lock should be grabbed. | ||
179 | * ie. We need to do reverse-order double lock dancing. | ||
180 | * | ||
181 | * Another twist is that we are often called with one of the | ||
182 | * matching queue_locks held as indicated by @locked_q, which | ||
183 | * prevents performing double-lock dance for other queues. | ||
184 | * | ||
185 | * So, we do it in two stages. The fast path uses the queue_lock | ||
186 | * the caller is holding and, if other queues need to be accessed, | ||
187 | * uses trylock to avoid introducing locking dependency. This can | ||
188 | * handle most cases, especially if @ioc was performing IO on only | ||
189 | * single device. | ||
190 | * | ||
191 | * If trylock doesn't cut it, we defer to @ioc->release_work which | ||
192 | * can do all the double-locking dancing. | ||
193 | */ | ||
194 | spin_lock_irqsave_nested(&ioc->lock, flags, | ||
195 | ioc_release_depth(locked_q)); | ||
59 | 196 | ||
60 | cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, | 197 | while (!hlist_empty(&ioc->icq_list)) { |
61 | cic_list); | 198 | struct io_cq *icq = hlist_entry(ioc->icq_list.first, |
62 | cic->exit(ioc); | 199 | struct io_cq, ioc_node); |
200 | struct request_queue *this_q = icq->q; | ||
201 | |||
202 | if (this_q != last_q) { | ||
203 | if (last_q && last_q != locked_q) | ||
204 | spin_unlock(last_q->queue_lock); | ||
205 | last_q = NULL; | ||
206 | |||
207 | if (!spin_trylock(this_q->queue_lock)) | ||
208 | break; | ||
209 | last_q = this_q; | ||
210 | continue; | ||
211 | } | ||
212 | ioc_exit_icq(icq); | ||
63 | } | 213 | } |
64 | rcu_read_unlock(); | 214 | |
215 | if (last_q && last_q != locked_q) | ||
216 | spin_unlock(last_q->queue_lock); | ||
217 | |||
218 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
219 | |||
220 | /* if no icq is left, we're done; otherwise, kick release_work */ | ||
221 | if (hlist_empty(&ioc->icq_list)) | ||
222 | kmem_cache_free(iocontext_cachep, ioc); | ||
223 | else | ||
224 | schedule_work(&ioc->release_work); | ||
65 | } | 225 | } |
226 | EXPORT_SYMBOL(put_io_context); | ||
66 | 227 | ||
67 | /* Called by the exiting task */ | 228 | /* Called by the exiting task */ |
68 | void exit_io_context(struct task_struct *task) | 229 | void exit_io_context(struct task_struct *task) |
@@ -74,86 +235,240 @@ void exit_io_context(struct task_struct *task) | |||
74 | task->io_context = NULL; | 235 | task->io_context = NULL; |
75 | task_unlock(task); | 236 | task_unlock(task); |
76 | 237 | ||
77 | if (atomic_dec_and_test(&ioc->nr_tasks)) | 238 | atomic_dec(&ioc->nr_tasks); |
78 | cfq_exit(ioc); | 239 | put_io_context(ioc, NULL); |
240 | } | ||
241 | |||
242 | /** | ||
243 | * ioc_clear_queue - break any ioc association with the specified queue | ||
244 | * @q: request_queue being cleared | ||
245 | * | ||
246 | * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. | ||
247 | */ | ||
248 | void ioc_clear_queue(struct request_queue *q) | ||
249 | { | ||
250 | lockdep_assert_held(q->queue_lock); | ||
251 | |||
252 | while (!list_empty(&q->icq_list)) { | ||
253 | struct io_cq *icq = list_entry(q->icq_list.next, | ||
254 | struct io_cq, q_node); | ||
255 | struct io_context *ioc = icq->ioc; | ||
79 | 256 | ||
80 | put_io_context(ioc); | 257 | spin_lock(&ioc->lock); |
258 | ioc_exit_icq(icq); | ||
259 | spin_unlock(&ioc->lock); | ||
260 | } | ||
81 | } | 261 | } |
82 | 262 | ||
83 | struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | 263 | void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags, |
264 | int node) | ||
84 | { | 265 | { |
85 | struct io_context *ioc; | 266 | struct io_context *ioc; |
86 | 267 | ||
87 | ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | 268 | ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO, |
88 | if (ioc) { | 269 | node); |
89 | atomic_long_set(&ioc->refcount, 1); | 270 | if (unlikely(!ioc)) |
90 | atomic_set(&ioc->nr_tasks, 1); | 271 | return; |
91 | spin_lock_init(&ioc->lock); | ||
92 | ioc->ioprio_changed = 0; | ||
93 | ioc->ioprio = 0; | ||
94 | ioc->last_waited = 0; /* doesn't matter... */ | ||
95 | ioc->nr_batch_requests = 0; /* because this is 0 */ | ||
96 | INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); | ||
97 | INIT_HLIST_HEAD(&ioc->cic_list); | ||
98 | ioc->ioc_data = NULL; | ||
99 | #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) | ||
100 | ioc->cgroup_changed = 0; | ||
101 | #endif | ||
102 | } | ||
103 | 272 | ||
104 | return ioc; | 273 | /* initialize */ |
274 | atomic_long_set(&ioc->refcount, 1); | ||
275 | atomic_set(&ioc->nr_tasks, 1); | ||
276 | spin_lock_init(&ioc->lock); | ||
277 | INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); | ||
278 | INIT_HLIST_HEAD(&ioc->icq_list); | ||
279 | INIT_WORK(&ioc->release_work, ioc_release_fn); | ||
280 | |||
281 | /* | ||
282 | * Try to install. ioc shouldn't be installed if someone else | ||
283 | * already did or @task, which isn't %current, is exiting. Note | ||
284 | * that we need to allow ioc creation on exiting %current as exit | ||
285 | * path may issue IOs from e.g. exit_files(). The exit path is | ||
286 | * responsible for not issuing IO after exit_io_context(). | ||
287 | */ | ||
288 | task_lock(task); | ||
289 | if (!task->io_context && | ||
290 | (task == current || !(task->flags & PF_EXITING))) | ||
291 | task->io_context = ioc; | ||
292 | else | ||
293 | kmem_cache_free(iocontext_cachep, ioc); | ||
294 | task_unlock(task); | ||
105 | } | 295 | } |
106 | 296 | ||
107 | /* | 297 | /** |
108 | * If the current task has no IO context then create one and initialise it. | 298 | * get_task_io_context - get io_context of a task |
109 | * Otherwise, return its existing IO context. | 299 | * @task: task of interest |
300 | * @gfp_flags: allocation flags, used if allocation is necessary | ||
301 | * @node: allocation node, used if allocation is necessary | ||
110 | * | 302 | * |
111 | * This returned IO context doesn't have a specifically elevated refcount, | 303 | * Return io_context of @task. If it doesn't exist, it is created with |
112 | * but since the current task itself holds a reference, the context can be | 304 | * @gfp_flags and @node. The returned io_context has its reference count |
113 | * used in general code, so long as it stays within `current` context. | 305 | * incremented. |
306 | * | ||
307 | * This function always goes through task_lock() and it's better to use | ||
308 | * %current->io_context + get_io_context() for %current. | ||
114 | */ | 309 | */ |
115 | struct io_context *current_io_context(gfp_t gfp_flags, int node) | 310 | struct io_context *get_task_io_context(struct task_struct *task, |
311 | gfp_t gfp_flags, int node) | ||
116 | { | 312 | { |
117 | struct task_struct *tsk = current; | 313 | struct io_context *ioc; |
118 | struct io_context *ret; | ||
119 | |||
120 | ret = tsk->io_context; | ||
121 | if (likely(ret)) | ||
122 | return ret; | ||
123 | |||
124 | ret = alloc_io_context(gfp_flags, node); | ||
125 | if (ret) { | ||
126 | /* make sure set_task_ioprio() sees the settings above */ | ||
127 | smp_wmb(); | ||
128 | tsk->io_context = ret; | ||
129 | } | ||
130 | 314 | ||
131 | return ret; | 315 | might_sleep_if(gfp_flags & __GFP_WAIT); |
316 | |||
317 | do { | ||
318 | task_lock(task); | ||
319 | ioc = task->io_context; | ||
320 | if (likely(ioc)) { | ||
321 | get_io_context(ioc); | ||
322 | task_unlock(task); | ||
323 | return ioc; | ||
324 | } | ||
325 | task_unlock(task); | ||
326 | } while (create_io_context(task, gfp_flags, node)); | ||
327 | |||
328 | return NULL; | ||
132 | } | 329 | } |
330 | EXPORT_SYMBOL(get_task_io_context); | ||
133 | 331 | ||
134 | /* | 332 | /** |
135 | * If the current task has no IO context then create one and initialise it. | 333 | * ioc_lookup_icq - lookup io_cq from ioc |
136 | * If it does have a context, take a ref on it. | 334 | * @ioc: the associated io_context |
335 | * @q: the associated request_queue | ||
137 | * | 336 | * |
138 | * This is always called in the context of the task which submitted the I/O. | 337 | * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called |
338 | * with @q->queue_lock held. | ||
139 | */ | 339 | */ |
140 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | 340 | struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q) |
141 | { | 341 | { |
142 | struct io_context *ioc = NULL; | 342 | struct io_cq *icq; |
343 | |||
344 | lockdep_assert_held(q->queue_lock); | ||
143 | 345 | ||
144 | /* | 346 | /* |
145 | * Check for unlikely race with exiting task. ioc ref count is | 347 | * icq's are indexed from @ioc using radix tree and hint pointer, |
146 | * zero when ioc is being detached. | 348 | * both of which are protected with RCU. All removals are done |
349 | * holding both q and ioc locks, and we're holding q lock - if we | ||
350 | * find a icq which points to us, it's guaranteed to be valid. | ||
147 | */ | 351 | */ |
148 | do { | 352 | rcu_read_lock(); |
149 | ioc = current_io_context(gfp_flags, node); | 353 | icq = rcu_dereference(ioc->icq_hint); |
150 | if (unlikely(!ioc)) | 354 | if (icq && icq->q == q) |
151 | break; | 355 | goto out; |
152 | } while (!atomic_long_inc_not_zero(&ioc->refcount)); | ||
153 | 356 | ||
154 | return ioc; | 357 | icq = radix_tree_lookup(&ioc->icq_tree, q->id); |
358 | if (icq && icq->q == q) | ||
359 | rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */ | ||
360 | else | ||
361 | icq = NULL; | ||
362 | out: | ||
363 | rcu_read_unlock(); | ||
364 | return icq; | ||
155 | } | 365 | } |
156 | EXPORT_SYMBOL(get_io_context); | 366 | EXPORT_SYMBOL(ioc_lookup_icq); |
367 | |||
368 | /** | ||
369 | * ioc_create_icq - create and link io_cq | ||
370 | * @q: request_queue of interest | ||
371 | * @gfp_mask: allocation mask | ||
372 | * | ||
373 | * Make sure io_cq linking %current->io_context and @q exists. If either | ||
374 | * io_context and/or icq don't exist, they will be created using @gfp_mask. | ||
375 | * | ||
376 | * The caller is responsible for ensuring @ioc won't go away and @q is | ||
377 | * alive and will stay alive until this function returns. | ||
378 | */ | ||
379 | struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) | ||
380 | { | ||
381 | struct elevator_type *et = q->elevator->type; | ||
382 | struct io_context *ioc; | ||
383 | struct io_cq *icq; | ||
384 | |||
385 | /* allocate stuff */ | ||
386 | ioc = create_io_context(current, gfp_mask, q->node); | ||
387 | if (!ioc) | ||
388 | return NULL; | ||
389 | |||
390 | icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO, | ||
391 | q->node); | ||
392 | if (!icq) | ||
393 | return NULL; | ||
394 | |||
395 | if (radix_tree_preload(gfp_mask) < 0) { | ||
396 | kmem_cache_free(et->icq_cache, icq); | ||
397 | return NULL; | ||
398 | } | ||
399 | |||
400 | icq->ioc = ioc; | ||
401 | icq->q = q; | ||
402 | INIT_LIST_HEAD(&icq->q_node); | ||
403 | INIT_HLIST_NODE(&icq->ioc_node); | ||
404 | |||
405 | /* lock both q and ioc and try to link @icq */ | ||
406 | spin_lock_irq(q->queue_lock); | ||
407 | spin_lock(&ioc->lock); | ||
408 | |||
409 | if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { | ||
410 | hlist_add_head(&icq->ioc_node, &ioc->icq_list); | ||
411 | list_add(&icq->q_node, &q->icq_list); | ||
412 | if (et->ops.elevator_init_icq_fn) | ||
413 | et->ops.elevator_init_icq_fn(icq); | ||
414 | } else { | ||
415 | kmem_cache_free(et->icq_cache, icq); | ||
416 | icq = ioc_lookup_icq(ioc, q); | ||
417 | if (!icq) | ||
418 | printk(KERN_ERR "cfq: icq link failed!\n"); | ||
419 | } | ||
420 | |||
421 | spin_unlock(&ioc->lock); | ||
422 | spin_unlock_irq(q->queue_lock); | ||
423 | radix_tree_preload_end(); | ||
424 | return icq; | ||
425 | } | ||
426 | |||
427 | void ioc_set_changed(struct io_context *ioc, int which) | ||
428 | { | ||
429 | struct io_cq *icq; | ||
430 | struct hlist_node *n; | ||
431 | |||
432 | hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) | ||
433 | set_bit(which, &icq->changed); | ||
434 | } | ||
435 | |||
436 | /** | ||
437 | * ioc_ioprio_changed - notify ioprio change | ||
438 | * @ioc: io_context of interest | ||
439 | * @ioprio: new ioprio | ||
440 | * | ||
441 | * @ioc's ioprio has changed to @ioprio. Set %ICQ_IOPRIO_CHANGED for all | ||
442 | * icq's. iosched is responsible for checking the bit and applying it on | ||
443 | * request issue path. | ||
444 | */ | ||
445 | void ioc_ioprio_changed(struct io_context *ioc, int ioprio) | ||
446 | { | ||
447 | unsigned long flags; | ||
448 | |||
449 | spin_lock_irqsave(&ioc->lock, flags); | ||
450 | ioc->ioprio = ioprio; | ||
451 | ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED); | ||
452 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
453 | } | ||
454 | |||
455 | /** | ||
456 | * ioc_cgroup_changed - notify cgroup change | ||
457 | * @ioc: io_context of interest | ||
458 | * | ||
459 | * @ioc's cgroup has changed. Set %ICQ_CGROUP_CHANGED for all icq's. | ||
460 | * iosched is responsible for checking the bit and applying it on request | ||
461 | * issue path. | ||
462 | */ | ||
463 | void ioc_cgroup_changed(struct io_context *ioc) | ||
464 | { | ||
465 | unsigned long flags; | ||
466 | |||
467 | spin_lock_irqsave(&ioc->lock, flags); | ||
468 | ioc_set_changed(ioc, ICQ_CGROUP_CHANGED); | ||
469 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
470 | } | ||
471 | EXPORT_SYMBOL(ioc_cgroup_changed); | ||
157 | 472 | ||
158 | static int __init blk_ioc_init(void) | 473 | static int __init blk_ioc_init(void) |
159 | { | 474 | { |
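Taken together with the blk-core.c changes above, request allocation now uses this io_context interface roughly as follows. This is a condensed, non-authoritative illustration of the flow in get_request()/blk_alloc_request(); queue_lock handling, congestion logic and error paths are omitted:

	/* Condensed from get_request()/blk_alloc_request() above. */
	struct elevator_type *et = q->elevator->type;
	struct io_context *ioc = current->io_context;
	struct io_cq *icq = NULL;
	struct request *rq;

	if (et->icq_cache && ioc)
		icq = ioc_lookup_icq(ioc, q);		/* RCU lookup, under queue_lock */

	if (et->icq_cache && !icq)
		icq = ioc_create_icq(q, gfp_mask);	/* ensures %current has an ioc and
							 * links the icq to ioc and q by q->id */

	rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
	/* blk_alloc_request() takes a reference with get_io_context(icq->ioc);
	 * blk_free_request() later drops it with put_io_context(). */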
diff --git a/block/blk-settings.c b/block/blk-settings.c
index fa1eb0449a05..d3234fc494ad 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -104,9 +104,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); | |||
104 | * @lim: the queue_limits structure to reset | 104 | * @lim: the queue_limits structure to reset |
105 | * | 105 | * |
106 | * Description: | 106 | * Description: |
107 | * Returns a queue_limit struct to its default state. Can be used by | 107 | * Returns a queue_limit struct to its default state. |
108 | * stacking drivers like DM that stage table swaps and reuse an | ||
109 | * existing device queue. | ||
110 | */ | 108 | */ |
111 | void blk_set_default_limits(struct queue_limits *lim) | 109 | void blk_set_default_limits(struct queue_limits *lim) |
112 | { | 110 | { |
@@ -114,13 +112,12 @@ void blk_set_default_limits(struct queue_limits *lim) | |||
114 | lim->max_integrity_segments = 0; | 112 | lim->max_integrity_segments = 0; |
115 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
116 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; | 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
117 | lim->max_sectors = BLK_DEF_MAX_SECTORS; | 115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; |
118 | lim->max_hw_sectors = INT_MAX; | ||
119 | lim->max_discard_sectors = 0; | 116 | lim->max_discard_sectors = 0; |
120 | lim->discard_granularity = 0; | 117 | lim->discard_granularity = 0; |
121 | lim->discard_alignment = 0; | 118 | lim->discard_alignment = 0; |
122 | lim->discard_misaligned = 0; | 119 | lim->discard_misaligned = 0; |
123 | lim->discard_zeroes_data = 1; | 120 | lim->discard_zeroes_data = 0; |
124 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; | 121 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; |
125 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); | 122 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); |
126 | lim->alignment_offset = 0; | 123 | lim->alignment_offset = 0; |
@@ -131,6 +128,27 @@ void blk_set_default_limits(struct queue_limits *lim) | |||
131 | EXPORT_SYMBOL(blk_set_default_limits); | 128 | EXPORT_SYMBOL(blk_set_default_limits); |
132 | 129 | ||
133 | /** | 130 | /** |
131 | * blk_set_stacking_limits - set default limits for stacking devices | ||
132 | * @lim: the queue_limits structure to reset | ||
133 | * | ||
134 | * Description: | ||
135 | * Returns a queue_limit struct to its default state. Should be used | ||
136 | * by stacking drivers like DM that have no internal limits. | ||
137 | */ | ||
138 | void blk_set_stacking_limits(struct queue_limits *lim) | ||
139 | { | ||
140 | blk_set_default_limits(lim); | ||
141 | |||
142 | /* Inherit limits from component devices */ | ||
143 | lim->discard_zeroes_data = 1; | ||
144 | lim->max_segments = USHRT_MAX; | ||
145 | lim->max_hw_sectors = UINT_MAX; | ||
146 | |||
147 | lim->max_sectors = BLK_DEF_MAX_SECTORS; | ||
148 | } | ||
149 | EXPORT_SYMBOL(blk_set_stacking_limits); | ||
150 | |||
151 | /** | ||
134 | * blk_queue_make_request - define an alternate make_request function for a device | 152 | * blk_queue_make_request - define an alternate make_request function for a device |
135 | * @q: the request queue for the device to be affected | 153 | * @q: the request queue for the device to be affected |
136 | * @mfn: the alternate make_request function | 154 | * @mfn: the alternate make_request function |
@@ -165,8 +183,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) | |||
165 | q->nr_batching = BLK_BATCH_REQ; | 183 | q->nr_batching = BLK_BATCH_REQ; |
166 | 184 | ||
167 | blk_set_default_limits(&q->limits); | 185 | blk_set_default_limits(&q->limits); |
168 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); | ||
169 | q->limits.discard_zeroes_data = 0; | ||
170 | 186 | ||
171 | /* | 187 | /* |
172 | * by default assume old behaviour and bounce for any highmem page | 188 | * by default assume old behaviour and bounce for any highmem page |
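With this change blk_set_default_limits() starts every queue from the conservative BLK_SAFE_MAX_SECTORS values, and stacking drivers opt in to the permissive settings via the new blk_set_stacking_limits() helper. A hypothetical DM/MD-style caller would use it roughly like this; the blk_stack_limits() call is pre-existing API shown only for context, and the local names are illustrative:

	struct queue_limits limits;

	/* Start from "no internal limits" and let the component
	 * devices narrow things down. */
	blk_set_stacking_limits(&limits);

	/* for each underlying device (illustrative):
	 *	blk_stack_limits(&limits, &bdev_get_queue(bdev)->limits, offset);
	 */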
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e7f9f657f105..cf150011d808 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -425,7 +425,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | |||
425 | if (!entry->show) | 425 | if (!entry->show) |
426 | return -EIO; | 426 | return -EIO; |
427 | mutex_lock(&q->sysfs_lock); | 427 | mutex_lock(&q->sysfs_lock); |
428 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 428 | if (blk_queue_dead(q)) { |
429 | mutex_unlock(&q->sysfs_lock); | 429 | mutex_unlock(&q->sysfs_lock); |
430 | return -ENOENT; | 430 | return -ENOENT; |
431 | } | 431 | } |
@@ -447,7 +447,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, | |||
447 | 447 | ||
448 | q = container_of(kobj, struct request_queue, kobj); | 448 | q = container_of(kobj, struct request_queue, kobj); |
449 | mutex_lock(&q->sysfs_lock); | 449 | mutex_lock(&q->sysfs_lock); |
450 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | 450 | if (blk_queue_dead(q)) { |
451 | mutex_unlock(&q->sysfs_lock); | 451 | mutex_unlock(&q->sysfs_lock); |
452 | return -ENOENT; | 452 | return -ENOENT; |
453 | } | 453 | } |
@@ -479,8 +479,12 @@ static void blk_release_queue(struct kobject *kobj) | |||
479 | 479 | ||
480 | blk_sync_queue(q); | 480 | blk_sync_queue(q); |
481 | 481 | ||
482 | if (q->elevator) | 482 | if (q->elevator) { |
483 | spin_lock_irq(q->queue_lock); | ||
484 | ioc_clear_queue(q); | ||
485 | spin_unlock_irq(q->queue_lock); | ||
483 | elevator_exit(q->elevator); | 486 | elevator_exit(q->elevator); |
487 | } | ||
484 | 488 | ||
485 | blk_throtl_exit(q); | 489 | blk_throtl_exit(q); |
486 | 490 | ||
@@ -494,6 +498,8 @@ static void blk_release_queue(struct kobject *kobj) | |||
494 | blk_trace_shutdown(q); | 498 | blk_trace_shutdown(q); |
495 | 499 | ||
496 | bdi_destroy(&q->backing_dev_info); | 500 | bdi_destroy(&q->backing_dev_info); |
501 | |||
502 | ida_simple_remove(&blk_queue_ida, q->id); | ||
497 | kmem_cache_free(blk_requestq_cachep, q); | 503 | kmem_cache_free(blk_requestq_cachep, q); |
498 | } | 504 | } |
499 | 505 | ||
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4553245d9317..5eed6a76721d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -310,7 +310,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) | |||
310 | struct request_queue *q = td->queue; | 310 | struct request_queue *q = td->queue; |
311 | 311 | ||
312 | /* no throttling for dead queue */ | 312 | /* no throttling for dead queue */ |
313 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 313 | if (unlikely(blk_queue_dead(q))) |
314 | return NULL; | 314 | return NULL; |
315 | 315 | ||
316 | rcu_read_lock(); | 316 | rcu_read_lock(); |
@@ -335,7 +335,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) | |||
335 | spin_lock_irq(q->queue_lock); | 335 | spin_lock_irq(q->queue_lock); |
336 | 336 | ||
337 | /* Make sure @q is still alive */ | 337 | /* Make sure @q is still alive */ |
338 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 338 | if (unlikely(blk_queue_dead(q))) { |
339 | kfree(tg); | 339 | kfree(tg); |
340 | return NULL; | 340 | return NULL; |
341 | } | 341 | } |
diff --git a/block/blk.h b/block/blk.h
index 3f6551b3c92d..7efd772336de 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -1,6 +1,8 @@ | |||
1 | #ifndef BLK_INTERNAL_H | 1 | #ifndef BLK_INTERNAL_H |
2 | #define BLK_INTERNAL_H | 2 | #define BLK_INTERNAL_H |
3 | 3 | ||
4 | #include <linux/idr.h> | ||
5 | |||
4 | /* Amount of time in which a process may batch requests */ | 6 | /* Amount of time in which a process may batch requests */ |
5 | #define BLK_BATCH_TIME (HZ/50UL) | 7 | #define BLK_BATCH_TIME (HZ/50UL) |
6 | 8 | ||
@@ -9,6 +11,12 @@ | |||
9 | 11 | ||
10 | extern struct kmem_cache *blk_requestq_cachep; | 12 | extern struct kmem_cache *blk_requestq_cachep; |
11 | extern struct kobj_type blk_queue_ktype; | 13 | extern struct kobj_type blk_queue_ktype; |
14 | extern struct ida blk_queue_ida; | ||
15 | |||
16 | static inline void __blk_get_queue(struct request_queue *q) | ||
17 | { | ||
18 | kobject_get(&q->kobj); | ||
19 | } | ||
12 | 20 | ||
13 | void init_request_from_bio(struct request *req, struct bio *bio); | 21 | void init_request_from_bio(struct request *req, struct bio *bio); |
14 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 22 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
@@ -85,8 +93,8 @@ static inline struct request *__elv_next_request(struct request_queue *q) | |||
85 | q->flush_queue_delayed = 1; | 93 | q->flush_queue_delayed = 1; |
86 | return NULL; | 94 | return NULL; |
87 | } | 95 | } |
88 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) || | 96 | if (unlikely(blk_queue_dead(q)) || |
89 | !q->elevator->ops->elevator_dispatch_fn(q, 0)) | 97 | !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) |
90 | return NULL; | 98 | return NULL; |
91 | } | 99 | } |
92 | } | 100 | } |
@@ -95,16 +103,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq) | |||
95 | { | 103 | { |
96 | struct elevator_queue *e = q->elevator; | 104 | struct elevator_queue *e = q->elevator; |
97 | 105 | ||
98 | if (e->ops->elevator_activate_req_fn) | 106 | if (e->type->ops.elevator_activate_req_fn) |
99 | e->ops->elevator_activate_req_fn(q, rq); | 107 | e->type->ops.elevator_activate_req_fn(q, rq); |
100 | } | 108 | } |
101 | 109 | ||
102 | static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) | 110 | static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) |
103 | { | 111 | { |
104 | struct elevator_queue *e = q->elevator; | 112 | struct elevator_queue *e = q->elevator; |
105 | 113 | ||
106 | if (e->ops->elevator_deactivate_req_fn) | 114 | if (e->type->ops.elevator_deactivate_req_fn) |
107 | e->ops->elevator_deactivate_req_fn(q, rq); | 115 | e->type->ops.elevator_deactivate_req_fn(q, rq); |
108 | } | 116 | } |
109 | 117 | ||
110 | #ifdef CONFIG_FAIL_IO_TIMEOUT | 118 | #ifdef CONFIG_FAIL_IO_TIMEOUT |
@@ -119,8 +127,6 @@ static inline int blk_should_fake_timeout(struct request_queue *q) | |||
119 | } | 127 | } |
120 | #endif | 128 | #endif |
121 | 129 | ||
122 | struct io_context *current_io_context(gfp_t gfp_flags, int node); | ||
123 | |||
124 | int ll_back_merge_fn(struct request_queue *q, struct request *req, | 130 | int ll_back_merge_fn(struct request_queue *q, struct request *req, |
125 | struct bio *bio); | 131 | struct bio *bio); |
126 | int ll_front_merge_fn(struct request_queue *q, struct request *req, | 132 | int ll_front_merge_fn(struct request_queue *q, struct request *req, |
@@ -189,6 +195,42 @@ static inline int blk_do_io_stat(struct request *rq) | |||
189 | (rq->cmd_flags & REQ_DISCARD)); | 195 | (rq->cmd_flags & REQ_DISCARD)); |
190 | } | 196 | } |
191 | 197 | ||
198 | /* | ||
199 | * Internal io_context interface | ||
200 | */ | ||
201 | void get_io_context(struct io_context *ioc); | ||
202 | struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q); | ||
203 | struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask); | ||
204 | void ioc_clear_queue(struct request_queue *q); | ||
205 | |||
206 | void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask, | ||
207 | int node); | ||
208 | |||
209 | /** | ||
210 | * create_io_context - try to create task->io_context | ||
211 | * @task: target task | ||
212 | * @gfp_mask: allocation mask | ||
213 | * @node: allocation node | ||
214 | * | ||
215 | * If @task->io_context is %NULL, allocate a new io_context and install it. | ||
216 | * Returns the current @task->io_context which may be %NULL if allocation | ||
217 | * failed. | ||
218 | * | ||
219 | * Note that this function can't be called with IRQ disabled because | ||
220 | * task_lock which protects @task->io_context is IRQ-unsafe. | ||
221 | */ | ||
222 | static inline struct io_context *create_io_context(struct task_struct *task, | ||
223 | gfp_t gfp_mask, int node) | ||
224 | { | ||
225 | WARN_ON_ONCE(irqs_disabled()); | ||
226 | if (unlikely(!task->io_context)) | ||
227 | create_io_context_slowpath(task, gfp_mask, node); | ||
228 | return task->io_context; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Internal throttling interface | ||
233 | */ | ||
192 | #ifdef CONFIG_BLK_DEV_THROTTLING | 234 | #ifdef CONFIG_BLK_DEV_THROTTLING |
193 | extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); | 235 | extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); |
194 | extern void blk_throtl_drain(struct request_queue *q); | 236 | extern void blk_throtl_drain(struct request_queue *q); |
diff --git a/block/bsg.c b/block/bsg.c
index 9651ec7b87c2..4cf703fd98bb 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -769,12 +769,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode, | |||
769 | struct file *file) | 769 | struct file *file) |
770 | { | 770 | { |
771 | struct bsg_device *bd; | 771 | struct bsg_device *bd; |
772 | int ret; | ||
773 | #ifdef BSG_DEBUG | 772 | #ifdef BSG_DEBUG |
774 | unsigned char buf[32]; | 773 | unsigned char buf[32]; |
775 | #endif | 774 | #endif |
776 | ret = blk_get_queue(rq); | 775 | if (!blk_get_queue(rq)) |
777 | if (ret) | ||
778 | return ERR_PTR(-ENXIO); | 776 | return ERR_PTR(-ENXIO); |
779 | 777 | ||
780 | bd = bsg_alloc_device(); | 778 | bd = bsg_alloc_device(); |
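Note the inverted calling convention that comes with the int-to-bool conversion of blk_get_queue() in blk-core.c above: it used to return 0 on success, and now returns true on success and false if the queue is already dead, so callers test the negation, e.g.:

	if (!blk_get_queue(q))		/* false means the queue is dead */
		return ERR_PTR(-ENXIO);
	/* ... use the queue ... */
	blk_put_queue(q);		/* drop the reference when finished */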
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3548705b04e4..163263ddd381 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/rbtree.h> | 14 | #include <linux/rbtree.h> |
15 | #include <linux/ioprio.h> | 15 | #include <linux/ioprio.h> |
16 | #include <linux/blktrace_api.h> | 16 | #include <linux/blktrace_api.h> |
17 | #include "blk.h" | ||
17 | #include "cfq.h" | 18 | #include "cfq.h" |
18 | 19 | ||
19 | /* | 20 | /* |
@@ -53,20 +54,11 @@ static const int cfq_hist_divisor = 4; | |||
53 | #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) | 54 | #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) |
54 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) | 55 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) |
55 | 56 | ||
56 | #define RQ_CIC(rq) \ | 57 | #define RQ_CIC(rq) icq_to_cic((rq)->elv.icq) |
57 | ((struct cfq_io_context *) (rq)->elevator_private[0]) | 58 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0]) |
58 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private[1]) | 59 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1]) |
59 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private[2]) | ||
60 | 60 | ||
61 | static struct kmem_cache *cfq_pool; | 61 | static struct kmem_cache *cfq_pool; |
62 | static struct kmem_cache *cfq_ioc_pool; | ||
63 | |||
64 | static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); | ||
65 | static struct completion *ioc_gone; | ||
66 | static DEFINE_SPINLOCK(ioc_gone_lock); | ||
67 | |||
68 | static DEFINE_SPINLOCK(cic_index_lock); | ||
69 | static DEFINE_IDA(cic_index_ida); | ||
70 | 62 | ||
71 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR | 63 | #define CFQ_PRIO_LISTS IOPRIO_BE_NR |
72 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) | 64 | #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) |
@@ -75,6 +67,14 @@ static DEFINE_IDA(cic_index_ida); | |||
75 | #define sample_valid(samples) ((samples) > 80) | 67 | #define sample_valid(samples) ((samples) > 80) |
76 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) | 68 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) |
77 | 69 | ||
70 | struct cfq_ttime { | ||
71 | unsigned long last_end_request; | ||
72 | |||
73 | unsigned long ttime_total; | ||
74 | unsigned long ttime_samples; | ||
75 | unsigned long ttime_mean; | ||
76 | }; | ||
77 | |||
78 | /* | 78 | /* |
79 | * Most of our rbtree usage is for sorting with min extraction, so | 79 | * Most of our rbtree usage is for sorting with min extraction, so |
80 | * if we cache the leftmost node we don't have to walk down the tree | 80 | * if we cache the leftmost node we don't have to walk down the tree |
@@ -216,6 +216,12 @@ struct cfq_group { | |||
216 | struct cfq_ttime ttime; | 216 | struct cfq_ttime ttime; |
217 | }; | 217 | }; |
218 | 218 | ||
219 | struct cfq_io_cq { | ||
220 | struct io_cq icq; /* must be the first member */ | ||
221 | struct cfq_queue *cfqq[2]; | ||
222 | struct cfq_ttime ttime; | ||
223 | }; | ||
224 | |||
219 | /* | 225 | /* |
220 | * Per block device queue structure | 226 | * Per block device queue structure |
221 | */ | 227 | */ |
@@ -267,7 +273,7 @@ struct cfq_data { | |||
267 | struct work_struct unplug_work; | 273 | struct work_struct unplug_work; |
268 | 274 | ||
269 | struct cfq_queue *active_queue; | 275 | struct cfq_queue *active_queue; |
270 | struct cfq_io_context *active_cic; | 276 | struct cfq_io_cq *active_cic; |
271 | 277 | ||
272 | /* | 278 | /* |
273 | * async queue for each priority case | 279 | * async queue for each priority case |
@@ -290,9 +296,6 @@ struct cfq_data { | |||
290 | unsigned int cfq_group_idle; | 296 | unsigned int cfq_group_idle; |
291 | unsigned int cfq_latency; | 297 | unsigned int cfq_latency; |
292 | 298 | ||
293 | unsigned int cic_index; | ||
294 | struct list_head cic_list; | ||
295 | |||
296 | /* | 299 | /* |
297 | * Fallback dummy cfqq for extreme OOM conditions | 300 | * Fallback dummy cfqq for extreme OOM conditions |
298 | */ | 301 | */ |
@@ -464,37 +467,35 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, | |||
464 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 467 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
465 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, | 468 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, |
466 | struct io_context *, gfp_t); | 469 | struct io_context *, gfp_t); |
467 | static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, | ||
468 | struct io_context *); | ||
469 | 470 | ||
470 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, | 471 | static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq) |
471 | bool is_sync) | ||
472 | { | 472 | { |
473 | return cic->cfqq[is_sync]; | 473 | /* cic->icq is the first member, %NULL will convert to %NULL */ |
474 | return container_of(icq, struct cfq_io_cq, icq); | ||
474 | } | 475 | } |
475 | 476 | ||
476 | static inline void cic_set_cfqq(struct cfq_io_context *cic, | 477 | static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd, |
477 | struct cfq_queue *cfqq, bool is_sync) | 478 | struct io_context *ioc) |
478 | { | 479 | { |
479 | cic->cfqq[is_sync] = cfqq; | 480 | if (ioc) |
481 | return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue)); | ||
482 | return NULL; | ||
480 | } | 483 | } |
481 | 484 | ||
482 | #define CIC_DEAD_KEY 1ul | 485 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync) |
483 | #define CIC_DEAD_INDEX_SHIFT 1 | ||
484 | |||
485 | static inline void *cfqd_dead_key(struct cfq_data *cfqd) | ||
486 | { | 486 | { |
487 | return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); | 487 | return cic->cfqq[is_sync]; |
488 | } | 488 | } |
489 | 489 | ||
490 | static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) | 490 | static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq, |
491 | bool is_sync) | ||
491 | { | 492 | { |
492 | struct cfq_data *cfqd = cic->key; | 493 | cic->cfqq[is_sync] = cfqq; |
493 | 494 | } | |
494 | if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY)) | ||
495 | return NULL; | ||
496 | 495 | ||
497 | return cfqd; | 496 | static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic) |
497 | { | ||
498 | return cic->icq.q->elevator->elevator_data; | ||
498 | } | 499 | } |
499 | 500 | ||
500 | /* | 501 | /* |
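The "%NULL will convert to %NULL" comment in the new icq_to_cic() works only because struct cfq_io_cq keeps icq as its first member, so container_of() subtracts an offset of zero and degenerates into a plain cast. A standalone sketch (plain C, not kernel code) of that layout trick:

#include <stddef.h>

/* Mirrors the cfq_io_cq layout: the embedded member must stay first. */
struct inner { int x; };
struct outer {
	struct inner icq;	/* offsetof(struct outer, icq) == 0 */
	int extra;
};

/* Simplified stand-in for the kernel's container_of(). */
#define container_of_(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/*
 * With a zero offset the subtraction is a no-op, so a NULL inner pointer
 * maps back to a NULL outer pointer -- which is why cfq_cic_lookup() can
 * feed ioc_lookup_icq()'s result straight through icq_to_cic() without
 * an explicit NULL check.
 */
static struct outer *to_outer(struct inner *p)
{
	return container_of_(p, struct outer, icq);
}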
@@ -1561,7 +1562,7 @@ static struct request * | |||
1561 | cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) | 1562 | cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) |
1562 | { | 1563 | { |
1563 | struct task_struct *tsk = current; | 1564 | struct task_struct *tsk = current; |
1564 | struct cfq_io_context *cic; | 1565 | struct cfq_io_cq *cic; |
1565 | struct cfq_queue *cfqq; | 1566 | struct cfq_queue *cfqq; |
1566 | 1567 | ||
1567 | cic = cfq_cic_lookup(cfqd, tsk->io_context); | 1568 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
@@ -1687,7 +1688,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
1687 | struct bio *bio) | 1688 | struct bio *bio) |
1688 | { | 1689 | { |
1689 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1690 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1690 | struct cfq_io_context *cic; | 1691 | struct cfq_io_cq *cic; |
1691 | struct cfq_queue *cfqq; | 1692 | struct cfq_queue *cfqq; |
1692 | 1693 | ||
1693 | /* | 1694 | /* |
@@ -1697,12 +1698,19 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
1697 | return false; | 1698 | return false; |
1698 | 1699 | ||
1699 | /* | 1700 | /* |
1700 | * Lookup the cfqq that this bio will be queued with. Allow | 1701 | * Lookup the cfqq that this bio will be queued with and allow |
1701 | * merge only if rq is queued there. | 1702 | * merge only if rq is queued there. This function can be called |
1703 | * from plug merge without queue_lock. In such cases, ioc of @rq | ||
1704 | * and %current are guaranteed to be equal. Avoid lookup which | ||
1705 | * requires queue_lock by using @rq's cic. | ||
1702 | */ | 1706 | */ |
1703 | cic = cfq_cic_lookup(cfqd, current->io_context); | 1707 | if (current->io_context == RQ_CIC(rq)->icq.ioc) { |
1704 | if (!cic) | 1708 | cic = RQ_CIC(rq); |
1705 | return false; | 1709 | } else { |
1710 | cic = cfq_cic_lookup(cfqd, current->io_context); | ||
1711 | if (!cic) | ||
1712 | return false; | ||
1713 | } | ||
1706 | 1714 | ||
1707 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); | 1715 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); |
1708 | return cfqq == RQ_CFQQ(rq); | 1716 | return cfqq == RQ_CFQQ(rq); |
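The rewritten lookup above is what the enlarged comment is about: plug merging can run without queue_lock held, and ioc_lookup_icq() needs that lock, so the request's own cic is reused whenever the submitting task's io_context matches. Condensed into a hypothetical helper (the names are the ones used in this patch; the wrapper itself is not):

/* Illustrative condensation of the hunk above. */
static struct cfq_io_cq *cic_for_merge(struct cfq_data *cfqd, struct request *rq)
{
	/* plug merge: no queue_lock, but @rq's ioc == %current's ioc */
	if (current->io_context == RQ_CIC(rq)->icq.ioc)
		return RQ_CIC(rq);

	/* normal path: queue_lock held, the icq lookup is safe */
	return cfq_cic_lookup(cfqd, current->io_context);	/* may be NULL */
}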
@@ -1786,7 +1794,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1786 | cfqd->active_queue = NULL; | 1794 | cfqd->active_queue = NULL; |
1787 | 1795 | ||
1788 | if (cfqd->active_cic) { | 1796 | if (cfqd->active_cic) { |
1789 | put_io_context(cfqd->active_cic->ioc); | 1797 | put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue); |
1790 | cfqd->active_cic = NULL; | 1798 | cfqd->active_cic = NULL; |
1791 | } | 1799 | } |
1792 | } | 1800 | } |
@@ -2006,7 +2014,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2006 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) | 2014 | static void cfq_arm_slice_timer(struct cfq_data *cfqd) |
2007 | { | 2015 | { |
2008 | struct cfq_queue *cfqq = cfqd->active_queue; | 2016 | struct cfq_queue *cfqq = cfqd->active_queue; |
2009 | struct cfq_io_context *cic; | 2017 | struct cfq_io_cq *cic; |
2010 | unsigned long sl, group_idle = 0; | 2018 | unsigned long sl, group_idle = 0; |
2011 | 2019 | ||
2012 | /* | 2020 | /* |
@@ -2041,7 +2049,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
2041 | * task has exited, don't wait | 2049 | * task has exited, don't wait |
2042 | */ | 2050 | */ |
2043 | cic = cfqd->active_cic; | 2051 | cic = cfqd->active_cic; |
2044 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) | 2052 | if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks)) |
2045 | return; | 2053 | return; |
2046 | 2054 | ||
2047 | /* | 2055 | /* |
@@ -2592,9 +2600,9 @@ static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2592 | cfq_dispatch_insert(cfqd->queue, rq); | 2600 | cfq_dispatch_insert(cfqd->queue, rq); |
2593 | 2601 | ||
2594 | if (!cfqd->active_cic) { | 2602 | if (!cfqd->active_cic) { |
2595 | struct cfq_io_context *cic = RQ_CIC(rq); | 2603 | struct cfq_io_cq *cic = RQ_CIC(rq); |
2596 | 2604 | ||
2597 | atomic_long_inc(&cic->ioc->refcount); | 2605 | atomic_long_inc(&cic->icq.ioc->refcount); |
2598 | cfqd->active_cic = cic; | 2606 | cfqd->active_cic = cic; |
2599 | } | 2607 | } |
2600 | 2608 | ||
@@ -2677,84 +2685,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2677 | cfq_put_cfqg(cfqg); | 2685 | cfq_put_cfqg(cfqg); |
2678 | } | 2686 | } |
2679 | 2687 | ||
2680 | /* | ||
2681 | * Call func for each cic attached to this ioc. | ||
2682 | */ | ||
2683 | static void | ||
2684 | call_for_each_cic(struct io_context *ioc, | ||
2685 | void (*func)(struct io_context *, struct cfq_io_context *)) | ||
2686 | { | ||
2687 | struct cfq_io_context *cic; | ||
2688 | struct hlist_node *n; | ||
2689 | |||
2690 | rcu_read_lock(); | ||
2691 | |||
2692 | hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) | ||
2693 | func(ioc, cic); | ||
2694 | |||
2695 | rcu_read_unlock(); | ||
2696 | } | ||
2697 | |||
2698 | static void cfq_cic_free_rcu(struct rcu_head *head) | ||
2699 | { | ||
2700 | struct cfq_io_context *cic; | ||
2701 | |||
2702 | cic = container_of(head, struct cfq_io_context, rcu_head); | ||
2703 | |||
2704 | kmem_cache_free(cfq_ioc_pool, cic); | ||
2705 | elv_ioc_count_dec(cfq_ioc_count); | ||
2706 | |||
2707 | if (ioc_gone) { | ||
2708 | /* | ||
2709 | * CFQ scheduler is exiting, grab exit lock and check | ||
2710 | * the pending io context count. If it hits zero, | ||
2711 | * complete ioc_gone and set it back to NULL | ||
2712 | */ | ||
2713 | spin_lock(&ioc_gone_lock); | ||
2714 | if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { | ||
2715 | complete(ioc_gone); | ||
2716 | ioc_gone = NULL; | ||
2717 | } | ||
2718 | spin_unlock(&ioc_gone_lock); | ||
2719 | } | ||
2720 | } | ||
2721 | |||
2722 | static void cfq_cic_free(struct cfq_io_context *cic) | ||
2723 | { | ||
2724 | call_rcu(&cic->rcu_head, cfq_cic_free_rcu); | ||
2725 | } | ||
2726 | |||
2727 | static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) | ||
2728 | { | ||
2729 | unsigned long flags; | ||
2730 | unsigned long dead_key = (unsigned long) cic->key; | ||
2731 | |||
2732 | BUG_ON(!(dead_key & CIC_DEAD_KEY)); | ||
2733 | |||
2734 | spin_lock_irqsave(&ioc->lock, flags); | ||
2735 | radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT); | ||
2736 | hlist_del_rcu(&cic->cic_list); | ||
2737 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
2738 | |||
2739 | cfq_cic_free(cic); | ||
2740 | } | ||
2741 | |||
2742 | /* | ||
2743 | * Must be called with rcu_read_lock() held or preemption otherwise disabled. | ||
2744 | * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), | ||
2745 | * and ->trim() which is called with the task lock held | ||
2746 | */ | ||
2747 | static void cfq_free_io_context(struct io_context *ioc) | ||
2748 | { | ||
2749 | /* | ||
2750 | * ioc->refcount is zero here, or we are called from elv_unregister(), | ||
2751 | * so no more cic's are allowed to be linked into this ioc. So it | ||
2752 | * should be ok to iterate over the known list, we will see all cic's | ||
2753 | * since no new ones are added. | ||
2754 | */ | ||
2755 | call_for_each_cic(ioc, cic_free_func); | ||
2756 | } | ||
2757 | |||
2758 | static void cfq_put_cooperator(struct cfq_queue *cfqq) | 2688 | static void cfq_put_cooperator(struct cfq_queue *cfqq) |
2759 | { | 2689 | { |
2760 | struct cfq_queue *__cfqq, *next; | 2690 | struct cfq_queue *__cfqq, *next; |
@@ -2788,27 +2718,17 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2788 | cfq_put_queue(cfqq); | 2718 | cfq_put_queue(cfqq); |
2789 | } | 2719 | } |
2790 | 2720 | ||
2791 | static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | 2721 | static void cfq_init_icq(struct io_cq *icq) |
2792 | struct cfq_io_context *cic) | ||
2793 | { | 2722 | { |
2794 | struct io_context *ioc = cic->ioc; | 2723 | struct cfq_io_cq *cic = icq_to_cic(icq); |
2795 | |||
2796 | list_del_init(&cic->queue_list); | ||
2797 | 2724 | ||
2798 | /* | 2725 | cic->ttime.last_end_request = jiffies; |
2799 | * Make sure dead mark is seen for dead queues | 2726 | } |
2800 | */ | ||
2801 | smp_wmb(); | ||
2802 | cic->key = cfqd_dead_key(cfqd); | ||
2803 | 2727 | ||
2804 | rcu_read_lock(); | 2728 | static void cfq_exit_icq(struct io_cq *icq) |
2805 | if (rcu_dereference(ioc->ioc_data) == cic) { | 2729 | { |
2806 | rcu_read_unlock(); | 2730 | struct cfq_io_cq *cic = icq_to_cic(icq); |
2807 | spin_lock(&ioc->lock); | 2731 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
2808 | rcu_assign_pointer(ioc->ioc_data, NULL); | ||
2809 | spin_unlock(&ioc->lock); | ||
2810 | } else | ||
2811 | rcu_read_unlock(); | ||
2812 | 2732 | ||
2813 | if (cic->cfqq[BLK_RW_ASYNC]) { | 2733 | if (cic->cfqq[BLK_RW_ASYNC]) { |
2814 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); | 2734 | cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); |
@@ -2821,57 +2741,6 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
2821 | } | 2741 | } |
2822 | } | 2742 | } |
2823 | 2743 | ||
2824 | static void cfq_exit_single_io_context(struct io_context *ioc, | ||
2825 | struct cfq_io_context *cic) | ||
2826 | { | ||
2827 | struct cfq_data *cfqd = cic_to_cfqd(cic); | ||
2828 | |||
2829 | if (cfqd) { | ||
2830 | struct request_queue *q = cfqd->queue; | ||
2831 | unsigned long flags; | ||
2832 | |||
2833 | spin_lock_irqsave(q->queue_lock, flags); | ||
2834 | |||
2835 | /* | ||
2836 | * Ensure we get a fresh copy of the ->key to prevent | ||
2837 | * race between exiting task and queue | ||
2838 | */ | ||
2839 | smp_read_barrier_depends(); | ||
2840 | if (cic->key == cfqd) | ||
2841 | __cfq_exit_single_io_context(cfqd, cic); | ||
2842 | |||
2843 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2844 | } | ||
2845 | } | ||
2846 | |||
2847 | /* | ||
2848 | * The process that ioc belongs to has exited, we need to clean up | ||
2849 | * and put the internal structures we have that belongs to that process. | ||
2850 | */ | ||
2851 | static void cfq_exit_io_context(struct io_context *ioc) | ||
2852 | { | ||
2853 | call_for_each_cic(ioc, cfq_exit_single_io_context); | ||
2854 | } | ||
2855 | |||
2856 | static struct cfq_io_context * | ||
2857 | cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | ||
2858 | { | ||
2859 | struct cfq_io_context *cic; | ||
2860 | |||
2861 | cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, | ||
2862 | cfqd->queue->node); | ||
2863 | if (cic) { | ||
2864 | cic->ttime.last_end_request = jiffies; | ||
2865 | INIT_LIST_HEAD(&cic->queue_list); | ||
2866 | INIT_HLIST_NODE(&cic->cic_list); | ||
2867 | cic->dtor = cfq_free_io_context; | ||
2868 | cic->exit = cfq_exit_io_context; | ||
2869 | elv_ioc_count_inc(cfq_ioc_count); | ||
2870 | } | ||
2871 | |||
2872 | return cic; | ||
2873 | } | ||
2874 | |||
2875 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) | 2744 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) |
2876 | { | 2745 | { |
2877 | struct task_struct *tsk = current; | 2746 | struct task_struct *tsk = current; |
@@ -2914,21 +2783,18 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) | |||
2914 | cfq_clear_cfqq_prio_changed(cfqq); | 2783 | cfq_clear_cfqq_prio_changed(cfqq); |
2915 | } | 2784 | } |
2916 | 2785 | ||
2917 | static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) | 2786 | static void changed_ioprio(struct cfq_io_cq *cic) |
2918 | { | 2787 | { |
2919 | struct cfq_data *cfqd = cic_to_cfqd(cic); | 2788 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
2920 | struct cfq_queue *cfqq; | 2789 | struct cfq_queue *cfqq; |
2921 | unsigned long flags; | ||
2922 | 2790 | ||
2923 | if (unlikely(!cfqd)) | 2791 | if (unlikely(!cfqd)) |
2924 | return; | 2792 | return; |
2925 | 2793 | ||
2926 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
2927 | |||
2928 | cfqq = cic->cfqq[BLK_RW_ASYNC]; | 2794 | cfqq = cic->cfqq[BLK_RW_ASYNC]; |
2929 | if (cfqq) { | 2795 | if (cfqq) { |
2930 | struct cfq_queue *new_cfqq; | 2796 | struct cfq_queue *new_cfqq; |
2931 | new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc, | 2797 | new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc, |
2932 | GFP_ATOMIC); | 2798 | GFP_ATOMIC); |
2933 | if (new_cfqq) { | 2799 | if (new_cfqq) { |
2934 | cic->cfqq[BLK_RW_ASYNC] = new_cfqq; | 2800 | cic->cfqq[BLK_RW_ASYNC] = new_cfqq; |
@@ -2939,14 +2805,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) | |||
2939 | cfqq = cic->cfqq[BLK_RW_SYNC]; | 2805 | cfqq = cic->cfqq[BLK_RW_SYNC]; |
2940 | if (cfqq) | 2806 | if (cfqq) |
2941 | cfq_mark_cfqq_prio_changed(cfqq); | 2807 | cfq_mark_cfqq_prio_changed(cfqq); |
2942 | |||
2943 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
2944 | } | ||
2945 | |||
2946 | static void cfq_ioc_set_ioprio(struct io_context *ioc) | ||
2947 | { | ||
2948 | call_for_each_cic(ioc, changed_ioprio); | ||
2949 | ioc->ioprio_changed = 0; | ||
2950 | } | 2808 | } |
2951 | 2809 | ||
2952 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 2810 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
@@ -2970,11 +2828,10 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2970 | } | 2828 | } |
2971 | 2829 | ||
2972 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 2830 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
2973 | static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) | 2831 | static void changed_cgroup(struct cfq_io_cq *cic) |
2974 | { | 2832 | { |
2975 | struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); | 2833 | struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); |
2976 | struct cfq_data *cfqd = cic_to_cfqd(cic); | 2834 | struct cfq_data *cfqd = cic_to_cfqd(cic); |
2977 | unsigned long flags; | ||
2978 | struct request_queue *q; | 2835 | struct request_queue *q; |
2979 | 2836 | ||
2980 | if (unlikely(!cfqd)) | 2837 | if (unlikely(!cfqd)) |
@@ -2982,8 +2839,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) | |||
2982 | 2839 | ||
2983 | q = cfqd->queue; | 2840 | q = cfqd->queue; |
2984 | 2841 | ||
2985 | spin_lock_irqsave(q->queue_lock, flags); | ||
2986 | |||
2987 | if (sync_cfqq) { | 2842 | if (sync_cfqq) { |
2988 | /* | 2843 | /* |
2989 | * Drop reference to sync queue. A new sync queue will be | 2844 | * Drop reference to sync queue. A new sync queue will be |
@@ -2993,14 +2848,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) | |||
2993 | cic_set_cfqq(cic, NULL, 1); | 2848 | cic_set_cfqq(cic, NULL, 1); |
2994 | cfq_put_queue(sync_cfqq); | 2849 | cfq_put_queue(sync_cfqq); |
2995 | } | 2850 | } |
2996 | |||
2997 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2998 | } | ||
2999 | |||
3000 | static void cfq_ioc_set_cgroup(struct io_context *ioc) | ||
3001 | { | ||
3002 | call_for_each_cic(ioc, changed_cgroup); | ||
3003 | ioc->cgroup_changed = 0; | ||
3004 | } | 2851 | } |
3005 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ | 2852 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ |
3006 | 2853 | ||
@@ -3009,7 +2856,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, | |||
3009 | struct io_context *ioc, gfp_t gfp_mask) | 2856 | struct io_context *ioc, gfp_t gfp_mask) |
3010 | { | 2857 | { |
3011 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 2858 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
3012 | struct cfq_io_context *cic; | 2859 | struct cfq_io_cq *cic; |
3013 | struct cfq_group *cfqg; | 2860 | struct cfq_group *cfqg; |
3014 | 2861 | ||
3015 | retry: | 2862 | retry: |
@@ -3100,160 +2947,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, | |||
3100 | return cfqq; | 2947 | return cfqq; |
3101 | } | 2948 | } |
3102 | 2949 | ||
3103 | /* | ||
3104 | * We drop cfq io contexts lazily, so we may find a dead one. | ||
3105 | */ | ||
3106 | static void | ||
3107 | cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, | ||
3108 | struct cfq_io_context *cic) | ||
3109 | { | ||
3110 | unsigned long flags; | ||
3111 | |||
3112 | WARN_ON(!list_empty(&cic->queue_list)); | ||
3113 | BUG_ON(cic->key != cfqd_dead_key(cfqd)); | ||
3114 | |||
3115 | spin_lock_irqsave(&ioc->lock, flags); | ||
3116 | |||
3117 | BUG_ON(rcu_dereference_check(ioc->ioc_data, | ||
3118 | lockdep_is_held(&ioc->lock)) == cic); | ||
3119 | |||
3120 | radix_tree_delete(&ioc->radix_root, cfqd->cic_index); | ||
3121 | hlist_del_rcu(&cic->cic_list); | ||
3122 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
3123 | |||
3124 | cfq_cic_free(cic); | ||
3125 | } | ||
3126 | |||
3127 | static struct cfq_io_context * | ||
3128 | cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) | ||
3129 | { | ||
3130 | struct cfq_io_context *cic; | ||
3131 | unsigned long flags; | ||
3132 | |||
3133 | if (unlikely(!ioc)) | ||
3134 | return NULL; | ||
3135 | |||
3136 | rcu_read_lock(); | ||
3137 | |||
3138 | /* | ||
3139 | * we maintain a last-hit cache, to avoid browsing over the tree | ||
3140 | */ | ||
3141 | cic = rcu_dereference(ioc->ioc_data); | ||
3142 | if (cic && cic->key == cfqd) { | ||
3143 | rcu_read_unlock(); | ||
3144 | return cic; | ||
3145 | } | ||
3146 | |||
3147 | do { | ||
3148 | cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index); | ||
3149 | rcu_read_unlock(); | ||
3150 | if (!cic) | ||
3151 | break; | ||
3152 | if (unlikely(cic->key != cfqd)) { | ||
3153 | cfq_drop_dead_cic(cfqd, ioc, cic); | ||
3154 | rcu_read_lock(); | ||
3155 | continue; | ||
3156 | } | ||
3157 | |||
3158 | spin_lock_irqsave(&ioc->lock, flags); | ||
3159 | rcu_assign_pointer(ioc->ioc_data, cic); | ||
3160 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
3161 | break; | ||
3162 | } while (1); | ||
3163 | |||
3164 | return cic; | ||
3165 | } | ||
3166 | |||
3167 | /* | ||
3168 | * Add cic into ioc, using cfqd as the search key. This enables us to lookup | ||
3169 | * the process specific cfq io context when entered from the block layer. | ||
3170 | * Also adds the cic to a per-cfqd list, used when this queue is removed. | ||
3171 | */ | ||
3172 | static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | ||
3173 | struct cfq_io_context *cic, gfp_t gfp_mask) | ||
3174 | { | ||
3175 | unsigned long flags; | ||
3176 | int ret; | ||
3177 | |||
3178 | ret = radix_tree_preload(gfp_mask); | ||
3179 | if (!ret) { | ||
3180 | cic->ioc = ioc; | ||
3181 | cic->key = cfqd; | ||
3182 | |||
3183 | spin_lock_irqsave(&ioc->lock, flags); | ||
3184 | ret = radix_tree_insert(&ioc->radix_root, | ||
3185 | cfqd->cic_index, cic); | ||
3186 | if (!ret) | ||
3187 | hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); | ||
3188 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
3189 | |||
3190 | radix_tree_preload_end(); | ||
3191 | |||
3192 | if (!ret) { | ||
3193 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
3194 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
3195 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
3196 | } | ||
3197 | } | ||
3198 | |||
3199 | if (ret && ret != -EEXIST) | ||
3200 | printk(KERN_ERR "cfq: cic link failed!\n"); | ||
3201 | |||
3202 | return ret; | ||
3203 | } | ||
3204 | |||
3205 | /* | ||
3206 | * Setup general io context and cfq io context. There can be several cfq | ||
3207 | * io contexts per general io context, if this process is doing io to more | ||
3208 | * than one device managed by cfq. | ||
3209 | */ | ||
3210 | static struct cfq_io_context * | ||
3211 | cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | ||
3212 | { | ||
3213 | struct io_context *ioc = NULL; | ||
3214 | struct cfq_io_context *cic; | ||
3215 | int ret; | ||
3216 | |||
3217 | might_sleep_if(gfp_mask & __GFP_WAIT); | ||
3218 | |||
3219 | ioc = get_io_context(gfp_mask, cfqd->queue->node); | ||
3220 | if (!ioc) | ||
3221 | return NULL; | ||
3222 | |||
3223 | retry: | ||
3224 | cic = cfq_cic_lookup(cfqd, ioc); | ||
3225 | if (cic) | ||
3226 | goto out; | ||
3227 | |||
3228 | cic = cfq_alloc_io_context(cfqd, gfp_mask); | ||
3229 | if (cic == NULL) | ||
3230 | goto err; | ||
3231 | |||
3232 | ret = cfq_cic_link(cfqd, ioc, cic, gfp_mask); | ||
3233 | if (ret == -EEXIST) { | ||
3234 | /* someone has linked cic to ioc already */ | ||
3235 | cfq_cic_free(cic); | ||
3236 | goto retry; | ||
3237 | } else if (ret) | ||
3238 | goto err_free; | ||
3239 | |||
3240 | out: | ||
3241 | smp_read_barrier_depends(); | ||
3242 | if (unlikely(ioc->ioprio_changed)) | ||
3243 | cfq_ioc_set_ioprio(ioc); | ||
3244 | |||
3245 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | ||
3246 | if (unlikely(ioc->cgroup_changed)) | ||
3247 | cfq_ioc_set_cgroup(ioc); | ||
3248 | #endif | ||
3249 | return cic; | ||
3250 | err_free: | ||
3251 | cfq_cic_free(cic); | ||
3252 | err: | ||
3253 | put_io_context(ioc); | ||
3254 | return NULL; | ||
3255 | } | ||
3256 | |||
3257 | static void | 2950 | static void |
3258 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) | 2951 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) |
3259 | { | 2952 | { |
@@ -3267,7 +2960,7 @@ __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) | |||
3267 | 2960 | ||
3268 | static void | 2961 | static void |
3269 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 2962 | cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3270 | struct cfq_io_context *cic) | 2963 | struct cfq_io_cq *cic) |
3271 | { | 2964 | { |
3272 | if (cfq_cfqq_sync(cfqq)) { | 2965 | if (cfq_cfqq_sync(cfqq)) { |
3273 | __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle); | 2966 | __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle); |
@@ -3305,7 +2998,7 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3305 | */ | 2998 | */ |
3306 | static void | 2999 | static void |
3307 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3000 | cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3308 | struct cfq_io_context *cic) | 3001 | struct cfq_io_cq *cic) |
3309 | { | 3002 | { |
3310 | int old_idle, enable_idle; | 3003 | int old_idle, enable_idle; |
3311 | 3004 | ||
@@ -3322,8 +3015,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3322 | 3015 | ||
3323 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) | 3016 | if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) |
3324 | enable_idle = 0; | 3017 | enable_idle = 0; |
3325 | else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | 3018 | else if (!atomic_read(&cic->icq.ioc->nr_tasks) || |
3326 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | 3019 | !cfqd->cfq_slice_idle || |
3020 | (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) | ||
3327 | enable_idle = 0; | 3021 | enable_idle = 0; |
3328 | else if (sample_valid(cic->ttime.ttime_samples)) { | 3022 | else if (sample_valid(cic->ttime.ttime_samples)) { |
3329 | if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle) | 3023 | if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle) |
@@ -3455,7 +3149,7 @@ static void | |||
3455 | cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 3149 | cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
3456 | struct request *rq) | 3150 | struct request *rq) |
3457 | { | 3151 | { |
3458 | struct cfq_io_context *cic = RQ_CIC(rq); | 3152 | struct cfq_io_cq *cic = RQ_CIC(rq); |
3459 | 3153 | ||
3460 | cfqd->rq_queued++; | 3154 | cfqd->rq_queued++; |
3461 | if (rq->cmd_flags & REQ_PRIO) | 3155 | if (rq->cmd_flags & REQ_PRIO) |
@@ -3508,7 +3202,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
3508 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 3202 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
3509 | 3203 | ||
3510 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); | 3204 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); |
3511 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); | 3205 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc); |
3512 | 3206 | ||
3513 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | 3207 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
3514 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 3208 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
@@ -3558,7 +3252,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) | |||
3558 | 3252 | ||
3559 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 3253 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
3560 | { | 3254 | { |
3561 | struct cfq_io_context *cic = cfqd->active_cic; | 3255 | struct cfq_io_cq *cic = cfqd->active_cic; |
3562 | 3256 | ||
3563 | /* If the queue already has requests, don't wait */ | 3257 | /* If the queue already has requests, don't wait */ |
3564 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) | 3258 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) |
@@ -3695,7 +3389,7 @@ static int cfq_may_queue(struct request_queue *q, int rw) | |||
3695 | { | 3389 | { |
3696 | struct cfq_data *cfqd = q->elevator->elevator_data; | 3390 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3697 | struct task_struct *tsk = current; | 3391 | struct task_struct *tsk = current; |
3698 | struct cfq_io_context *cic; | 3392 | struct cfq_io_cq *cic; |
3699 | struct cfq_queue *cfqq; | 3393 | struct cfq_queue *cfqq; |
3700 | 3394 | ||
3701 | /* | 3395 | /* |
@@ -3710,7 +3404,7 @@ static int cfq_may_queue(struct request_queue *q, int rw) | |||
3710 | 3404 | ||
3711 | cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); | 3405 | cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); |
3712 | if (cfqq) { | 3406 | if (cfqq) { |
3713 | cfq_init_prio_data(cfqq, cic->ioc); | 3407 | cfq_init_prio_data(cfqq, cic->icq.ioc); |
3714 | 3408 | ||
3715 | return __cfq_may_queue(cfqq); | 3409 | return __cfq_may_queue(cfqq); |
3716 | } | 3410 | } |
@@ -3731,21 +3425,17 @@ static void cfq_put_request(struct request *rq) | |||
3731 | BUG_ON(!cfqq->allocated[rw]); | 3425 | BUG_ON(!cfqq->allocated[rw]); |
3732 | cfqq->allocated[rw]--; | 3426 | cfqq->allocated[rw]--; |
3733 | 3427 | ||
3734 | put_io_context(RQ_CIC(rq)->ioc); | ||
3735 | |||
3736 | rq->elevator_private[0] = NULL; | ||
3737 | rq->elevator_private[1] = NULL; | ||
3738 | |||
3739 | /* Put down rq reference on cfqg */ | 3428 | /* Put down rq reference on cfqg */ |
3740 | cfq_put_cfqg(RQ_CFQG(rq)); | 3429 | cfq_put_cfqg(RQ_CFQG(rq)); |
3741 | rq->elevator_private[2] = NULL; | 3430 | rq->elv.priv[0] = NULL; |
3431 | rq->elv.priv[1] = NULL; | ||
3742 | 3432 | ||
3743 | cfq_put_queue(cfqq); | 3433 | cfq_put_queue(cfqq); |
3744 | } | 3434 | } |
3745 | } | 3435 | } |
3746 | 3436 | ||
3747 | static struct cfq_queue * | 3437 | static struct cfq_queue * |
3748 | cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, | 3438 | cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic, |
3749 | struct cfq_queue *cfqq) | 3439 | struct cfq_queue *cfqq) |
3750 | { | 3440 | { |
3751 | cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); | 3441 | cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq); |
@@ -3760,7 +3450,7 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, | |||
3760 | * was the last process referring to said cfqq. | 3450 | * was the last process referring to said cfqq. |
3761 | */ | 3451 | */ |
3762 | static struct cfq_queue * | 3452 | static struct cfq_queue * |
3763 | split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) | 3453 | split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq) |
3764 | { | 3454 | { |
3765 | if (cfqq_process_refs(cfqq) == 1) { | 3455 | if (cfqq_process_refs(cfqq) == 1) { |
3766 | cfqq->pid = current->pid; | 3456 | cfqq->pid = current->pid; |
@@ -3783,25 +3473,29 @@ static int | |||
3783 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | 3473 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
3784 | { | 3474 | { |
3785 | struct cfq_data *cfqd = q->elevator->elevator_data; | 3475 | struct cfq_data *cfqd = q->elevator->elevator_data; |
3786 | struct cfq_io_context *cic; | 3476 | struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); |
3787 | const int rw = rq_data_dir(rq); | 3477 | const int rw = rq_data_dir(rq); |
3788 | const bool is_sync = rq_is_sync(rq); | 3478 | const bool is_sync = rq_is_sync(rq); |
3789 | struct cfq_queue *cfqq; | 3479 | struct cfq_queue *cfqq; |
3790 | unsigned long flags; | ||
3791 | 3480 | ||
3792 | might_sleep_if(gfp_mask & __GFP_WAIT); | 3481 | might_sleep_if(gfp_mask & __GFP_WAIT); |
3793 | 3482 | ||
3794 | cic = cfq_get_io_context(cfqd, gfp_mask); | 3483 | spin_lock_irq(q->queue_lock); |
3795 | |||
3796 | spin_lock_irqsave(q->queue_lock, flags); | ||
3797 | 3484 | ||
3798 | if (!cic) | 3485 | /* handle changed notifications */ |
3799 | goto queue_fail; | 3486 | if (unlikely(cic->icq.changed)) { |
3487 | if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed)) | ||
3488 | changed_ioprio(cic); | ||
3489 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | ||
3490 | if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed)) | ||
3491 | changed_cgroup(cic); | ||
3492 | #endif | ||
3493 | } | ||
3800 | 3494 | ||
3801 | new_queue: | 3495 | new_queue: |
3802 | cfqq = cic_to_cfqq(cic, is_sync); | 3496 | cfqq = cic_to_cfqq(cic, is_sync); |
3803 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { | 3497 | if (!cfqq || cfqq == &cfqd->oom_cfqq) { |
3804 | cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); | 3498 | cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask); |
3805 | cic_set_cfqq(cic, cfqq, is_sync); | 3499 | cic_set_cfqq(cic, cfqq, is_sync); |
3806 | } else { | 3500 | } else { |
3807 | /* | 3501 | /* |
@@ -3827,17 +3521,10 @@ new_queue: | |||
3827 | cfqq->allocated[rw]++; | 3521 | cfqq->allocated[rw]++; |
3828 | 3522 | ||
3829 | cfqq->ref++; | 3523 | cfqq->ref++; |
3830 | rq->elevator_private[0] = cic; | 3524 | rq->elv.priv[0] = cfqq; |
3831 | rq->elevator_private[1] = cfqq; | 3525 | rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg); |
3832 | rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg); | 3526 | spin_unlock_irq(q->queue_lock); |
3833 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
3834 | return 0; | 3527 | return 0; |
3835 | |||
3836 | queue_fail: | ||
3837 | cfq_schedule_dispatch(cfqd); | ||
3838 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
3839 | cfq_log(cfqd, "set_request fail"); | ||
3840 | return 1; | ||
3841 | } | 3528 | } |
3842 | 3529 | ||
3843 | static void cfq_kick_queue(struct work_struct *work) | 3530 | static void cfq_kick_queue(struct work_struct *work) |
@@ -3941,14 +3628,6 @@ static void cfq_exit_queue(struct elevator_queue *e) | |||
3941 | if (cfqd->active_queue) | 3628 | if (cfqd->active_queue) |
3942 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); | 3629 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); |
3943 | 3630 | ||
3944 | while (!list_empty(&cfqd->cic_list)) { | ||
3945 | struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, | ||
3946 | struct cfq_io_context, | ||
3947 | queue_list); | ||
3948 | |||
3949 | __cfq_exit_single_io_context(cfqd, cic); | ||
3950 | } | ||
3951 | |||
3952 | cfq_put_async_queues(cfqd); | 3631 | cfq_put_async_queues(cfqd); |
3953 | cfq_release_cfq_groups(cfqd); | 3632 | cfq_release_cfq_groups(cfqd); |
3954 | 3633 | ||
@@ -3963,10 +3642,6 @@ static void cfq_exit_queue(struct elevator_queue *e) | |||
3963 | 3642 | ||
3964 | cfq_shutdown_timer_wq(cfqd); | 3643 | cfq_shutdown_timer_wq(cfqd); |
3965 | 3644 | ||
3966 | spin_lock(&cic_index_lock); | ||
3967 | ida_remove(&cic_index_ida, cfqd->cic_index); | ||
3968 | spin_unlock(&cic_index_lock); | ||
3969 | |||
3970 | /* | 3645 | /* |
3971 | * Wait for cfqg->blkg->key accessors to exit their grace periods. | 3646 | * Wait for cfqg->blkg->key accessors to exit their grace periods. |
3972 | * Do this wait only if there are other unlinked groups out | 3647 | * Do this wait only if there are other unlinked groups out |
@@ -3988,24 +3663,6 @@ static void cfq_exit_queue(struct elevator_queue *e) | |||
3988 | kfree(cfqd); | 3663 | kfree(cfqd); |
3989 | } | 3664 | } |
3990 | 3665 | ||
3991 | static int cfq_alloc_cic_index(void) | ||
3992 | { | ||
3993 | int index, error; | ||
3994 | |||
3995 | do { | ||
3996 | if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) | ||
3997 | return -ENOMEM; | ||
3998 | |||
3999 | spin_lock(&cic_index_lock); | ||
4000 | error = ida_get_new(&cic_index_ida, &index); | ||
4001 | spin_unlock(&cic_index_lock); | ||
4002 | if (error && error != -EAGAIN) | ||
4003 | return error; | ||
4004 | } while (error); | ||
4005 | |||
4006 | return index; | ||
4007 | } | ||
4008 | |||
4009 | static void *cfq_init_queue(struct request_queue *q) | 3666 | static void *cfq_init_queue(struct request_queue *q) |
4010 | { | 3667 | { |
4011 | struct cfq_data *cfqd; | 3668 | struct cfq_data *cfqd; |
@@ -4013,23 +3670,9 @@ static void *cfq_init_queue(struct request_queue *q) | |||
4013 | struct cfq_group *cfqg; | 3670 | struct cfq_group *cfqg; |
4014 | struct cfq_rb_root *st; | 3671 | struct cfq_rb_root *st; |
4015 | 3672 | ||
4016 | i = cfq_alloc_cic_index(); | ||
4017 | if (i < 0) | ||
4018 | return NULL; | ||
4019 | |||
4020 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); | 3673 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
4021 | if (!cfqd) { | 3674 | if (!cfqd) |
4022 | spin_lock(&cic_index_lock); | ||
4023 | ida_remove(&cic_index_ida, i); | ||
4024 | spin_unlock(&cic_index_lock); | ||
4025 | return NULL; | 3675 | return NULL; |
4026 | } | ||
4027 | |||
4028 | /* | ||
4029 | * Don't need take queue_lock in the routine, since we are | ||
4030 | * initializing the ioscheduler, and nobody is using cfqd | ||
4031 | */ | ||
4032 | cfqd->cic_index = i; | ||
4033 | 3676 | ||
4034 | /* Init root service tree */ | 3677 | /* Init root service tree */ |
4035 | cfqd->grp_service_tree = CFQ_RB_ROOT; | 3678 | cfqd->grp_service_tree = CFQ_RB_ROOT; |
@@ -4055,11 +3698,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
4055 | 3698 | ||
4056 | if (blkio_alloc_blkg_stats(&cfqg->blkg)) { | 3699 | if (blkio_alloc_blkg_stats(&cfqg->blkg)) { |
4057 | kfree(cfqg); | 3700 | kfree(cfqg); |
4058 | |||
4059 | spin_lock(&cic_index_lock); | ||
4060 | ida_remove(&cic_index_ida, cfqd->cic_index); | ||
4061 | spin_unlock(&cic_index_lock); | ||
4062 | |||
4063 | kfree(cfqd); | 3701 | kfree(cfqd); |
4064 | return NULL; | 3702 | return NULL; |
4065 | } | 3703 | } |
@@ -4091,8 +3729,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
4091 | cfqd->oom_cfqq.ref++; | 3729 | cfqd->oom_cfqq.ref++; |
4092 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); | 3730 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); |
4093 | 3731 | ||
4094 | INIT_LIST_HEAD(&cfqd->cic_list); | ||
4095 | |||
4096 | cfqd->queue = q; | 3732 | cfqd->queue = q; |
4097 | 3733 | ||
4098 | init_timer(&cfqd->idle_slice_timer); | 3734 | init_timer(&cfqd->idle_slice_timer); |
@@ -4121,34 +3757,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
4121 | return cfqd; | 3757 | return cfqd; |
4122 | } | 3758 | } |
4123 | 3759 | ||
4124 | static void cfq_slab_kill(void) | ||
4125 | { | ||
4126 | /* | ||
4127 | * Caller already ensured that pending RCU callbacks are completed, | ||
4128 | * so we should have no busy allocations at this point. | ||
4129 | */ | ||
4130 | if (cfq_pool) | ||
4131 | kmem_cache_destroy(cfq_pool); | ||
4132 | if (cfq_ioc_pool) | ||
4133 | kmem_cache_destroy(cfq_ioc_pool); | ||
4134 | } | ||
4135 | |||
4136 | static int __init cfq_slab_setup(void) | ||
4137 | { | ||
4138 | cfq_pool = KMEM_CACHE(cfq_queue, 0); | ||
4139 | if (!cfq_pool) | ||
4140 | goto fail; | ||
4141 | |||
4142 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); | ||
4143 | if (!cfq_ioc_pool) | ||
4144 | goto fail; | ||
4145 | |||
4146 | return 0; | ||
4147 | fail: | ||
4148 | cfq_slab_kill(); | ||
4149 | return -ENOMEM; | ||
4150 | } | ||
4151 | |||
4152 | /* | 3760 | /* |
4153 | * sysfs parts below --> | 3761 | * sysfs parts below --> |
4154 | */ | 3762 | */ |
@@ -4254,15 +3862,18 @@ static struct elevator_type iosched_cfq = { | |||
4254 | .elevator_completed_req_fn = cfq_completed_request, | 3862 | .elevator_completed_req_fn = cfq_completed_request, |
4255 | .elevator_former_req_fn = elv_rb_former_request, | 3863 | .elevator_former_req_fn = elv_rb_former_request, |
4256 | .elevator_latter_req_fn = elv_rb_latter_request, | 3864 | .elevator_latter_req_fn = elv_rb_latter_request, |
3865 | .elevator_init_icq_fn = cfq_init_icq, | ||
3866 | .elevator_exit_icq_fn = cfq_exit_icq, | ||
4257 | .elevator_set_req_fn = cfq_set_request, | 3867 | .elevator_set_req_fn = cfq_set_request, |
4258 | .elevator_put_req_fn = cfq_put_request, | 3868 | .elevator_put_req_fn = cfq_put_request, |
4259 | .elevator_may_queue_fn = cfq_may_queue, | 3869 | .elevator_may_queue_fn = cfq_may_queue, |
4260 | .elevator_init_fn = cfq_init_queue, | 3870 | .elevator_init_fn = cfq_init_queue, |
4261 | .elevator_exit_fn = cfq_exit_queue, | 3871 | .elevator_exit_fn = cfq_exit_queue, |
4262 | .trim = cfq_free_io_context, | ||
4263 | }, | 3872 | }, |
3873 | .icq_size = sizeof(struct cfq_io_cq), | ||
3874 | .icq_align = __alignof__(struct cfq_io_cq), | ||
4264 | .elevator_attrs = cfq_attrs, | 3875 | .elevator_attrs = cfq_attrs, |
4265 | .elevator_name = "cfq", | 3876 | .elevator_name = "cfq", |
4266 | .elevator_owner = THIS_MODULE, | 3877 | .elevator_owner = THIS_MODULE, |
4267 | }; | 3878 | }; |
4268 | 3879 | ||
@@ -4280,6 +3891,8 @@ static struct blkio_policy_type blkio_policy_cfq; | |||
4280 | 3891 | ||
4281 | static int __init cfq_init(void) | 3892 | static int __init cfq_init(void) |
4282 | { | 3893 | { |
3894 | int ret; | ||
3895 | |||
4283 | /* | 3896 | /* |
4284 | * could be 0 on HZ < 1000 setups | 3897 | * could be 0 on HZ < 1000 setups |
4285 | */ | 3898 | */ |
@@ -4294,10 +3907,16 @@ static int __init cfq_init(void) | |||
4294 | #else | 3907 | #else |
4295 | cfq_group_idle = 0; | 3908 | cfq_group_idle = 0; |
4296 | #endif | 3909 | #endif |
4297 | if (cfq_slab_setup()) | 3910 | cfq_pool = KMEM_CACHE(cfq_queue, 0); |
3911 | if (!cfq_pool) | ||
4298 | return -ENOMEM; | 3912 | return -ENOMEM; |
4299 | 3913 | ||
4300 | elv_register(&iosched_cfq); | 3914 | ret = elv_register(&iosched_cfq); |
3915 | if (ret) { | ||
3916 | kmem_cache_destroy(cfq_pool); | ||
3917 | return ret; | ||
3918 | } | ||
3919 | |||
4301 | blkio_policy_register(&blkio_policy_cfq); | 3920 | blkio_policy_register(&blkio_policy_cfq); |
4302 | 3921 | ||
4303 | return 0; | 3922 | return 0; |
@@ -4305,21 +3924,9 @@ static int __init cfq_init(void) | |||
4305 | 3924 | ||
4306 | static void __exit cfq_exit(void) | 3925 | static void __exit cfq_exit(void) |
4307 | { | 3926 | { |
4308 | DECLARE_COMPLETION_ONSTACK(all_gone); | ||
4309 | blkio_policy_unregister(&blkio_policy_cfq); | 3927 | blkio_policy_unregister(&blkio_policy_cfq); |
4310 | elv_unregister(&iosched_cfq); | 3928 | elv_unregister(&iosched_cfq); |
4311 | ioc_gone = &all_gone; | 3929 | kmem_cache_destroy(cfq_pool); |
4312 | /* ioc_gone's update must be visible before reading ioc_count */ | ||
4313 | smp_wmb(); | ||
4314 | |||
4315 | /* | ||
4316 | * this also protects us from entering cfq_slab_kill() with | ||
4317 | * pending RCU callbacks | ||
4318 | */ | ||
4319 | if (elv_ioc_count_read(cfq_ioc_count)) | ||
4320 | wait_for_completion(&all_gone); | ||
4321 | ida_destroy(&cic_index_ida); | ||
4322 | cfq_slab_kill(); | ||
4323 | } | 3930 | } |
4324 | 3931 | ||
4325 | module_init(cfq_init); | 3932 | module_init(cfq_init); |
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7b725020823c..7c668c8a6f95 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c | |||
@@ -719,6 +719,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
719 | case BLKSECTGET: | 719 | case BLKSECTGET: |
720 | return compat_put_ushort(arg, | 720 | return compat_put_ushort(arg, |
721 | queue_max_sectors(bdev_get_queue(bdev))); | 721 | queue_max_sectors(bdev_get_queue(bdev))); |
722 | case BLKROTATIONAL: | ||
723 | return compat_put_ushort(arg, | ||
724 | !blk_queue_nonrot(bdev_get_queue(bdev))); | ||
722 | case BLKRASET: /* compatible, but no compat_ptr (!) */ | 725 | case BLKRASET: /* compatible, but no compat_ptr (!) */ |
723 | case BLKFRASET: | 726 | case BLKFRASET: |
724 | if (!capable(CAP_SYS_ADMIN)) | 727 | if (!capable(CAP_SYS_ADMIN)) |
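This mirrors the native BLKROTATIONAL handler added elsewhere in the series: both return an unsigned short that is the inverse of the queue's nonrot flag. From userspace the new ioctl would be used roughly like this (assuming BLKROTATIONAL is picked up from <linux/fs.h>, where this series defines it):

/* Hypothetical userspace probe: is a block device rotational? */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKROTATIONAL */

int main(int argc, char **argv)
{
	unsigned short rotational;
	int fd = open(argc > 1 ? argv[1] : "/dev/sda", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKROTATIONAL, &rotational) < 0) {
		perror("BLKROTATIONAL");
		close(fd);
		return 1;
	}
	printf("%s\n", rotational ? "rotational" : "non-rotational");
	close(fd);
	return 0;
}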
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c644137d9cd6..7bf12d793fcd 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c | |||
@@ -448,9 +448,7 @@ static struct elevator_type iosched_deadline = { | |||
448 | 448 | ||
449 | static int __init deadline_init(void) | 449 | static int __init deadline_init(void) |
450 | { | 450 | { |
451 | elv_register(&iosched_deadline); | 451 | return elv_register(&iosched_deadline); |
452 | |||
453 | return 0; | ||
454 | } | 452 | } |
455 | 453 | ||
456 | static void __exit deadline_exit(void) | 454 | static void __exit deadline_exit(void) |
diff --git a/block/elevator.c b/block/elevator.c index 66343d6917d0..91e18f8af9be 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -61,8 +61,8 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) | |||
61 | struct request_queue *q = rq->q; | 61 | struct request_queue *q = rq->q; |
62 | struct elevator_queue *e = q->elevator; | 62 | struct elevator_queue *e = q->elevator; |
63 | 63 | ||
64 | if (e->ops->elevator_allow_merge_fn) | 64 | if (e->type->ops.elevator_allow_merge_fn) |
65 | return e->ops->elevator_allow_merge_fn(q, rq, bio); | 65 | return e->type->ops.elevator_allow_merge_fn(q, rq, bio); |
66 | 66 | ||
67 | return 1; | 67 | return 1; |
68 | } | 68 | } |
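The mechanical e->ops to e->type->ops conversion that starts here reflects the "block: remove elevator_queue->ops" change in this merge: the ops table now lives only inside struct elevator_type (embedded, hence the "." accessor), and the queue keeps a single type back-pointer instead of the old ops/elevator_type pair. Roughly, as reconstructed from the accessors in this diff (see include/linux/elevator.h for the authoritative layout):

/* Sketch only -- field set inferred from this diff, not copied verbatim. */
struct elevator_type;			/* carries .ops, .elevator_attrs, .elevator_name, .icq_size, ... */

struct elevator_queue {
	struct elevator_type *type;	/* replaces the old ops + elevator_type members */
	void *elevator_data;		/* per-queue scheduler state, e.g. struct cfq_data */
	struct kobject kobj;
	struct mutex sysfs_lock;
	struct hlist_head *hash;
	/* ... */
};

Every call site then changes the same way: e->ops->fn(...) becomes e->type->ops.fn(...), and e->elevator_type becomes e->type.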
@@ -168,17 +168,13 @@ static struct elevator_type *elevator_get(const char *name) | |||
168 | return e; | 168 | return e; |
169 | } | 169 | } |
170 | 170 | ||
171 | static void *elevator_init_queue(struct request_queue *q, | 171 | static int elevator_init_queue(struct request_queue *q, |
172 | struct elevator_queue *eq) | 172 | struct elevator_queue *eq) |
173 | { | 173 | { |
174 | return eq->ops->elevator_init_fn(q); | 174 | eq->elevator_data = eq->type->ops.elevator_init_fn(q); |
175 | } | 175 | if (eq->elevator_data) |
176 | 176 | return 0; | |
177 | static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, | 177 | return -ENOMEM; |
178 | void *data) | ||
179 | { | ||
180 | q->elevator = eq; | ||
181 | eq->elevator_data = data; | ||
182 | } | 178 | } |
183 | 179 | ||
184 | static char chosen_elevator[ELV_NAME_MAX]; | 180 | static char chosen_elevator[ELV_NAME_MAX]; |
@@ -207,8 +203,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, | |||
207 | if (unlikely(!eq)) | 203 | if (unlikely(!eq)) |
208 | goto err; | 204 | goto err; |
209 | 205 | ||
210 | eq->ops = &e->ops; | 206 | eq->type = e; |
211 | eq->elevator_type = e; | ||
212 | kobject_init(&eq->kobj, &elv_ktype); | 207 | kobject_init(&eq->kobj, &elv_ktype); |
213 | mutex_init(&eq->sysfs_lock); | 208 | mutex_init(&eq->sysfs_lock); |
214 | 209 | ||
@@ -232,7 +227,7 @@ static void elevator_release(struct kobject *kobj) | |||
232 | struct elevator_queue *e; | 227 | struct elevator_queue *e; |
233 | 228 | ||
234 | e = container_of(kobj, struct elevator_queue, kobj); | 229 | e = container_of(kobj, struct elevator_queue, kobj); |
235 | elevator_put(e->elevator_type); | 230 | elevator_put(e->type); |
236 | kfree(e->hash); | 231 | kfree(e->hash); |
237 | kfree(e); | 232 | kfree(e); |
238 | } | 233 | } |
@@ -241,7 +236,7 @@ int elevator_init(struct request_queue *q, char *name) | |||
241 | { | 236 | { |
242 | struct elevator_type *e = NULL; | 237 | struct elevator_type *e = NULL; |
243 | struct elevator_queue *eq; | 238 | struct elevator_queue *eq; |
244 | void *data; | 239 | int err; |
245 | 240 | ||
246 | if (unlikely(q->elevator)) | 241 | if (unlikely(q->elevator)) |
247 | return 0; | 242 | return 0; |
@@ -278,13 +273,13 @@ int elevator_init(struct request_queue *q, char *name) | |||
278 | if (!eq) | 273 | if (!eq) |
279 | return -ENOMEM; | 274 | return -ENOMEM; |
280 | 275 | ||
281 | data = elevator_init_queue(q, eq); | 276 | err = elevator_init_queue(q, eq); |
282 | if (!data) { | 277 | if (err) { |
283 | kobject_put(&eq->kobj); | 278 | kobject_put(&eq->kobj); |
284 | return -ENOMEM; | 279 | return err; |
285 | } | 280 | } |
286 | 281 | ||
287 | elevator_attach(q, eq, data); | 282 | q->elevator = eq; |
288 | return 0; | 283 | return 0; |
289 | } | 284 | } |
290 | EXPORT_SYMBOL(elevator_init); | 285 | EXPORT_SYMBOL(elevator_init); |
@@ -292,9 +287,8 @@ EXPORT_SYMBOL(elevator_init); | |||
292 | void elevator_exit(struct elevator_queue *e) | 287 | void elevator_exit(struct elevator_queue *e) |
293 | { | 288 | { |
294 | mutex_lock(&e->sysfs_lock); | 289 | mutex_lock(&e->sysfs_lock); |
295 | if (e->ops->elevator_exit_fn) | 290 | if (e->type->ops.elevator_exit_fn) |
296 | e->ops->elevator_exit_fn(e); | 291 | e->type->ops.elevator_exit_fn(e); |
297 | e->ops = NULL; | ||
298 | mutex_unlock(&e->sysfs_lock); | 292 | mutex_unlock(&e->sysfs_lock); |
299 | 293 | ||
300 | kobject_put(&e->kobj); | 294 | kobject_put(&e->kobj); |
@@ -504,8 +498,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | |||
504 | return ELEVATOR_BACK_MERGE; | 498 | return ELEVATOR_BACK_MERGE; |
505 | } | 499 | } |
506 | 500 | ||
507 | if (e->ops->elevator_merge_fn) | 501 | if (e->type->ops.elevator_merge_fn) |
508 | return e->ops->elevator_merge_fn(q, req, bio); | 502 | return e->type->ops.elevator_merge_fn(q, req, bio); |
509 | 503 | ||
510 | return ELEVATOR_NO_MERGE; | 504 | return ELEVATOR_NO_MERGE; |
511 | } | 505 | } |
@@ -548,8 +542,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type) | |||
548 | { | 542 | { |
549 | struct elevator_queue *e = q->elevator; | 543 | struct elevator_queue *e = q->elevator; |
550 | 544 | ||
551 | if (e->ops->elevator_merged_fn) | 545 | if (e->type->ops.elevator_merged_fn) |
552 | e->ops->elevator_merged_fn(q, rq, type); | 546 | e->type->ops.elevator_merged_fn(q, rq, type); |
553 | 547 | ||
554 | if (type == ELEVATOR_BACK_MERGE) | 548 | if (type == ELEVATOR_BACK_MERGE) |
555 | elv_rqhash_reposition(q, rq); | 549 | elv_rqhash_reposition(q, rq); |
@@ -563,8 +557,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, | |||
563 | struct elevator_queue *e = q->elevator; | 557 | struct elevator_queue *e = q->elevator; |
564 | const int next_sorted = next->cmd_flags & REQ_SORTED; | 558 | const int next_sorted = next->cmd_flags & REQ_SORTED; |
565 | 559 | ||
566 | if (next_sorted && e->ops->elevator_merge_req_fn) | 560 | if (next_sorted && e->type->ops.elevator_merge_req_fn) |
567 | e->ops->elevator_merge_req_fn(q, rq, next); | 561 | e->type->ops.elevator_merge_req_fn(q, rq, next); |
568 | 562 | ||
569 | elv_rqhash_reposition(q, rq); | 563 | elv_rqhash_reposition(q, rq); |
570 | 564 | ||
@@ -581,8 +575,8 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, | |||
581 | { | 575 | { |
582 | struct elevator_queue *e = q->elevator; | 576 | struct elevator_queue *e = q->elevator; |
583 | 577 | ||
584 | if (e->ops->elevator_bio_merged_fn) | 578 | if (e->type->ops.elevator_bio_merged_fn) |
585 | e->ops->elevator_bio_merged_fn(q, rq, bio); | 579 | e->type->ops.elevator_bio_merged_fn(q, rq, bio); |
586 | } | 580 | } |
587 | 581 | ||
588 | void elv_requeue_request(struct request_queue *q, struct request *rq) | 582 | void elv_requeue_request(struct request_queue *q, struct request *rq) |
@@ -608,12 +602,12 @@ void elv_drain_elevator(struct request_queue *q) | |||
608 | 602 | ||
609 | lockdep_assert_held(q->queue_lock); | 603 | lockdep_assert_held(q->queue_lock); |
610 | 604 | ||
611 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | 605 | while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) |
612 | ; | 606 | ; |
613 | if (q->nr_sorted && printed++ < 10) { | 607 | if (q->nr_sorted && printed++ < 10) { |
614 | printk(KERN_ERR "%s: forced dispatching is broken " | 608 | printk(KERN_ERR "%s: forced dispatching is broken " |
615 | "(nr_sorted=%u), please report this\n", | 609 | "(nr_sorted=%u), please report this\n", |
616 | q->elevator->elevator_type->elevator_name, q->nr_sorted); | 610 | q->elevator->type->elevator_name, q->nr_sorted); |
617 | } | 611 | } |
618 | } | 612 | } |
619 | 613 | ||
@@ -702,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) | |||
702 | * rq cannot be accessed after calling | 696 | * rq cannot be accessed after calling |
703 | * elevator_add_req_fn. | 697 | * elevator_add_req_fn. |
704 | */ | 698 | */ |
705 | q->elevator->ops->elevator_add_req_fn(q, rq); | 699 | q->elevator->type->ops.elevator_add_req_fn(q, rq); |
706 | break; | 700 | break; |
707 | 701 | ||
708 | case ELEVATOR_INSERT_FLUSH: | 702 | case ELEVATOR_INSERT_FLUSH: |
@@ -731,8 +725,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) | |||
731 | { | 725 | { |
732 | struct elevator_queue *e = q->elevator; | 726 | struct elevator_queue *e = q->elevator; |
733 | 727 | ||
734 | if (e->ops->elevator_latter_req_fn) | 728 | if (e->type->ops.elevator_latter_req_fn) |
735 | return e->ops->elevator_latter_req_fn(q, rq); | 729 | return e->type->ops.elevator_latter_req_fn(q, rq); |
736 | return NULL; | 730 | return NULL; |
737 | } | 731 | } |
738 | 732 | ||
@@ -740,8 +734,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) | |||
740 | { | 734 | { |
741 | struct elevator_queue *e = q->elevator; | 735 | struct elevator_queue *e = q->elevator; |
742 | 736 | ||
743 | if (e->ops->elevator_former_req_fn) | 737 | if (e->type->ops.elevator_former_req_fn) |
744 | return e->ops->elevator_former_req_fn(q, rq); | 738 | return e->type->ops.elevator_former_req_fn(q, rq); |
745 | return NULL; | 739 | return NULL; |
746 | } | 740 | } |
747 | 741 | ||
@@ -749,10 +743,8 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
749 | { | 743 | { |
750 | struct elevator_queue *e = q->elevator; | 744 | struct elevator_queue *e = q->elevator; |
751 | 745 | ||
752 | if (e->ops->elevator_set_req_fn) | 746 | if (e->type->ops.elevator_set_req_fn) |
753 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); | 747 | return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask); |
754 | |||
755 | rq->elevator_private[0] = NULL; | ||
756 | return 0; | 748 | return 0; |
757 | } | 749 | } |
758 | 750 | ||
@@ -760,16 +752,16 @@ void elv_put_request(struct request_queue *q, struct request *rq) | |||
760 | { | 752 | { |
761 | struct elevator_queue *e = q->elevator; | 753 | struct elevator_queue *e = q->elevator; |
762 | 754 | ||
763 | if (e->ops->elevator_put_req_fn) | 755 | if (e->type->ops.elevator_put_req_fn) |
764 | e->ops->elevator_put_req_fn(rq); | 756 | e->type->ops.elevator_put_req_fn(rq); |
765 | } | 757 | } |
766 | 758 | ||
767 | int elv_may_queue(struct request_queue *q, int rw) | 759 | int elv_may_queue(struct request_queue *q, int rw) |
768 | { | 760 | { |
769 | struct elevator_queue *e = q->elevator; | 761 | struct elevator_queue *e = q->elevator; |
770 | 762 | ||
771 | if (e->ops->elevator_may_queue_fn) | 763 | if (e->type->ops.elevator_may_queue_fn) |
772 | return e->ops->elevator_may_queue_fn(q, rw); | 764 | return e->type->ops.elevator_may_queue_fn(q, rw); |
773 | 765 | ||
774 | return ELV_MQUEUE_MAY; | 766 | return ELV_MQUEUE_MAY; |
775 | } | 767 | } |
@@ -804,8 +796,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq) | |||
804 | if (blk_account_rq(rq)) { | 796 | if (blk_account_rq(rq)) { |
805 | q->in_flight[rq_is_sync(rq)]--; | 797 | q->in_flight[rq_is_sync(rq)]--; |
806 | if ((rq->cmd_flags & REQ_SORTED) && | 798 | if ((rq->cmd_flags & REQ_SORTED) && |
807 | e->ops->elevator_completed_req_fn) | 799 | e->type->ops.elevator_completed_req_fn) |
808 | e->ops->elevator_completed_req_fn(q, rq); | 800 | e->type->ops.elevator_completed_req_fn(q, rq); |
809 | } | 801 | } |
810 | } | 802 | } |
811 | 803 | ||
@@ -823,7 +815,7 @@ elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | |||
823 | 815 | ||
824 | e = container_of(kobj, struct elevator_queue, kobj); | 816 | e = container_of(kobj, struct elevator_queue, kobj); |
825 | mutex_lock(&e->sysfs_lock); | 817 | mutex_lock(&e->sysfs_lock); |
826 | error = e->ops ? entry->show(e, page) : -ENOENT; | 818 | error = e->type ? entry->show(e, page) : -ENOENT; |
827 | mutex_unlock(&e->sysfs_lock); | 819 | mutex_unlock(&e->sysfs_lock); |
828 | return error; | 820 | return error; |
829 | } | 821 | } |
@@ -841,7 +833,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr, | |||
841 | 833 | ||
842 | e = container_of(kobj, struct elevator_queue, kobj); | 834 | e = container_of(kobj, struct elevator_queue, kobj); |
843 | mutex_lock(&e->sysfs_lock); | 835 | mutex_lock(&e->sysfs_lock); |
844 | error = e->ops ? entry->store(e, page, length) : -ENOENT; | 836 | error = e->type ? entry->store(e, page, length) : -ENOENT; |
845 | mutex_unlock(&e->sysfs_lock); | 837 | mutex_unlock(&e->sysfs_lock); |
846 | return error; | 838 | return error; |
847 | } | 839 | } |
@@ -856,14 +848,13 @@ static struct kobj_type elv_ktype = { | |||
856 | .release = elevator_release, | 848 | .release = elevator_release, |
857 | }; | 849 | }; |
858 | 850 | ||
859 | int elv_register_queue(struct request_queue *q) | 851 | int __elv_register_queue(struct request_queue *q, struct elevator_queue *e) |
860 | { | 852 | { |
861 | struct elevator_queue *e = q->elevator; | ||
862 | int error; | 853 | int error; |
863 | 854 | ||
864 | error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); | 855 | error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); |
865 | if (!error) { | 856 | if (!error) { |
866 | struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; | 857 | struct elv_fs_entry *attr = e->type->elevator_attrs; |
867 | if (attr) { | 858 | if (attr) { |
868 | while (attr->attr.name) { | 859 | while (attr->attr.name) { |
869 | if (sysfs_create_file(&e->kobj, &attr->attr)) | 860 | if (sysfs_create_file(&e->kobj, &attr->attr)) |
@@ -876,31 +867,55 @@ int elv_register_queue(struct request_queue *q) | |||
876 | } | 867 | } |
877 | return error; | 868 | return error; |
878 | } | 869 | } |
879 | EXPORT_SYMBOL(elv_register_queue); | ||
880 | 870 | ||
881 | static void __elv_unregister_queue(struct elevator_queue *e) | 871 | int elv_register_queue(struct request_queue *q) |
882 | { | 872 | { |
883 | kobject_uevent(&e->kobj, KOBJ_REMOVE); | 873 | return __elv_register_queue(q, q->elevator); |
884 | kobject_del(&e->kobj); | ||
885 | e->registered = 0; | ||
886 | } | 874 | } |
875 | EXPORT_SYMBOL(elv_register_queue); | ||
887 | 876 | ||
888 | void elv_unregister_queue(struct request_queue *q) | 877 | void elv_unregister_queue(struct request_queue *q) |
889 | { | 878 | { |
890 | if (q) | 879 | if (q) { |
891 | __elv_unregister_queue(q->elevator); | 880 | struct elevator_queue *e = q->elevator; |
881 | |||
882 | kobject_uevent(&e->kobj, KOBJ_REMOVE); | ||
883 | kobject_del(&e->kobj); | ||
884 | e->registered = 0; | ||
885 | } | ||
892 | } | 886 | } |
893 | EXPORT_SYMBOL(elv_unregister_queue); | 887 | EXPORT_SYMBOL(elv_unregister_queue); |
894 | 888 | ||
895 | void elv_register(struct elevator_type *e) | 889 | int elv_register(struct elevator_type *e) |
896 | { | 890 | { |
897 | char *def = ""; | 891 | char *def = ""; |
898 | 892 | ||
893 | /* create icq_cache if requested */ | ||
894 | if (e->icq_size) { | ||
895 | if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || | ||
896 | WARN_ON(e->icq_align < __alignof__(struct io_cq))) | ||
897 | return -EINVAL; | ||
898 | |||
899 | snprintf(e->icq_cache_name, sizeof(e->icq_cache_name), | ||
900 | "%s_io_cq", e->elevator_name); | ||
901 | e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size, | ||
902 | e->icq_align, 0, NULL); | ||
903 | if (!e->icq_cache) | ||
904 | return -ENOMEM; | ||
905 | } | ||
906 | |||
907 | /* register, don't allow duplicate names */ | ||
899 | spin_lock(&elv_list_lock); | 908 | spin_lock(&elv_list_lock); |
900 | BUG_ON(elevator_find(e->elevator_name)); | 909 | if (elevator_find(e->elevator_name)) { |
910 | spin_unlock(&elv_list_lock); | ||
911 | if (e->icq_cache) | ||
912 | kmem_cache_destroy(e->icq_cache); | ||
913 | return -EBUSY; | ||
914 | } | ||
901 | list_add_tail(&e->list, &elv_list); | 915 | list_add_tail(&e->list, &elv_list); |
902 | spin_unlock(&elv_list_lock); | 916 | spin_unlock(&elv_list_lock); |
903 | 917 | ||
918 | /* print pretty message */ | ||
904 | if (!strcmp(e->elevator_name, chosen_elevator) || | 919 | if (!strcmp(e->elevator_name, chosen_elevator) || |
905 | (!*chosen_elevator && | 920 | (!*chosen_elevator && |
906 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) | 921 | !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) |
@@ -908,30 +923,26 @@ void elv_register(struct elevator_type *e) | |||
908 | 923 | ||
909 | printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, | 924 | printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, |
910 | def); | 925 | def); |
926 | return 0; | ||
911 | } | 927 | } |
912 | EXPORT_SYMBOL_GPL(elv_register); | 928 | EXPORT_SYMBOL_GPL(elv_register); |
913 | 929 | ||
914 | void elv_unregister(struct elevator_type *e) | 930 | void elv_unregister(struct elevator_type *e) |
915 | { | 931 | { |
916 | struct task_struct *g, *p; | 932 | /* unregister */ |
933 | spin_lock(&elv_list_lock); | ||
934 | list_del_init(&e->list); | ||
935 | spin_unlock(&elv_list_lock); | ||
917 | 936 | ||
918 | /* | 937 | /* |
919 | * Iterate every thread in the process to remove the io contexts. | 938 | * Destroy icq_cache if it exists. icq's are RCU managed. Make |
939 | * sure all RCU operations are complete before proceeding. | ||
920 | */ | 940 | */ |
921 | if (e->ops.trim) { | 941 | if (e->icq_cache) { |
922 | read_lock(&tasklist_lock); | 942 | rcu_barrier(); |
923 | do_each_thread(g, p) { | 943 | kmem_cache_destroy(e->icq_cache); |
924 | task_lock(p); | 944 | e->icq_cache = NULL; |
925 | if (p->io_context) | ||
926 | e->ops.trim(p->io_context); | ||
927 | task_unlock(p); | ||
928 | } while_each_thread(g, p); | ||
929 | read_unlock(&tasklist_lock); | ||
930 | } | 945 | } |
931 | |||
932 | spin_lock(&elv_list_lock); | ||
933 | list_del_init(&e->list); | ||
934 | spin_unlock(&elv_list_lock); | ||
935 | } | 946 | } |
936 | EXPORT_SYMBOL_GPL(elv_unregister); | 947 | EXPORT_SYMBOL_GPL(elv_unregister); |
937 | 948 | ||
@@ -944,54 +955,41 @@ EXPORT_SYMBOL_GPL(elv_unregister); | |||
944 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | 955 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) |
945 | { | 956 | { |
946 | struct elevator_queue *old_elevator, *e; | 957 | struct elevator_queue *old_elevator, *e; |
947 | void *data; | ||
948 | int err; | 958 | int err; |
949 | 959 | ||
950 | /* | 960 | /* allocate new elevator */ |
951 | * Allocate new elevator | ||
952 | */ | ||
953 | e = elevator_alloc(q, new_e); | 961 | e = elevator_alloc(q, new_e); |
954 | if (!e) | 962 | if (!e) |
955 | return -ENOMEM; | 963 | return -ENOMEM; |
956 | 964 | ||
957 | data = elevator_init_queue(q, e); | 965 | err = elevator_init_queue(q, e); |
958 | if (!data) { | 966 | if (err) { |
959 | kobject_put(&e->kobj); | 967 | kobject_put(&e->kobj); |
960 | return -ENOMEM; | 968 | return err; |
961 | } | 969 | } |
962 | 970 | ||
963 | /* | 971 | /* turn on BYPASS and drain all requests w/ elevator private data */ |
964 | * Turn on BYPASS and drain all requests w/ elevator private data | ||
965 | */ | ||
966 | elv_quiesce_start(q); | 972 | elv_quiesce_start(q); |
967 | 973 | ||
968 | /* | 974 | /* unregister old queue, register new one and kill old elevator */ |
969 | * Remember old elevator. | 975 | if (q->elevator->registered) { |
970 | */ | 976 | elv_unregister_queue(q); |
971 | old_elevator = q->elevator; | 977 | err = __elv_register_queue(q, e); |
972 | |||
973 | /* | ||
974 | * attach and start new elevator | ||
975 | */ | ||
976 | spin_lock_irq(q->queue_lock); | ||
977 | elevator_attach(q, e, data); | ||
978 | spin_unlock_irq(q->queue_lock); | ||
979 | |||
980 | if (old_elevator->registered) { | ||
981 | __elv_unregister_queue(old_elevator); | ||
982 | |||
983 | err = elv_register_queue(q); | ||
984 | if (err) | 978 | if (err) |
985 | goto fail_register; | 979 | goto fail_register; |
986 | } | 980 | } |
987 | 981 | ||
988 | /* | 982 | /* done, clear io_cq's, switch elevators and turn off BYPASS */ |
989 | * finally exit old elevator and turn off BYPASS. | 983 | spin_lock_irq(q->queue_lock); |
990 | */ | 984 | ioc_clear_queue(q); |
985 | old_elevator = q->elevator; | ||
986 | q->elevator = e; | ||
987 | spin_unlock_irq(q->queue_lock); | ||
988 | |||
991 | elevator_exit(old_elevator); | 989 | elevator_exit(old_elevator); |
992 | elv_quiesce_end(q); | 990 | elv_quiesce_end(q); |
993 | 991 | ||
994 | blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); | 992 | blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name); |
995 | 993 | ||
996 | return 0; | 994 | return 0; |
997 | 995 | ||
@@ -1001,7 +999,6 @@ fail_register: | |||
1001 | * one again (along with re-adding the sysfs dir) | 999 | * one again (along with re-adding the sysfs dir) |
1002 | */ | 1000 | */ |
1003 | elevator_exit(e); | 1001 | elevator_exit(e); |
1004 | q->elevator = old_elevator; | ||
1005 | elv_register_queue(q); | 1002 | elv_register_queue(q); |
1006 | elv_quiesce_end(q); | 1003 | elv_quiesce_end(q); |
1007 | 1004 | ||
@@ -1026,7 +1023,7 @@ int elevator_change(struct request_queue *q, const char *name) | |||
1026 | return -EINVAL; | 1023 | return -EINVAL; |
1027 | } | 1024 | } |
1028 | 1025 | ||
1029 | if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { | 1026 | if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { |
1030 | elevator_put(e); | 1027 | elevator_put(e); |
1031 | return 0; | 1028 | return 0; |
1032 | } | 1029 | } |
@@ -1061,7 +1058,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) | |||
1061 | if (!q->elevator || !blk_queue_stackable(q)) | 1058 | if (!q->elevator || !blk_queue_stackable(q)) |
1062 | return sprintf(name, "none\n"); | 1059 | return sprintf(name, "none\n"); |
1063 | 1060 | ||
1064 | elv = e->elevator_type; | 1061 | elv = e->type; |
1065 | 1062 | ||
1066 | spin_lock(&elv_list_lock); | 1063 | spin_lock(&elv_list_lock); |
1067 | list_for_each_entry(__e, &elv_list, list) { | 1064 | list_for_each_entry(__e, &elv_list, list) { |
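The elevator.c hunks above boil down to two interface changes for I/O schedulers: callbacks are now reached through e->type->ops (the per-queue elevator_queue no longer carries its own ops copy), and elv_register() returns an error code instead of BUG()-ing on a duplicate name, additionally creating an optional per-scheduler io_cq slab when icq_size is set. A minimal sketch of a scheduler module registering under the new interface, assuming hypothetical example_* callbacks and a hypothetical struct example_icq; only elv_register()/elv_unregister() and the elevator_type fields visible in the diff are taken from the patch:

	#include <linux/module.h>
	#include <linux/elevator.h>

	static struct elevator_type elevator_example = {
		.ops = {
			.elevator_dispatch_fn	= example_dispatch,	/* hypothetical callbacks */
			.elevator_add_req_fn	= example_add_request,
		},
		/* Optional: let the block core manage per-(task, queue) io_cq's.
		 * elv_register() checks these against the size and alignment of
		 * struct io_cq and creates an "example_io_cq" kmem cache. */
		.icq_size	= sizeof(struct example_icq),
		.icq_align	= __alignof__(struct example_icq),
		.elevator_name	= "example",
		.elevator_owner	= THIS_MODULE,
	};

	static int __init example_init(void)
	{
		/* elv_register() can now fail with -EBUSY (duplicate name) or
		 * -ENOMEM (icq cache creation), so propagate its return value. */
		return elv_register(&elevator_example);
	}

	static void __exit example_exit(void)
	{
		/* elv_unregister() now handles icq teardown itself: it waits for
		 * pending RCU callbacks with rcu_barrier() and destroys the icq
		 * cache, replacing the old per-task ops.trim() walk. */
		elv_unregister(&elevator_example);
	}

	module_init(example_init);
	module_exit(example_exit);

The noop-iosched.c hunk below shows the same return-value propagation applied to an in-tree scheduler.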
diff --git a/block/genhd.c b/block/genhd.c index 83e7c04015e1..23b4f7063322 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -614,7 +614,7 @@ void add_disk(struct gendisk *disk) | |||
614 | * Take an extra ref on queue which will be put on disk_release() | 614 | * Take an extra ref on queue which will be put on disk_release() |
615 | * so that it sticks around as long as @disk is there. | 615 | * so that it sticks around as long as @disk is there. |
616 | */ | 616 | */ |
617 | WARN_ON_ONCE(blk_get_queue(disk->queue)); | 617 | WARN_ON_ONCE(!blk_get_queue(disk->queue)); |
618 | 618 | ||
619 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, | 619 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
620 | "bdi"); | 620 | "bdi"); |
diff --git a/block/ioctl.c b/block/ioctl.c index 4828fa349813..ba15b2dbfb98 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -296,6 +296,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
296 | return put_uint(arg, bdev_discard_zeroes_data(bdev)); | 296 | return put_uint(arg, bdev_discard_zeroes_data(bdev)); |
297 | case BLKSECTGET: | 297 | case BLKSECTGET: |
298 | return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); | 298 | return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); |
299 | case BLKROTATIONAL: | ||
300 | return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); | ||
299 | case BLKRASET: | 301 | case BLKRASET: |
300 | case BLKFRASET: | 302 | case BLKFRASET: |
301 | if(!capable(CAP_SYS_ADMIN)) | 303 | if(!capable(CAP_SYS_ADMIN)) |
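The ioctl.c hunk adds BLKROTATIONAL, which reports the inverse of the queue's non-rotational flag to userspace as an unsigned short (1 for spinning media, 0 for SSDs and other non-rotational devices). A minimal userspace sketch, assuming /dev/sda purely as an example device node:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* BLKROTATIONAL */

	int main(void)
	{
		unsigned short rotational;
		int fd = open("/dev/sda", O_RDONLY);	/* example device */

		if (fd < 0)
			return 1;
		/* Mirrors !blk_queue_nonrot() in the kernel: 1 means rotational. */
		if (ioctl(fd, BLKROTATIONAL, &rotational) == 0)
			printf("rotational: %hu\n", rotational);
		close(fd);
		return 0;
	}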
diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 06389e9ef96d..413a0b1d788c 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c | |||
@@ -94,9 +94,7 @@ static struct elevator_type elevator_noop = { | |||
94 | 94 | ||
95 | static int __init noop_init(void) | 95 | static int __init noop_init(void) |
96 | { | 96 | { |
97 | elv_register(&elevator_noop); | 97 | return elv_register(&elevator_noop); |
98 | |||
99 | return 0; | ||
100 | } | 98 | } |
101 | 99 | ||
102 | static void __exit noop_exit(void) | 100 | static void __exit noop_exit(void) |