diff options
-rw-r--r-- | block/Makefile | 3 | ||||
-rw-r--r-- | block/blk-core.c | 719 | ||||
-rw-r--r-- | block/blk-sysfs.c | 298 | ||||
-rw-r--r-- | block/blk-tag.c | 396 | ||||
-rw-r--r-- | block/blk.h | 29 |
5 files changed, 735 insertions, 710 deletions
diff --git a/block/Makefile b/block/Makefile index 75597c1263e0..fcaae4ae6704 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -2,7 +2,8 @@ | |||
2 | # Makefile for the kernel block layer | 2 | # Makefile for the kernel block layer |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o ioctl.o genhd.o scsi_ioctl.o | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o ioctl.o \ |
6 | genhd.o scsi_ioctl.o | ||
6 | 7 | ||
7 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 8 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
8 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | 9 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
diff --git a/block/blk-core.c b/block/blk-core.c index 1932a56f5e4b..937f9d0b9bd5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <linux/fault-inject.h> | 32 | #include <linux/fault-inject.h> |
33 | #include <linux/scatterlist.h> | 33 | #include <linux/scatterlist.h> |
34 | 34 | ||
35 | #include "blk.h" | ||
36 | |||
35 | /* | 37 | /* |
36 | * for max sense size | 38 | * for max sense size |
37 | */ | 39 | */ |
@@ -50,12 +52,12 @@ static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | |||
50 | /* | 52 | /* |
51 | * For the allocated request tables | 53 | * For the allocated request tables |
52 | */ | 54 | */ |
53 | static struct kmem_cache *request_cachep; | 55 | struct kmem_cache *request_cachep; |
54 | 56 | ||
55 | /* | 57 | /* |
56 | * For queue allocation | 58 | * For queue allocation |
57 | */ | 59 | */ |
58 | static struct kmem_cache *requestq_cachep; | 60 | struct kmem_cache *blk_requestq_cachep = NULL; |
59 | 61 | ||
60 | /* | 62 | /* |
61 | * For io context allocations | 63 | * For io context allocations |
@@ -80,25 +82,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | |||
80 | /* Number of requests a "batching" process may submit */ | 82 | /* Number of requests a "batching" process may submit */ |
81 | #define BLK_BATCH_REQ 32 | 83 | #define BLK_BATCH_REQ 32 |
82 | 84 | ||
83 | /* | 85 | void blk_queue_congestion_threshold(struct request_queue *q) |
84 | * Return the threshold (number of used requests) at which the queue is | ||
85 | * considered to be congested. It include a little hysteresis to keep the | ||
86 | * context switch rate down. | ||
87 | */ | ||
88 | static inline int queue_congestion_on_threshold(struct request_queue *q) | ||
89 | { | ||
90 | return q->nr_congestion_on; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * The threshold at which a queue is considered to be uncongested | ||
95 | */ | ||
96 | static inline int queue_congestion_off_threshold(struct request_queue *q) | ||
97 | { | ||
98 | return q->nr_congestion_off; | ||
99 | } | ||
100 | |||
101 | static void blk_queue_congestion_threshold(struct request_queue *q) | ||
102 | { | 86 | { |
103 | int nr; | 87 | int nr; |
104 | 88 | ||
@@ -817,397 +801,6 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) | |||
817 | 801 | ||
818 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); | 802 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); |
819 | 803 | ||
820 | /** | ||
821 | * blk_queue_find_tag - find a request by its tag and queue | ||
822 | * @q: The request queue for the device | ||
823 | * @tag: The tag of the request | ||
824 | * | ||
825 | * Notes: | ||
826 | * Should be used when a device returns a tag and you want to match | ||
827 | * it with a request. | ||
828 | * | ||
829 | * no locks need be held. | ||
830 | **/ | ||
831 | struct request *blk_queue_find_tag(struct request_queue *q, int tag) | ||
832 | { | ||
833 | return blk_map_queue_find_tag(q->queue_tags, tag); | ||
834 | } | ||
835 | |||
836 | EXPORT_SYMBOL(blk_queue_find_tag); | ||
837 | |||
838 | /** | ||
839 | * __blk_free_tags - release a given set of tag maintenance info | ||
840 | * @bqt: the tag map to free | ||
841 | * | ||
842 | * Tries to free the specified @bqt@. Returns true if it was | ||
843 | * actually freed and false if there are still references using it | ||
844 | */ | ||
845 | static int __blk_free_tags(struct blk_queue_tag *bqt) | ||
846 | { | ||
847 | int retval; | ||
848 | |||
849 | retval = atomic_dec_and_test(&bqt->refcnt); | ||
850 | if (retval) { | ||
851 | BUG_ON(bqt->busy); | ||
852 | |||
853 | kfree(bqt->tag_index); | ||
854 | bqt->tag_index = NULL; | ||
855 | |||
856 | kfree(bqt->tag_map); | ||
857 | bqt->tag_map = NULL; | ||
858 | |||
859 | kfree(bqt); | ||
860 | |||
861 | } | ||
862 | |||
863 | return retval; | ||
864 | } | ||
865 | |||
866 | /** | ||
867 | * __blk_queue_free_tags - release tag maintenance info | ||
868 | * @q: the request queue for the device | ||
869 | * | ||
870 | * Notes: | ||
871 | * blk_cleanup_queue() will take care of calling this function, if tagging | ||
872 | * has been used. So there's no need to call this directly. | ||
873 | **/ | ||
874 | static void __blk_queue_free_tags(struct request_queue *q) | ||
875 | { | ||
876 | struct blk_queue_tag *bqt = q->queue_tags; | ||
877 | |||
878 | if (!bqt) | ||
879 | return; | ||
880 | |||
881 | __blk_free_tags(bqt); | ||
882 | |||
883 | q->queue_tags = NULL; | ||
884 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); | ||
885 | } | ||
886 | |||
887 | |||
888 | /** | ||
889 | * blk_free_tags - release a given set of tag maintenance info | ||
890 | * @bqt: the tag map to free | ||
891 | * | ||
892 | * For externally managed @bqt@ frees the map. Callers of this | ||
893 | * function must guarantee to have released all the queues that | ||
894 | * might have been using this tag map. | ||
895 | */ | ||
896 | void blk_free_tags(struct blk_queue_tag *bqt) | ||
897 | { | ||
898 | if (unlikely(!__blk_free_tags(bqt))) | ||
899 | BUG(); | ||
900 | } | ||
901 | EXPORT_SYMBOL(blk_free_tags); | ||
902 | |||
903 | /** | ||
904 | * blk_queue_free_tags - release tag maintenance info | ||
905 | * @q: the request queue for the device | ||
906 | * | ||
907 | * Notes: | ||
908 | * This is used to disabled tagged queuing to a device, yet leave | ||
909 | * queue in function. | ||
910 | **/ | ||
911 | void blk_queue_free_tags(struct request_queue *q) | ||
912 | { | ||
913 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | ||
914 | } | ||
915 | |||
916 | EXPORT_SYMBOL(blk_queue_free_tags); | ||
917 | |||
918 | static int | ||
919 | init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) | ||
920 | { | ||
921 | struct request **tag_index; | ||
922 | unsigned long *tag_map; | ||
923 | int nr_ulongs; | ||
924 | |||
925 | if (q && depth > q->nr_requests * 2) { | ||
926 | depth = q->nr_requests * 2; | ||
927 | printk(KERN_ERR "%s: adjusted depth to %d\n", | ||
928 | __FUNCTION__, depth); | ||
929 | } | ||
930 | |||
931 | tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); | ||
932 | if (!tag_index) | ||
933 | goto fail; | ||
934 | |||
935 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; | ||
936 | tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); | ||
937 | if (!tag_map) | ||
938 | goto fail; | ||
939 | |||
940 | tags->real_max_depth = depth; | ||
941 | tags->max_depth = depth; | ||
942 | tags->tag_index = tag_index; | ||
943 | tags->tag_map = tag_map; | ||
944 | |||
945 | return 0; | ||
946 | fail: | ||
947 | kfree(tag_index); | ||
948 | return -ENOMEM; | ||
949 | } | ||
950 | |||
951 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | ||
952 | int depth) | ||
953 | { | ||
954 | struct blk_queue_tag *tags; | ||
955 | |||
956 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); | ||
957 | if (!tags) | ||
958 | goto fail; | ||
959 | |||
960 | if (init_tag_map(q, tags, depth)) | ||
961 | goto fail; | ||
962 | |||
963 | tags->busy = 0; | ||
964 | atomic_set(&tags->refcnt, 1); | ||
965 | return tags; | ||
966 | fail: | ||
967 | kfree(tags); | ||
968 | return NULL; | ||
969 | } | ||
970 | |||
971 | /** | ||
972 | * blk_init_tags - initialize the tag info for an external tag map | ||
973 | * @depth: the maximum queue depth supported | ||
974 | * @tags: the tag to use | ||
975 | **/ | ||
976 | struct blk_queue_tag *blk_init_tags(int depth) | ||
977 | { | ||
978 | return __blk_queue_init_tags(NULL, depth); | ||
979 | } | ||
980 | EXPORT_SYMBOL(blk_init_tags); | ||
981 | |||
982 | /** | ||
983 | * blk_queue_init_tags - initialize the queue tag info | ||
984 | * @q: the request queue for the device | ||
985 | * @depth: the maximum queue depth supported | ||
986 | * @tags: the tag to use | ||
987 | **/ | ||
988 | int blk_queue_init_tags(struct request_queue *q, int depth, | ||
989 | struct blk_queue_tag *tags) | ||
990 | { | ||
991 | int rc; | ||
992 | |||
993 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | ||
994 | |||
995 | if (!tags && !q->queue_tags) { | ||
996 | tags = __blk_queue_init_tags(q, depth); | ||
997 | |||
998 | if (!tags) | ||
999 | goto fail; | ||
1000 | } else if (q->queue_tags) { | ||
1001 | if ((rc = blk_queue_resize_tags(q, depth))) | ||
1002 | return rc; | ||
1003 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | ||
1004 | return 0; | ||
1005 | } else | ||
1006 | atomic_inc(&tags->refcnt); | ||
1007 | |||
1008 | /* | ||
1009 | * assign it, all done | ||
1010 | */ | ||
1011 | q->queue_tags = tags; | ||
1012 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); | ||
1013 | INIT_LIST_HEAD(&q->tag_busy_list); | ||
1014 | return 0; | ||
1015 | fail: | ||
1016 | kfree(tags); | ||
1017 | return -ENOMEM; | ||
1018 | } | ||
1019 | |||
1020 | EXPORT_SYMBOL(blk_queue_init_tags); | ||
1021 | |||
1022 | /** | ||
1023 | * blk_queue_resize_tags - change the queueing depth | ||
1024 | * @q: the request queue for the device | ||
1025 | * @new_depth: the new max command queueing depth | ||
1026 | * | ||
1027 | * Notes: | ||
1028 | * Must be called with the queue lock held. | ||
1029 | **/ | ||
1030 | int blk_queue_resize_tags(struct request_queue *q, int new_depth) | ||
1031 | { | ||
1032 | struct blk_queue_tag *bqt = q->queue_tags; | ||
1033 | struct request **tag_index; | ||
1034 | unsigned long *tag_map; | ||
1035 | int max_depth, nr_ulongs; | ||
1036 | |||
1037 | if (!bqt) | ||
1038 | return -ENXIO; | ||
1039 | |||
1040 | /* | ||
1041 | * if we already have large enough real_max_depth. just | ||
1042 | * adjust max_depth. *NOTE* as requests with tag value | ||
1043 | * between new_depth and real_max_depth can be in-flight, tag | ||
1044 | * map can not be shrunk blindly here. | ||
1045 | */ | ||
1046 | if (new_depth <= bqt->real_max_depth) { | ||
1047 | bqt->max_depth = new_depth; | ||
1048 | return 0; | ||
1049 | } | ||
1050 | |||
1051 | /* | ||
1052 | * Currently cannot replace a shared tag map with a new | ||
1053 | * one, so error out if this is the case | ||
1054 | */ | ||
1055 | if (atomic_read(&bqt->refcnt) != 1) | ||
1056 | return -EBUSY; | ||
1057 | |||
1058 | /* | ||
1059 | * save the old state info, so we can copy it back | ||
1060 | */ | ||
1061 | tag_index = bqt->tag_index; | ||
1062 | tag_map = bqt->tag_map; | ||
1063 | max_depth = bqt->real_max_depth; | ||
1064 | |||
1065 | if (init_tag_map(q, bqt, new_depth)) | ||
1066 | return -ENOMEM; | ||
1067 | |||
1068 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); | ||
1069 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; | ||
1070 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); | ||
1071 | |||
1072 | kfree(tag_index); | ||
1073 | kfree(tag_map); | ||
1074 | return 0; | ||
1075 | } | ||
1076 | |||
1077 | EXPORT_SYMBOL(blk_queue_resize_tags); | ||
1078 | |||
1079 | /** | ||
1080 | * blk_queue_end_tag - end tag operations for a request | ||
1081 | * @q: the request queue for the device | ||
1082 | * @rq: the request that has completed | ||
1083 | * | ||
1084 | * Description: | ||
1085 | * Typically called when end_that_request_first() returns 0, meaning | ||
1086 | * all transfers have been done for a request. It's important to call | ||
1087 | * this function before end_that_request_last(), as that will put the | ||
1088 | * request back on the free list thus corrupting the internal tag list. | ||
1089 | * | ||
1090 | * Notes: | ||
1091 | * queue lock must be held. | ||
1092 | **/ | ||
1093 | void blk_queue_end_tag(struct request_queue *q, struct request *rq) | ||
1094 | { | ||
1095 | struct blk_queue_tag *bqt = q->queue_tags; | ||
1096 | int tag = rq->tag; | ||
1097 | |||
1098 | BUG_ON(tag == -1); | ||
1099 | |||
1100 | if (unlikely(tag >= bqt->real_max_depth)) | ||
1101 | /* | ||
1102 | * This can happen after tag depth has been reduced. | ||
1103 | * FIXME: how about a warning or info message here? | ||
1104 | */ | ||
1105 | return; | ||
1106 | |||
1107 | list_del_init(&rq->queuelist); | ||
1108 | rq->cmd_flags &= ~REQ_QUEUED; | ||
1109 | rq->tag = -1; | ||
1110 | |||
1111 | if (unlikely(bqt->tag_index[tag] == NULL)) | ||
1112 | printk(KERN_ERR "%s: tag %d is missing\n", | ||
1113 | __FUNCTION__, tag); | ||
1114 | |||
1115 | bqt->tag_index[tag] = NULL; | ||
1116 | |||
1117 | if (unlikely(!test_bit(tag, bqt->tag_map))) { | ||
1118 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", | ||
1119 | __FUNCTION__, tag); | ||
1120 | return; | ||
1121 | } | ||
1122 | /* | ||
1123 | * The tag_map bit acts as a lock for tag_index[bit], so we need | ||
1124 | * unlock memory barrier semantics. | ||
1125 | */ | ||
1126 | clear_bit_unlock(tag, bqt->tag_map); | ||
1127 | bqt->busy--; | ||
1128 | } | ||
1129 | |||
1130 | EXPORT_SYMBOL(blk_queue_end_tag); | ||
1131 | |||
1132 | /** | ||
1133 | * blk_queue_start_tag - find a free tag and assign it | ||
1134 | * @q: the request queue for the device | ||
1135 | * @rq: the block request that needs tagging | ||
1136 | * | ||
1137 | * Description: | ||
1138 | * This can either be used as a stand-alone helper, or possibly be | ||
1139 | * assigned as the queue &prep_rq_fn (in which case &struct request | ||
1140 | * automagically gets a tag assigned). Note that this function | ||
1141 | * assumes that any type of request can be queued! if this is not | ||
1142 | * true for your device, you must check the request type before | ||
1143 | * calling this function. The request will also be removed from | ||
1144 | * the request queue, so it's the drivers responsibility to readd | ||
1145 | * it if it should need to be restarted for some reason. | ||
1146 | * | ||
1147 | * Notes: | ||
1148 | * queue lock must be held. | ||
1149 | **/ | ||
1150 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) | ||
1151 | { | ||
1152 | struct blk_queue_tag *bqt = q->queue_tags; | ||
1153 | int tag; | ||
1154 | |||
1155 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { | ||
1156 | printk(KERN_ERR | ||
1157 | "%s: request %p for device [%s] already tagged %d", | ||
1158 | __FUNCTION__, rq, | ||
1159 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); | ||
1160 | BUG(); | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * Protect against shared tag maps, as we may not have exclusive | ||
1165 | * access to the tag map. | ||
1166 | */ | ||
1167 | do { | ||
1168 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | ||
1169 | if (tag >= bqt->max_depth) | ||
1170 | return 1; | ||
1171 | |||
1172 | } while (test_and_set_bit_lock(tag, bqt->tag_map)); | ||
1173 | /* | ||
1174 | * We need lock ordering semantics given by test_and_set_bit_lock. | ||
1175 | * See blk_queue_end_tag for details. | ||
1176 | */ | ||
1177 | |||
1178 | rq->cmd_flags |= REQ_QUEUED; | ||
1179 | rq->tag = tag; | ||
1180 | bqt->tag_index[tag] = rq; | ||
1181 | blkdev_dequeue_request(rq); | ||
1182 | list_add(&rq->queuelist, &q->tag_busy_list); | ||
1183 | bqt->busy++; | ||
1184 | return 0; | ||
1185 | } | ||
1186 | |||
1187 | EXPORT_SYMBOL(blk_queue_start_tag); | ||
1188 | |||
1189 | /** | ||
1190 | * blk_queue_invalidate_tags - invalidate all pending tags | ||
1191 | * @q: the request queue for the device | ||
1192 | * | ||
1193 | * Description: | ||
1194 | * Hardware conditions may dictate a need to stop all pending requests. | ||
1195 | * In this case, we will safely clear the block side of the tag queue and | ||
1196 | * readd all requests to the request queue in the right order. | ||
1197 | * | ||
1198 | * Notes: | ||
1199 | * queue lock must be held. | ||
1200 | **/ | ||
1201 | void blk_queue_invalidate_tags(struct request_queue *q) | ||
1202 | { | ||
1203 | struct list_head *tmp, *n; | ||
1204 | |||
1205 | list_for_each_safe(tmp, n, &q->tag_busy_list) | ||
1206 | blk_requeue_request(q, list_entry_rq(tmp)); | ||
1207 | } | ||
1208 | |||
1209 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | ||
1210 | |||
1211 | void blk_dump_rq_flags(struct request *rq, char *msg) | 804 | void blk_dump_rq_flags(struct request *rq, char *msg) |
1212 | { | 805 | { |
1213 | int bit; | 806 | int bit; |
@@ -1828,41 +1421,6 @@ void blk_run_queue(struct request_queue *q) | |||
1828 | } | 1421 | } |
1829 | EXPORT_SYMBOL(blk_run_queue); | 1422 | EXPORT_SYMBOL(blk_run_queue); |
1830 | 1423 | ||
1831 | /** | ||
1832 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed | ||
1833 | * @kobj: the kobj belonging of the request queue to be released | ||
1834 | * | ||
1835 | * Description: | ||
1836 | * blk_cleanup_queue is the pair to blk_init_queue() or | ||
1837 | * blk_queue_make_request(). It should be called when a request queue is | ||
1838 | * being released; typically when a block device is being de-registered. | ||
1839 | * Currently, its primary task it to free all the &struct request | ||
1840 | * structures that were allocated to the queue and the queue itself. | ||
1841 | * | ||
1842 | * Caveat: | ||
1843 | * Hopefully the low level driver will have finished any | ||
1844 | * outstanding requests first... | ||
1845 | **/ | ||
1846 | static void blk_release_queue(struct kobject *kobj) | ||
1847 | { | ||
1848 | struct request_queue *q = | ||
1849 | container_of(kobj, struct request_queue, kobj); | ||
1850 | struct request_list *rl = &q->rq; | ||
1851 | |||
1852 | blk_sync_queue(q); | ||
1853 | |||
1854 | if (rl->rq_pool) | ||
1855 | mempool_destroy(rl->rq_pool); | ||
1856 | |||
1857 | if (q->queue_tags) | ||
1858 | __blk_queue_free_tags(q); | ||
1859 | |||
1860 | blk_trace_shutdown(q); | ||
1861 | |||
1862 | bdi_destroy(&q->backing_dev_info); | ||
1863 | kmem_cache_free(requestq_cachep, q); | ||
1864 | } | ||
1865 | |||
1866 | void blk_put_queue(struct request_queue *q) | 1424 | void blk_put_queue(struct request_queue *q) |
1867 | { | 1425 | { |
1868 | kobject_put(&q->kobj); | 1426 | kobject_put(&q->kobj); |
@@ -1908,14 +1466,12 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | |||
1908 | } | 1466 | } |
1909 | EXPORT_SYMBOL(blk_alloc_queue); | 1467 | EXPORT_SYMBOL(blk_alloc_queue); |
1910 | 1468 | ||
1911 | static struct kobj_type queue_ktype; | ||
1912 | |||
1913 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 1469 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
1914 | { | 1470 | { |
1915 | struct request_queue *q; | 1471 | struct request_queue *q; |
1916 | int err; | 1472 | int err; |
1917 | 1473 | ||
1918 | q = kmem_cache_alloc_node(requestq_cachep, | 1474 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
1919 | gfp_mask | __GFP_ZERO, node_id); | 1475 | gfp_mask | __GFP_ZERO, node_id); |
1920 | if (!q) | 1476 | if (!q) |
1921 | return NULL; | 1477 | return NULL; |
@@ -1924,13 +1480,13 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
1924 | q->backing_dev_info.unplug_io_data = q; | 1480 | q->backing_dev_info.unplug_io_data = q; |
1925 | err = bdi_init(&q->backing_dev_info); | 1481 | err = bdi_init(&q->backing_dev_info); |
1926 | if (err) { | 1482 | if (err) { |
1927 | kmem_cache_free(requestq_cachep, q); | 1483 | kmem_cache_free(blk_requestq_cachep, q); |
1928 | return NULL; | 1484 | return NULL; |
1929 | } | 1485 | } |
1930 | 1486 | ||
1931 | init_timer(&q->unplug_timer); | 1487 | init_timer(&q->unplug_timer); |
1932 | 1488 | ||
1933 | kobject_init(&q->kobj, &queue_ktype); | 1489 | kobject_init(&q->kobj, &blk_queue_ktype); |
1934 | 1490 | ||
1935 | mutex_init(&q->sysfs_lock); | 1491 | mutex_init(&q->sysfs_lock); |
1936 | 1492 | ||
@@ -1987,7 +1543,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | |||
1987 | 1543 | ||
1988 | q->node = node_id; | 1544 | q->node = node_id; |
1989 | if (blk_init_free_list(q)) { | 1545 | if (blk_init_free_list(q)) { |
1990 | kmem_cache_free(requestq_cachep, q); | 1546 | kmem_cache_free(blk_requestq_cachep, q); |
1991 | return NULL; | 1547 | return NULL; |
1992 | } | 1548 | } |
1993 | 1549 | ||
@@ -4012,7 +3568,7 @@ int __init blk_dev_init(void) | |||
4012 | request_cachep = kmem_cache_create("blkdev_requests", | 3568 | request_cachep = kmem_cache_create("blkdev_requests", |
4013 | sizeof(struct request), 0, SLAB_PANIC, NULL); | 3569 | sizeof(struct request), 0, SLAB_PANIC, NULL); |
4014 | 3570 | ||
4015 | requestq_cachep = kmem_cache_create("blkdev_queue", | 3571 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", |
4016 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 3572 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
4017 | 3573 | ||
4018 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 3574 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
@@ -4200,258 +3756,3 @@ void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) | |||
4200 | } | 3756 | } |
4201 | EXPORT_SYMBOL(swap_io_context); | 3757 | EXPORT_SYMBOL(swap_io_context); |
4202 | 3758 | ||
4203 | /* | ||
4204 | * sysfs parts below | ||
4205 | */ | ||
4206 | struct queue_sysfs_entry { | ||
4207 | struct attribute attr; | ||
4208 | ssize_t (*show)(struct request_queue *, char *); | ||
4209 | ssize_t (*store)(struct request_queue *, const char *, size_t); | ||
4210 | }; | ||
4211 | |||
4212 | static ssize_t | ||
4213 | queue_var_show(unsigned int var, char *page) | ||
4214 | { | ||
4215 | return sprintf(page, "%d\n", var); | ||
4216 | } | ||
4217 | |||
4218 | static ssize_t | ||
4219 | queue_var_store(unsigned long *var, const char *page, size_t count) | ||
4220 | { | ||
4221 | char *p = (char *) page; | ||
4222 | |||
4223 | *var = simple_strtoul(p, &p, 10); | ||
4224 | return count; | ||
4225 | } | ||
4226 | |||
4227 | static ssize_t queue_requests_show(struct request_queue *q, char *page) | ||
4228 | { | ||
4229 | return queue_var_show(q->nr_requests, (page)); | ||
4230 | } | ||
4231 | |||
4232 | static ssize_t | ||
4233 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | ||
4234 | { | ||
4235 | struct request_list *rl = &q->rq; | ||
4236 | unsigned long nr; | ||
4237 | int ret = queue_var_store(&nr, page, count); | ||
4238 | if (nr < BLKDEV_MIN_RQ) | ||
4239 | nr = BLKDEV_MIN_RQ; | ||
4240 | |||
4241 | spin_lock_irq(q->queue_lock); | ||
4242 | q->nr_requests = nr; | ||
4243 | blk_queue_congestion_threshold(q); | ||
4244 | |||
4245 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) | ||
4246 | blk_set_queue_congested(q, READ); | ||
4247 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) | ||
4248 | blk_clear_queue_congested(q, READ); | ||
4249 | |||
4250 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) | ||
4251 | blk_set_queue_congested(q, WRITE); | ||
4252 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) | ||
4253 | blk_clear_queue_congested(q, WRITE); | ||
4254 | |||
4255 | if (rl->count[READ] >= q->nr_requests) { | ||
4256 | blk_set_queue_full(q, READ); | ||
4257 | } else if (rl->count[READ]+1 <= q->nr_requests) { | ||
4258 | blk_clear_queue_full(q, READ); | ||
4259 | wake_up(&rl->wait[READ]); | ||
4260 | } | ||
4261 | |||
4262 | if (rl->count[WRITE] >= q->nr_requests) { | ||
4263 | blk_set_queue_full(q, WRITE); | ||
4264 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { | ||
4265 | blk_clear_queue_full(q, WRITE); | ||
4266 | wake_up(&rl->wait[WRITE]); | ||
4267 | } | ||
4268 | spin_unlock_irq(q->queue_lock); | ||
4269 | return ret; | ||
4270 | } | ||
4271 | |||
4272 | static ssize_t queue_ra_show(struct request_queue *q, char *page) | ||
4273 | { | ||
4274 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | ||
4275 | |||
4276 | return queue_var_show(ra_kb, (page)); | ||
4277 | } | ||
4278 | |||
4279 | static ssize_t | ||
4280 | queue_ra_store(struct request_queue *q, const char *page, size_t count) | ||
4281 | { | ||
4282 | unsigned long ra_kb; | ||
4283 | ssize_t ret = queue_var_store(&ra_kb, page, count); | ||
4284 | |||
4285 | spin_lock_irq(q->queue_lock); | ||
4286 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | ||
4287 | spin_unlock_irq(q->queue_lock); | ||
4288 | |||
4289 | return ret; | ||
4290 | } | ||
4291 | |||
4292 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) | ||
4293 | { | ||
4294 | int max_sectors_kb = q->max_sectors >> 1; | ||
4295 | |||
4296 | return queue_var_show(max_sectors_kb, (page)); | ||
4297 | } | ||
4298 | |||
4299 | static ssize_t | ||
4300 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | ||
4301 | { | ||
4302 | unsigned long max_sectors_kb, | ||
4303 | max_hw_sectors_kb = q->max_hw_sectors >> 1, | ||
4304 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | ||
4305 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | ||
4306 | |||
4307 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | ||
4308 | return -EINVAL; | ||
4309 | /* | ||
4310 | * Take the queue lock to update the readahead and max_sectors | ||
4311 | * values synchronously: | ||
4312 | */ | ||
4313 | spin_lock_irq(q->queue_lock); | ||
4314 | q->max_sectors = max_sectors_kb << 1; | ||
4315 | spin_unlock_irq(q->queue_lock); | ||
4316 | |||
4317 | return ret; | ||
4318 | } | ||
4319 | |||
4320 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) | ||
4321 | { | ||
4322 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; | ||
4323 | |||
4324 | return queue_var_show(max_hw_sectors_kb, (page)); | ||
4325 | } | ||
4326 | |||
4327 | |||
4328 | static struct queue_sysfs_entry queue_requests_entry = { | ||
4329 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, | ||
4330 | .show = queue_requests_show, | ||
4331 | .store = queue_requests_store, | ||
4332 | }; | ||
4333 | |||
4334 | static struct queue_sysfs_entry queue_ra_entry = { | ||
4335 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, | ||
4336 | .show = queue_ra_show, | ||
4337 | .store = queue_ra_store, | ||
4338 | }; | ||
4339 | |||
4340 | static struct queue_sysfs_entry queue_max_sectors_entry = { | ||
4341 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, | ||
4342 | .show = queue_max_sectors_show, | ||
4343 | .store = queue_max_sectors_store, | ||
4344 | }; | ||
4345 | |||
4346 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { | ||
4347 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, | ||
4348 | .show = queue_max_hw_sectors_show, | ||
4349 | }; | ||
4350 | |||
4351 | static struct queue_sysfs_entry queue_iosched_entry = { | ||
4352 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, | ||
4353 | .show = elv_iosched_show, | ||
4354 | .store = elv_iosched_store, | ||
4355 | }; | ||
4356 | |||
4357 | static struct attribute *default_attrs[] = { | ||
4358 | &queue_requests_entry.attr, | ||
4359 | &queue_ra_entry.attr, | ||
4360 | &queue_max_hw_sectors_entry.attr, | ||
4361 | &queue_max_sectors_entry.attr, | ||
4362 | &queue_iosched_entry.attr, | ||
4363 | NULL, | ||
4364 | }; | ||
4365 | |||
4366 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | ||
4367 | |||
4368 | static ssize_t | ||
4369 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | ||
4370 | { | ||
4371 | struct queue_sysfs_entry *entry = to_queue(attr); | ||
4372 | struct request_queue *q = | ||
4373 | container_of(kobj, struct request_queue, kobj); | ||
4374 | ssize_t res; | ||
4375 | |||
4376 | if (!entry->show) | ||
4377 | return -EIO; | ||
4378 | mutex_lock(&q->sysfs_lock); | ||
4379 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | ||
4380 | mutex_unlock(&q->sysfs_lock); | ||
4381 | return -ENOENT; | ||
4382 | } | ||
4383 | res = entry->show(q, page); | ||
4384 | mutex_unlock(&q->sysfs_lock); | ||
4385 | return res; | ||
4386 | } | ||
4387 | |||
4388 | static ssize_t | ||
4389 | queue_attr_store(struct kobject *kobj, struct attribute *attr, | ||
4390 | const char *page, size_t length) | ||
4391 | { | ||
4392 | struct queue_sysfs_entry *entry = to_queue(attr); | ||
4393 | struct request_queue *q = container_of(kobj, struct request_queue, kobj); | ||
4394 | |||
4395 | ssize_t res; | ||
4396 | |||
4397 | if (!entry->store) | ||
4398 | return -EIO; | ||
4399 | mutex_lock(&q->sysfs_lock); | ||
4400 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | ||
4401 | mutex_unlock(&q->sysfs_lock); | ||
4402 | return -ENOENT; | ||
4403 | } | ||
4404 | res = entry->store(q, page, length); | ||
4405 | mutex_unlock(&q->sysfs_lock); | ||
4406 | return res; | ||
4407 | } | ||
4408 | |||
4409 | static struct sysfs_ops queue_sysfs_ops = { | ||
4410 | .show = queue_attr_show, | ||
4411 | .store = queue_attr_store, | ||
4412 | }; | ||
4413 | |||
4414 | static struct kobj_type queue_ktype = { | ||
4415 | .sysfs_ops = &queue_sysfs_ops, | ||
4416 | .default_attrs = default_attrs, | ||
4417 | .release = blk_release_queue, | ||
4418 | }; | ||
4419 | |||
4420 | int blk_register_queue(struct gendisk *disk) | ||
4421 | { | ||
4422 | int ret; | ||
4423 | |||
4424 | struct request_queue *q = disk->queue; | ||
4425 | |||
4426 | if (!q || !q->request_fn) | ||
4427 | return -ENXIO; | ||
4428 | |||
4429 | ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), | ||
4430 | "%s", "queue"); | ||
4431 | if (ret < 0) | ||
4432 | return ret; | ||
4433 | |||
4434 | kobject_uevent(&q->kobj, KOBJ_ADD); | ||
4435 | |||
4436 | ret = elv_register_queue(q); | ||
4437 | if (ret) { | ||
4438 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | ||
4439 | kobject_del(&q->kobj); | ||
4440 | return ret; | ||
4441 | } | ||
4442 | |||
4443 | return 0; | ||
4444 | } | ||
4445 | |||
4446 | void blk_unregister_queue(struct gendisk *disk) | ||
4447 | { | ||
4448 | struct request_queue *q = disk->queue; | ||
4449 | |||
4450 | if (q && q->request_fn) { | ||
4451 | elv_unregister_queue(q); | ||
4452 | |||
4453 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | ||
4454 | kobject_del(&q->kobj); | ||
4455 | kobject_put(&disk->dev.kobj); | ||
4456 | } | ||
4457 | } | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c new file mode 100644 index 000000000000..d9b9afab3456 --- /dev/null +++ b/block/blk-sysfs.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * Functions related to sysfs handling | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/bio.h> | ||
7 | #include <linux/blkdev.h> | ||
8 | #include <linux/blktrace_api.h> | ||
9 | |||
10 | #include "blk.h" | ||
11 | |||
12 | struct queue_sysfs_entry { | ||
13 | struct attribute attr; | ||
14 | ssize_t (*show)(struct request_queue *, char *); | ||
15 | ssize_t (*store)(struct request_queue *, const char *, size_t); | ||
16 | }; | ||
17 | |||
/*
 * Format an unsigned queue variable into @page for sysfs; returns the
 * number of bytes written.  Use %u: @var is unsigned, so %d would print
 * values above INT_MAX incorrectly.
 */
static ssize_t
queue_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%u\n", var);
}
23 | |||
24 | static ssize_t | ||
25 | queue_var_store(unsigned long *var, const char *page, size_t count) | ||
26 | { | ||
27 | char *p = (char *) page; | ||
28 | |||
29 | *var = simple_strtoul(p, &p, 10); | ||
30 | return count; | ||
31 | } | ||
32 | |||
33 | static ssize_t queue_requests_show(struct request_queue *q, char *page) | ||
34 | { | ||
35 | return queue_var_show(q->nr_requests, (page)); | ||
36 | } | ||
37 | |||
38 | static ssize_t | ||
39 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | ||
40 | { | ||
41 | struct request_list *rl = &q->rq; | ||
42 | unsigned long nr; | ||
43 | int ret = queue_var_store(&nr, page, count); | ||
44 | if (nr < BLKDEV_MIN_RQ) | ||
45 | nr = BLKDEV_MIN_RQ; | ||
46 | |||
47 | spin_lock_irq(q->queue_lock); | ||
48 | q->nr_requests = nr; | ||
49 | blk_queue_congestion_threshold(q); | ||
50 | |||
51 | if (rl->count[READ] >= queue_congestion_on_threshold(q)) | ||
52 | blk_set_queue_congested(q, READ); | ||
53 | else if (rl->count[READ] < queue_congestion_off_threshold(q)) | ||
54 | blk_clear_queue_congested(q, READ); | ||
55 | |||
56 | if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) | ||
57 | blk_set_queue_congested(q, WRITE); | ||
58 | else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) | ||
59 | blk_clear_queue_congested(q, WRITE); | ||
60 | |||
61 | if (rl->count[READ] >= q->nr_requests) { | ||
62 | blk_set_queue_full(q, READ); | ||
63 | } else if (rl->count[READ]+1 <= q->nr_requests) { | ||
64 | blk_clear_queue_full(q, READ); | ||
65 | wake_up(&rl->wait[READ]); | ||
66 | } | ||
67 | |||
68 | if (rl->count[WRITE] >= q->nr_requests) { | ||
69 | blk_set_queue_full(q, WRITE); | ||
70 | } else if (rl->count[WRITE]+1 <= q->nr_requests) { | ||
71 | blk_clear_queue_full(q, WRITE); | ||
72 | wake_up(&rl->wait[WRITE]); | ||
73 | } | ||
74 | spin_unlock_irq(q->queue_lock); | ||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | static ssize_t queue_ra_show(struct request_queue *q, char *page) | ||
79 | { | ||
80 | int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); | ||
81 | |||
82 | return queue_var_show(ra_kb, (page)); | ||
83 | } | ||
84 | |||
85 | static ssize_t | ||
86 | queue_ra_store(struct request_queue *q, const char *page, size_t count) | ||
87 | { | ||
88 | unsigned long ra_kb; | ||
89 | ssize_t ret = queue_var_store(&ra_kb, page, count); | ||
90 | |||
91 | spin_lock_irq(q->queue_lock); | ||
92 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | ||
93 | spin_unlock_irq(q->queue_lock); | ||
94 | |||
95 | return ret; | ||
96 | } | ||
97 | |||
98 | static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) | ||
99 | { | ||
100 | int max_sectors_kb = q->max_sectors >> 1; | ||
101 | |||
102 | return queue_var_show(max_sectors_kb, (page)); | ||
103 | } | ||
104 | |||
105 | static ssize_t | ||
106 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | ||
107 | { | ||
108 | unsigned long max_sectors_kb, | ||
109 | max_hw_sectors_kb = q->max_hw_sectors >> 1, | ||
110 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | ||
111 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | ||
112 | |||
113 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | ||
114 | return -EINVAL; | ||
115 | /* | ||
116 | * Take the queue lock to update the readahead and max_sectors | ||
117 | * values synchronously: | ||
118 | */ | ||
119 | spin_lock_irq(q->queue_lock); | ||
120 | q->max_sectors = max_sectors_kb << 1; | ||
121 | spin_unlock_irq(q->queue_lock); | ||
122 | |||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) | ||
127 | { | ||
128 | int max_hw_sectors_kb = q->max_hw_sectors >> 1; | ||
129 | |||
130 | return queue_var_show(max_hw_sectors_kb, (page)); | ||
131 | } | ||
132 | |||
133 | |||
134 | static struct queue_sysfs_entry queue_requests_entry = { | ||
135 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, | ||
136 | .show = queue_requests_show, | ||
137 | .store = queue_requests_store, | ||
138 | }; | ||
139 | |||
140 | static struct queue_sysfs_entry queue_ra_entry = { | ||
141 | .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, | ||
142 | .show = queue_ra_show, | ||
143 | .store = queue_ra_store, | ||
144 | }; | ||
145 | |||
146 | static struct queue_sysfs_entry queue_max_sectors_entry = { | ||
147 | .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, | ||
148 | .show = queue_max_sectors_show, | ||
149 | .store = queue_max_sectors_store, | ||
150 | }; | ||
151 | |||
152 | static struct queue_sysfs_entry queue_max_hw_sectors_entry = { | ||
153 | .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, | ||
154 | .show = queue_max_hw_sectors_show, | ||
155 | }; | ||
156 | |||
157 | static struct queue_sysfs_entry queue_iosched_entry = { | ||
158 | .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, | ||
159 | .show = elv_iosched_show, | ||
160 | .store = elv_iosched_store, | ||
161 | }; | ||
162 | |||
163 | static struct attribute *default_attrs[] = { | ||
164 | &queue_requests_entry.attr, | ||
165 | &queue_ra_entry.attr, | ||
166 | &queue_max_hw_sectors_entry.attr, | ||
167 | &queue_max_sectors_entry.attr, | ||
168 | &queue_iosched_entry.attr, | ||
169 | NULL, | ||
170 | }; | ||
171 | |||
172 | #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) | ||
173 | |||
174 | static ssize_t | ||
175 | queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | ||
176 | { | ||
177 | struct queue_sysfs_entry *entry = to_queue(attr); | ||
178 | struct request_queue *q = | ||
179 | container_of(kobj, struct request_queue, kobj); | ||
180 | ssize_t res; | ||
181 | |||
182 | if (!entry->show) | ||
183 | return -EIO; | ||
184 | mutex_lock(&q->sysfs_lock); | ||
185 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | ||
186 | mutex_unlock(&q->sysfs_lock); | ||
187 | return -ENOENT; | ||
188 | } | ||
189 | res = entry->show(q, page); | ||
190 | mutex_unlock(&q->sysfs_lock); | ||
191 | return res; | ||
192 | } | ||
193 | |||
194 | static ssize_t | ||
195 | queue_attr_store(struct kobject *kobj, struct attribute *attr, | ||
196 | const char *page, size_t length) | ||
197 | { | ||
198 | struct queue_sysfs_entry *entry = to_queue(attr); | ||
199 | struct request_queue *q = container_of(kobj, struct request_queue, kobj); | ||
200 | |||
201 | ssize_t res; | ||
202 | |||
203 | if (!entry->store) | ||
204 | return -EIO; | ||
205 | mutex_lock(&q->sysfs_lock); | ||
206 | if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { | ||
207 | mutex_unlock(&q->sysfs_lock); | ||
208 | return -ENOENT; | ||
209 | } | ||
210 | res = entry->store(q, page, length); | ||
211 | mutex_unlock(&q->sysfs_lock); | ||
212 | return res; | ||
213 | } | ||
214 | |||
215 | /** | ||
216 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed | ||
217 | * @kobj: the kobj belonging of the request queue to be released | ||
218 | * | ||
219 | * Description: | ||
220 | * blk_cleanup_queue is the pair to blk_init_queue() or | ||
221 | * blk_queue_make_request(). It should be called when a request queue is | ||
222 | * being released; typically when a block device is being de-registered. | ||
223 | * Currently, its primary task it to free all the &struct request | ||
224 | * structures that were allocated to the queue and the queue itself. | ||
225 | * | ||
226 | * Caveat: | ||
227 | * Hopefully the low level driver will have finished any | ||
228 | * outstanding requests first... | ||
229 | **/ | ||
230 | static void blk_release_queue(struct kobject *kobj) | ||
231 | { | ||
232 | struct request_queue *q = | ||
233 | container_of(kobj, struct request_queue, kobj); | ||
234 | struct request_list *rl = &q->rq; | ||
235 | |||
236 | blk_sync_queue(q); | ||
237 | |||
238 | if (rl->rq_pool) | ||
239 | mempool_destroy(rl->rq_pool); | ||
240 | |||
241 | if (q->queue_tags) | ||
242 | __blk_queue_free_tags(q); | ||
243 | |||
244 | blk_trace_shutdown(q); | ||
245 | |||
246 | bdi_destroy(&q->backing_dev_info); | ||
247 | kmem_cache_free(blk_requestq_cachep, q); | ||
248 | } | ||
249 | |||
250 | static struct sysfs_ops queue_sysfs_ops = { | ||
251 | .show = queue_attr_show, | ||
252 | .store = queue_attr_store, | ||
253 | }; | ||
254 | |||
255 | struct kobj_type blk_queue_ktype = { | ||
256 | .sysfs_ops = &queue_sysfs_ops, | ||
257 | .default_attrs = default_attrs, | ||
258 | .release = blk_release_queue, | ||
259 | }; | ||
260 | |||
261 | int blk_register_queue(struct gendisk *disk) | ||
262 | { | ||
263 | int ret; | ||
264 | |||
265 | struct request_queue *q = disk->queue; | ||
266 | |||
267 | if (!q || !q->request_fn) | ||
268 | return -ENXIO; | ||
269 | |||
270 | ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), | ||
271 | "%s", "queue"); | ||
272 | if (ret < 0) | ||
273 | return ret; | ||
274 | |||
275 | kobject_uevent(&q->kobj, KOBJ_ADD); | ||
276 | |||
277 | ret = elv_register_queue(q); | ||
278 | if (ret) { | ||
279 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | ||
280 | kobject_del(&q->kobj); | ||
281 | return ret; | ||
282 | } | ||
283 | |||
284 | return 0; | ||
285 | } | ||
286 | |||
287 | void blk_unregister_queue(struct gendisk *disk) | ||
288 | { | ||
289 | struct request_queue *q = disk->queue; | ||
290 | |||
291 | if (q && q->request_fn) { | ||
292 | elv_unregister_queue(q); | ||
293 | |||
294 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | ||
295 | kobject_del(&q->kobj); | ||
296 | kobject_put(&disk->dev.kobj); | ||
297 | } | ||
298 | } | ||
diff --git a/block/blk-tag.c b/block/blk-tag.c new file mode 100644 index 000000000000..d1fd300e8aea --- /dev/null +++ b/block/blk-tag.c | |||
@@ -0,0 +1,396 @@ | |||
1 | /* | ||
2 | * Functions related to tagged command queuing | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/bio.h> | ||
7 | #include <linux/blkdev.h> | ||
8 | |||
9 | /** | ||
10 | * blk_queue_find_tag - find a request by its tag and queue | ||
11 | * @q: The request queue for the device | ||
12 | * @tag: The tag of the request | ||
13 | * | ||
14 | * Notes: | ||
15 | * Should be used when a device returns a tag and you want to match | ||
16 | * it with a request. | ||
17 | * | ||
18 | * no locks need be held. | ||
19 | **/ | ||
20 | struct request *blk_queue_find_tag(struct request_queue *q, int tag) | ||
21 | { | ||
22 | return blk_map_queue_find_tag(q->queue_tags, tag); | ||
23 | } | ||
24 | |||
25 | EXPORT_SYMBOL(blk_queue_find_tag); | ||
26 | |||
27 | /** | ||
28 | * __blk_free_tags - release a given set of tag maintenance info | ||
29 | * @bqt: the tag map to free | ||
30 | * | ||
31 | * Tries to free the specified @bqt@. Returns true if it was | ||
32 | * actually freed and false if there are still references using it | ||
33 | */ | ||
34 | static int __blk_free_tags(struct blk_queue_tag *bqt) | ||
35 | { | ||
36 | int retval; | ||
37 | |||
38 | retval = atomic_dec_and_test(&bqt->refcnt); | ||
39 | if (retval) { | ||
40 | BUG_ON(bqt->busy); | ||
41 | |||
42 | kfree(bqt->tag_index); | ||
43 | bqt->tag_index = NULL; | ||
44 | |||
45 | kfree(bqt->tag_map); | ||
46 | bqt->tag_map = NULL; | ||
47 | |||
48 | kfree(bqt); | ||
49 | } | ||
50 | |||
51 | return retval; | ||
52 | } | ||
53 | |||
54 | /** | ||
55 | * __blk_queue_free_tags - release tag maintenance info | ||
56 | * @q: the request queue for the device | ||
57 | * | ||
58 | * Notes: | ||
59 | * blk_cleanup_queue() will take care of calling this function, if tagging | ||
60 | * has been used. So there's no need to call this directly. | ||
61 | **/ | ||
62 | void __blk_queue_free_tags(struct request_queue *q) | ||
63 | { | ||
64 | struct blk_queue_tag *bqt = q->queue_tags; | ||
65 | |||
66 | if (!bqt) | ||
67 | return; | ||
68 | |||
69 | __blk_free_tags(bqt); | ||
70 | |||
71 | q->queue_tags = NULL; | ||
72 | q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); | ||
73 | } | ||
74 | |||
/**
 * blk_free_tags - release a given set of tag maintenance info
 * @bqt:	the tag map to free
 *
 * For externally managed @bqt@ frees the map.  Callers of this
 * function must guarantee to have released all the queues that
 * might have been using this tag map.
 */
void blk_free_tags(struct blk_queue_tag *bqt)
{
	/* an externally managed map must hold the final reference here */
	BUG_ON(!__blk_free_tags(bqt));
}
EXPORT_SYMBOL(blk_free_tags);
89 | |||
90 | /** | ||
91 | * blk_queue_free_tags - release tag maintenance info | ||
92 | * @q: the request queue for the device | ||
93 | * | ||
94 | * Notes: | ||
95 | * This is used to disabled tagged queuing to a device, yet leave | ||
96 | * queue in function. | ||
97 | **/ | ||
98 | void blk_queue_free_tags(struct request_queue *q) | ||
99 | { | ||
100 | clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | ||
101 | } | ||
102 | |||
103 | EXPORT_SYMBOL(blk_queue_free_tags); | ||
104 | |||
105 | static int | ||
106 | init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) | ||
107 | { | ||
108 | struct request **tag_index; | ||
109 | unsigned long *tag_map; | ||
110 | int nr_ulongs; | ||
111 | |||
112 | if (q && depth > q->nr_requests * 2) { | ||
113 | depth = q->nr_requests * 2; | ||
114 | printk(KERN_ERR "%s: adjusted depth to %d\n", | ||
115 | __FUNCTION__, depth); | ||
116 | } | ||
117 | |||
118 | tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); | ||
119 | if (!tag_index) | ||
120 | goto fail; | ||
121 | |||
122 | nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; | ||
123 | tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); | ||
124 | if (!tag_map) | ||
125 | goto fail; | ||
126 | |||
127 | tags->real_max_depth = depth; | ||
128 | tags->max_depth = depth; | ||
129 | tags->tag_index = tag_index; | ||
130 | tags->tag_map = tag_map; | ||
131 | |||
132 | return 0; | ||
133 | fail: | ||
134 | kfree(tag_index); | ||
135 | return -ENOMEM; | ||
136 | } | ||
137 | |||
138 | static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, | ||
139 | int depth) | ||
140 | { | ||
141 | struct blk_queue_tag *tags; | ||
142 | |||
143 | tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); | ||
144 | if (!tags) | ||
145 | goto fail; | ||
146 | |||
147 | if (init_tag_map(q, tags, depth)) | ||
148 | goto fail; | ||
149 | |||
150 | tags->busy = 0; | ||
151 | atomic_set(&tags->refcnt, 1); | ||
152 | return tags; | ||
153 | fail: | ||
154 | kfree(tags); | ||
155 | return NULL; | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * blk_init_tags - initialize the tag info for an external tag map | ||
160 | * @depth: the maximum queue depth supported | ||
161 | * @tags: the tag to use | ||
162 | **/ | ||
163 | struct blk_queue_tag *blk_init_tags(int depth) | ||
164 | { | ||
165 | return __blk_queue_init_tags(NULL, depth); | ||
166 | } | ||
167 | EXPORT_SYMBOL(blk_init_tags); | ||
168 | |||
169 | /** | ||
170 | * blk_queue_init_tags - initialize the queue tag info | ||
171 | * @q: the request queue for the device | ||
172 | * @depth: the maximum queue depth supported | ||
173 | * @tags: the tag to use | ||
174 | **/ | ||
175 | int blk_queue_init_tags(struct request_queue *q, int depth, | ||
176 | struct blk_queue_tag *tags) | ||
177 | { | ||
178 | int rc; | ||
179 | |||
180 | BUG_ON(tags && q->queue_tags && tags != q->queue_tags); | ||
181 | |||
182 | if (!tags && !q->queue_tags) { | ||
183 | tags = __blk_queue_init_tags(q, depth); | ||
184 | |||
185 | if (!tags) | ||
186 | goto fail; | ||
187 | } else if (q->queue_tags) { | ||
188 | if ((rc = blk_queue_resize_tags(q, depth))) | ||
189 | return rc; | ||
190 | set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); | ||
191 | return 0; | ||
192 | } else | ||
193 | atomic_inc(&tags->refcnt); | ||
194 | |||
195 | /* | ||
196 | * assign it, all done | ||
197 | */ | ||
198 | q->queue_tags = tags; | ||
199 | q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); | ||
200 | INIT_LIST_HEAD(&q->tag_busy_list); | ||
201 | return 0; | ||
202 | fail: | ||
203 | kfree(tags); | ||
204 | return -ENOMEM; | ||
205 | } | ||
206 | |||
207 | EXPORT_SYMBOL(blk_queue_init_tags); | ||
208 | |||
209 | /** | ||
210 | * blk_queue_resize_tags - change the queueing depth | ||
211 | * @q: the request queue for the device | ||
212 | * @new_depth: the new max command queueing depth | ||
213 | * | ||
214 | * Notes: | ||
215 | * Must be called with the queue lock held. | ||
216 | **/ | ||
217 | int blk_queue_resize_tags(struct request_queue *q, int new_depth) | ||
218 | { | ||
219 | struct blk_queue_tag *bqt = q->queue_tags; | ||
220 | struct request **tag_index; | ||
221 | unsigned long *tag_map; | ||
222 | int max_depth, nr_ulongs; | ||
223 | |||
224 | if (!bqt) | ||
225 | return -ENXIO; | ||
226 | |||
227 | /* | ||
228 | * if we already have large enough real_max_depth. just | ||
229 | * adjust max_depth. *NOTE* as requests with tag value | ||
230 | * between new_depth and real_max_depth can be in-flight, tag | ||
231 | * map can not be shrunk blindly here. | ||
232 | */ | ||
233 | if (new_depth <= bqt->real_max_depth) { | ||
234 | bqt->max_depth = new_depth; | ||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * Currently cannot replace a shared tag map with a new | ||
240 | * one, so error out if this is the case | ||
241 | */ | ||
242 | if (atomic_read(&bqt->refcnt) != 1) | ||
243 | return -EBUSY; | ||
244 | |||
245 | /* | ||
246 | * save the old state info, so we can copy it back | ||
247 | */ | ||
248 | tag_index = bqt->tag_index; | ||
249 | tag_map = bqt->tag_map; | ||
250 | max_depth = bqt->real_max_depth; | ||
251 | |||
252 | if (init_tag_map(q, bqt, new_depth)) | ||
253 | return -ENOMEM; | ||
254 | |||
255 | memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); | ||
256 | nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; | ||
257 | memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); | ||
258 | |||
259 | kfree(tag_index); | ||
260 | kfree(tag_map); | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | EXPORT_SYMBOL(blk_queue_resize_tags); | ||
265 | |||
266 | /** | ||
267 | * blk_queue_end_tag - end tag operations for a request | ||
268 | * @q: the request queue for the device | ||
269 | * @rq: the request that has completed | ||
270 | * | ||
271 | * Description: | ||
272 | * Typically called when end_that_request_first() returns 0, meaning | ||
273 | * all transfers have been done for a request. It's important to call | ||
274 | * this function before end_that_request_last(), as that will put the | ||
275 | * request back on the free list thus corrupting the internal tag list. | ||
276 | * | ||
277 | * Notes: | ||
278 | * queue lock must be held. | ||
279 | **/ | ||
280 | void blk_queue_end_tag(struct request_queue *q, struct request *rq) | ||
281 | { | ||
282 | struct blk_queue_tag *bqt = q->queue_tags; | ||
283 | int tag = rq->tag; | ||
284 | |||
285 | BUG_ON(tag == -1); | ||
286 | |||
287 | if (unlikely(tag >= bqt->real_max_depth)) | ||
288 | /* | ||
289 | * This can happen after tag depth has been reduced. | ||
290 | * FIXME: how about a warning or info message here? | ||
291 | */ | ||
292 | return; | ||
293 | |||
294 | list_del_init(&rq->queuelist); | ||
295 | rq->cmd_flags &= ~REQ_QUEUED; | ||
296 | rq->tag = -1; | ||
297 | |||
298 | if (unlikely(bqt->tag_index[tag] == NULL)) | ||
299 | printk(KERN_ERR "%s: tag %d is missing\n", | ||
300 | __FUNCTION__, tag); | ||
301 | |||
302 | bqt->tag_index[tag] = NULL; | ||
303 | |||
304 | if (unlikely(!test_bit(tag, bqt->tag_map))) { | ||
305 | printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", | ||
306 | __FUNCTION__, tag); | ||
307 | return; | ||
308 | } | ||
309 | /* | ||
310 | * The tag_map bit acts as a lock for tag_index[bit], so we need | ||
311 | * unlock memory barrier semantics. | ||
312 | */ | ||
313 | clear_bit_unlock(tag, bqt->tag_map); | ||
314 | bqt->busy--; | ||
315 | } | ||
316 | |||
317 | EXPORT_SYMBOL(blk_queue_end_tag); | ||
318 | |||
319 | /** | ||
320 | * blk_queue_start_tag - find a free tag and assign it | ||
321 | * @q: the request queue for the device | ||
322 | * @rq: the block request that needs tagging | ||
323 | * | ||
324 | * Description: | ||
325 | * This can either be used as a stand-alone helper, or possibly be | ||
326 | * assigned as the queue &prep_rq_fn (in which case &struct request | ||
327 | * automagically gets a tag assigned). Note that this function | ||
328 | * assumes that any type of request can be queued! if this is not | ||
329 | * true for your device, you must check the request type before | ||
330 | * calling this function. The request will also be removed from | ||
331 | * the request queue, so it's the drivers responsibility to readd | ||
332 | * it if it should need to be restarted for some reason. | ||
333 | * | ||
334 | * Notes: | ||
335 | * queue lock must be held. | ||
336 | **/ | ||
337 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) | ||
338 | { | ||
339 | struct blk_queue_tag *bqt = q->queue_tags; | ||
340 | int tag; | ||
341 | |||
342 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { | ||
343 | printk(KERN_ERR | ||
344 | "%s: request %p for device [%s] already tagged %d", | ||
345 | __FUNCTION__, rq, | ||
346 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); | ||
347 | BUG(); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * Protect against shared tag maps, as we may not have exclusive | ||
352 | * access to the tag map. | ||
353 | */ | ||
354 | do { | ||
355 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | ||
356 | if (tag >= bqt->max_depth) | ||
357 | return 1; | ||
358 | |||
359 | } while (test_and_set_bit_lock(tag, bqt->tag_map)); | ||
360 | /* | ||
361 | * We need lock ordering semantics given by test_and_set_bit_lock. | ||
362 | * See blk_queue_end_tag for details. | ||
363 | */ | ||
364 | |||
365 | rq->cmd_flags |= REQ_QUEUED; | ||
366 | rq->tag = tag; | ||
367 | bqt->tag_index[tag] = rq; | ||
368 | blkdev_dequeue_request(rq); | ||
369 | list_add(&rq->queuelist, &q->tag_busy_list); | ||
370 | bqt->busy++; | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | EXPORT_SYMBOL(blk_queue_start_tag); | ||
375 | |||
376 | /** | ||
377 | * blk_queue_invalidate_tags - invalidate all pending tags | ||
378 | * @q: the request queue for the device | ||
379 | * | ||
380 | * Description: | ||
381 | * Hardware conditions may dictate a need to stop all pending requests. | ||
382 | * In this case, we will safely clear the block side of the tag queue and | ||
383 | * readd all requests to the request queue in the right order. | ||
384 | * | ||
385 | * Notes: | ||
386 | * queue lock must be held. | ||
387 | **/ | ||
388 | void blk_queue_invalidate_tags(struct request_queue *q) | ||
389 | { | ||
390 | struct list_head *tmp, *n; | ||
391 | |||
392 | list_for_each_safe(tmp, n, &q->tag_busy_list) | ||
393 | blk_requeue_request(q, list_entry_rq(tmp)); | ||
394 | } | ||
395 | |||
396 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | ||
diff --git a/block/blk.h b/block/blk.h new file mode 100644 index 000000000000..d88549df1b09 --- /dev/null +++ b/block/blk.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef BLK_INTERNAL_H | ||
2 | #define BLK_INTERNAL_H | ||
3 | |||
4 | extern struct kmem_cache *blk_requestq_cachep; | ||
5 | extern struct kobj_type blk_queue_ktype; | ||
6 | |||
7 | void __blk_queue_free_tags(struct request_queue *q); | ||
8 | |||
9 | void blk_queue_congestion_threshold(struct request_queue *q); | ||
10 | |||
11 | /* | ||
12 | * Return the threshold (number of used requests) at which the queue is | ||
13 | * considered to be congested. It include a little hysteresis to keep the | ||
14 | * context switch rate down. | ||
15 | */ | ||
16 | static inline int queue_congestion_on_threshold(struct request_queue *q) | ||
17 | { | ||
18 | return q->nr_congestion_on; | ||
19 | } | ||
20 | |||
21 | /* | ||
22 | * The threshold at which a queue is considered to be uncongested | ||
23 | */ | ||
24 | static inline int queue_congestion_off_threshold(struct request_queue *q) | ||
25 | { | ||
26 | return q->nr_congestion_off; | ||
27 | } | ||
28 | |||
29 | #endif | ||