author     Bob Liu <bob.liu@oracle.com>  2015-06-03 01:40:03 -0400
committer  Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2015-06-05 21:14:05 -0400
commit     86839c56dee28c315a4c19b7bfee450ccd84cd25 (patch)
tree       325db48d042f41fbea511754e7bdf4799d604960 /drivers/block
parent     8ab0144a466320cc37c52e7866b5103c5bbd4e90 (diff)
xen/block: add multi-page ring support
Extend xen/block to support multi-page rings, so that more requests can be issued by using more than one page as the request ring between blkfront and the backend. As a result, performance can improve significantly. We saw impressive improvements on our high-end iSCSI storage cluster backend: with 64 pages as the ring, IOPS increased about 15 times in the throughput test and more than doubled in the latency test. The reason is that the limit on outstanding requests is 32 with a one-page ring; in our case the iSCSI LUN was spread across about 100 physical drives, and 32 requests were not enough to keep them busy.

Changes in v2:
 - Rebased to 4.0-rc6.
 - Documented how the multi-page ring feature works in linux io/blkif.h.

Changes in v3:
 - Removed the changes to linux io/blkif.h and follow the protocol defined in io/blkif.h of the Xen tree.
 - Rebased to 4.1-rc3.

Changes in v4:
 - Switched to 'ring-page-order' and 'max-ring-page-order'.
 - Addressed a few comments from Roger.

Changes in v5:
 - Clarified the 4K granularity in a comment.
 - Addressed more comments from Roger.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
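To see why the ring size matters, here is a rough, illustrative C sketch (not part of the patch) of how the request-slot count scales with ring-page-order. It mirrors what __CONST_RING_SIZE() computes: the usable bytes divided by the entry size, rounded down to a power of two. The 64-byte header and 112-byte entry sizes are assumptions for the blkif ring, not values taken from this commit.

#include <stdio.h>

/* Assumed sizes; the real values come from the blkif ring definitions. */
#define RING_PAGE_SIZE	4096	/* 4K granularity, as in the patch */
#define RING_HEADER	64	/* assumed shared-ring header size */
#define RING_ENTRY	112	/* assumed request/response union size */

static unsigned int ring_slots(unsigned int ring_page_order)
{
	unsigned int slots = ((RING_PAGE_SIZE << ring_page_order) - RING_HEADER)
			     / RING_ENTRY;

	/* Round down to a power of two, as __RD32()/__CONST_RING_SIZE() do. */
	while (slots & (slots - 1))
		slots &= slots - 1;
	return slots;
}

int main(void)
{
	unsigned int order;

	for (order = 0; order <= 6; order++)
		printf("ring-page-order %u -> %2u page(s), %4u request slots\n",
		       order, 1u << order, ring_slots(order));
	return 0;
}

With the assumed sizes this prints 32 slots for a single page and 2048 slots for a 64-page ring, consistent with the 32-request ceiling the commit message describes for the one-page case.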
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/xen-blkback/blkback.c |  13
-rw-r--r--  drivers/block/xen-blkback/common.h  |   2
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  |  89
-rw-r--r--  drivers/block/xen-blkfront.c        | 135
4 files changed, 180 insertions(+), 59 deletions(-)
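At the xenstore level the negotiation added by this patch is small: the backend advertises 'max-ring-page-order' under its own node, and the frontend either keeps writing the legacy single 'ring-ref' or writes 'ring-page-order' plus one 'ring-ref%u' key per ring page, alongside 'event-channel'. A hypothetical layout (the grant references and event-channel port below are made up for illustration) might look like:

  backend node:
    max-ring-page-order = "4"        (highest order the backend accepts)

  frontend node, multi-page case (ring-page-order = 2, i.e. 4 pages):
    ring-page-order = "2"
    ring-ref0 = "8"
    ring-ref1 = "9"
    ring-ref2 = "10"
    ring-ref3 = "11"
    event-channel = "15"

  frontend node, legacy single-page case:
    ring-ref = "8"
    event-channel = "15"

The backend falls back to the single-page behaviour whenever 'ring-page-order' is absent, which keeps old frontends working.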
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index bd2b3bbbb22c..9121a2c3e26f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants,
84 "Maximum number of grants to map persistently"); 84 "Maximum number of grants to map persistently");
85 85
86/* 86/*
87 * Maximum order of pages to be used for the shared ring between front and
88 * backend, 4KB page granularity is used.
89 */
90unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
91module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
92MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
93/*
87 * The LRU mechanism to clean the lists of persistent grants needs to 94 * The LRU mechanism to clean the lists of persistent grants needs to
88 * be executed periodically. The time interval between consecutive executions 95 * be executed periodically. The time interval between consecutive executions
89 * of the purge mechanism is set in ms. 96 * of the purge mechanism is set in ms.
@@ -1451,6 +1458,12 @@ static int __init xen_blkif_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+		pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+			xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+		xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+	}
+
 	rc = xen_blkif_interface_init();
 	if (rc)
 		goto failed_init;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 043f13b7b7b0..8ccc49d01c8e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -44,6 +44,7 @@
 #include <xen/interface/io/blkif.h>
 #include <xen/interface/io/protocols.h>
 
+extern unsigned int xen_blkif_max_ring_order;
 /*
  * This is the maximum number of segments that would be allowed in indirect
  * requests. This value will also be passed to the frontend.
@@ -320,6 +321,7 @@ struct xen_blkif {
 	struct work_struct free_work;
 	/* Thread shutdown wait queue. */
 	wait_queue_head_t shutdown_wq;
+	unsigned int nr_ring_pages;
 };
 
 struct seg_buf {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index c212d41fd5bd..deb3f001791f 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -25,6 +25,7 @@
 
 /* Enlarge the array size in order to fully show blkback name. */
 #define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
 
 struct backend_info {
 	struct xenbus_device *dev;
@@ -156,8 +157,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	return blkif;
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
-			 unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+			 unsigned int nr_grefs, unsigned int evtchn)
 {
 	int err;
 
@@ -165,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
 	if (blkif->irq)
 		return 0;
 
-	err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
 				     &blkif->blk_ring);
 	if (err < 0)
 		return err;
@@ -175,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
 	{
 		struct blkif_sring *sring;
 		sring = (struct blkif_sring *)blkif->blk_ring;
-		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_32:
 	{
 		struct blkif_x86_32_sring *sring_x86_32;
 		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_64:
 	{
 		struct blkif_x86_64_sring *sring_x86_64;
 		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
 		break;
 	}
 	default:
@@ -270,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 		i++;
 	}
 
-	WARN_ON(i != XEN_BLKIF_REQS_PER_PAGE);
+	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
 
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
@@ -555,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
 	if (err)
 		goto fail;
 
+	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+			    xen_blkif_max_ring_order);
+	if (err)
+		pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+
 	err = xenbus_switch_state(dev, XenbusStateInitWait);
 	if (err)
 		goto fail;
@@ -818,8 +824,8 @@ again:
 static int connect_ring(struct backend_info *be)
 {
 	struct xenbus_device *dev = be->dev;
-	unsigned long ring_ref;
-	unsigned int evtchn;
+	unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+	unsigned int evtchn, nr_grefs, ring_page_order;
 	unsigned int pers_grants;
 	char protocol[64] = "";
 	struct pending_req *req, *n;
@@ -827,14 +833,57 @@ static int connect_ring(struct backend_info *be)
 
 	pr_debug("%s %s\n", __func__, dev->otherend);
 
-	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
-	if (err) {
-		xenbus_dev_fatal(dev, err,
-				 "reading %s/ring-ref and event-channel",
-				 dev->otherend);
+	err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+			   &evtchn);
+	if (err != 1) {
+		err = -EINVAL;
+		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
+				 dev->otherend);
 		return err;
 	}
+	pr_info("event-channel %u\n", evtchn);
+
+	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+			   &ring_page_order);
+	if (err != 1) {
+		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+				   "%u", &ring_ref[0]);
+		if (err != 1) {
+			err = -EINVAL;
+			xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+					 dev->otherend);
+			return err;
+		}
+		nr_grefs = 1;
+		pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+			ring_ref[0]);
+	} else {
+		unsigned int i;
+
+		if (ring_page_order > xen_blkif_max_ring_order) {
+			err = -EINVAL;
+			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
+					 dev->otherend, ring_page_order,
+					 xen_blkif_max_ring_order);
+			return err;
+		}
+
+		nr_grefs = 1 << ring_page_order;
+		for (i = 0; i < nr_grefs; i++) {
+			char ring_ref_name[RINGREF_NAME_LEN];
+
+			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+			err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+					   "%u", &ring_ref[i]);
+			if (err != 1) {
+				err = -EINVAL;
+				xenbus_dev_fatal(dev, err, "reading %s/%s",
+						 dev->otherend, ring_ref_name);
+				return err;
+			}
+			pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+		}
+	}
 
 	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
 	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
@@ -859,12 +908,13 @@ static int connect_ring(struct backend_info *be)
 
 	be->blkif->vbd.feature_gnt_persistent = pers_grants;
 	be->blkif->vbd.overflow_max_grants = 0;
+	be->blkif->nr_ring_pages = nr_grefs;
 
-	pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
-		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+	pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+		nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
 		pers_grants ? "persistent grants" : "");
 
-	for (i = 0; i < XEN_BLKIF_REQS_PER_PAGE; i++) {
+	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
 		req = kzalloc(sizeof(*req), GFP_KERNEL);
 		if (!req)
 			goto fail;
@@ -883,10 +933,9 @@ static int connect_ring(struct backend_info *be)
 	}
 
 	/* Map the shared frame, irq etc. */
-	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+	err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
 	if (err) {
-		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-				 ring_ref, evtchn);
+		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
 		return err;
 	}
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 88e23fd8c7f3..d3c1a9523d1f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32;
 module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
 MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
 
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+static unsigned int xen_blkif_max_ring_order;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+
+#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages)
+#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+/*
+ * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
+ * characters are enough. Define to 20 to keep consist with backend.
+ */
+#define RINGREF_NAME_LEN (20)
 
 /*
  * We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -114,13 +128,14 @@ struct blkfront_info
 	int vdevice;
 	blkif_vdev_t handle;
 	enum blkif_state connected;
-	int ring_ref;
+	int ring_ref[XENBUS_MAX_RING_PAGES];
+	unsigned int nr_ring_pages;
 	struct blkif_front_ring ring;
 	unsigned int evtchn, irq;
 	struct request_queue *rq;
 	struct work_struct work;
 	struct gnttab_free_callback callback;
-	struct blk_shadow shadow[BLK_RING_SIZE];
+	struct blk_shadow shadow[BLK_MAX_RING_SIZE];
 	struct list_head grants;
 	struct list_head indirect_pages;
 	unsigned int persistent_gnts_c;
@@ -139,8 +154,6 @@ static unsigned int nr_minors;
 static unsigned long *minors;
 static DEFINE_SPINLOCK(minor_lock);
 
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
-	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 #define GRANT_INVALID_REF	0
 
 #define PARTS_PER_DISK		16
@@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
-	BUG_ON(free >= BLK_RING_SIZE);
+	BUG_ON(free >= BLK_RING_SIZE(info));
 	info->shadow_free = info->shadow[free].req.u.rw.id;
 	info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
 	return free;
@@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 		}
 	}
 
-	for (i = 0; i < BLK_RING_SIZE; i++) {
+	for (i = 0; i < BLK_RING_SIZE(info); i++) {
 		/*
 		 * Clear persistent grants present in requests already
 		 * on the shared ring
@@ -1033,12 +1046,15 @@ free_shadow:
 	flush_work(&info->work);
 
 	/* Free resources associated with old device channel. */
-	if (info->ring_ref != GRANT_INVALID_REF) {
-		gnttab_end_foreign_access(info->ring_ref, 0,
-					  (unsigned long)info->ring.sring);
-		info->ring_ref = GRANT_INVALID_REF;
-		info->ring.sring = NULL;
-	}
+	for (i = 0; i < info->nr_ring_pages; i++) {
+		if (info->ring_ref[i] != GRANT_INVALID_REF) {
+			gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+			info->ring_ref[i] = GRANT_INVALID_REF;
+		}
+	}
+	free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+	info->ring.sring = NULL;
+
 	if (info->irq)
 		unbind_from_irqhandler(info->irq, info);
 	info->evtchn = info->irq = 0;
@@ -1157,7 +1173,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		 * never have given to it (we stamp it up to BLK_RING_SIZE -
 		 * look in get_id_from_freelist.
 		 */
-		if (id >= BLK_RING_SIZE) {
+		if (id >= BLK_RING_SIZE(info)) {
 			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
 			     info->gd->disk_name, op_name(bret->operation), id);
 			/* We can't safely get the 'struct request' as
@@ -1245,26 +1261,30 @@ static int setup_blkring(struct xenbus_device *dev,
 			 struct blkfront_info *info)
 {
 	struct blkif_sring *sring;
-	grant_ref_t gref;
-	int err;
+	int err, i;
+	unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE;
+	grant_ref_t gref[XENBUS_MAX_RING_PAGES];
 
-	info->ring_ref = GRANT_INVALID_REF;
+	for (i = 0; i < info->nr_ring_pages; i++)
+		info->ring_ref[i] = GRANT_INVALID_REF;
 
-	sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+	sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+						       get_order(ring_size));
 	if (!sring) {
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
 		return -ENOMEM;
 	}
 	SHARED_RING_INIT(sring);
-	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+	FRONT_RING_INIT(&info->ring, sring, ring_size);
 
-	err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
+	err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
 	if (err < 0) {
-		free_page((unsigned long)sring);
+		free_pages((unsigned long)sring, get_order(ring_size));
 		info->ring.sring = NULL;
 		goto fail;
 	}
-	info->ring_ref = gref;
+	for (i = 0; i < info->nr_ring_pages; i++)
+		info->ring_ref[i] = gref[i];
 
 	err = xenbus_alloc_evtchn(dev, &info->evtchn);
 	if (err)
@@ -1292,7 +1312,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
 {
 	const char *message = NULL;
 	struct xenbus_transaction xbt;
-	int err;
+	int err, i;
+	unsigned int max_page_order = 0;
+	unsigned int ring_page_order = 0;
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "max-ring-page-order", "%u", &max_page_order);
+	if (err != 1)
+		info->nr_ring_pages = 1;
+	else {
+		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+		info->nr_ring_pages = 1 << ring_page_order;
+	}
 
 	/* Create shared ring, alloc event channel. */
 	err = setup_blkring(dev, info);
@@ -1306,11 +1337,32 @@ again:
 		goto destroy_blkring;
 	}
 
-	err = xenbus_printf(xbt, dev->nodename,
-			    "ring-ref", "%u", info->ring_ref);
-	if (err) {
-		message = "writing ring-ref";
-		goto abort_transaction;
+	if (info->nr_ring_pages == 1) {
+		err = xenbus_printf(xbt, dev->nodename,
+				    "ring-ref", "%u", info->ring_ref[0]);
+		if (err) {
+			message = "writing ring-ref";
+			goto abort_transaction;
+		}
+	} else {
+		err = xenbus_printf(xbt, dev->nodename,
+				    "ring-page-order", "%u", ring_page_order);
+		if (err) {
+			message = "writing ring-page-order";
+			goto abort_transaction;
+		}
+
+		for (i = 0; i < info->nr_ring_pages; i++) {
+			char ring_ref_name[RINGREF_NAME_LEN];
+
+			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+			err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
+					    "%u", info->ring_ref[i]);
+			if (err) {
+				message = "writing ring-ref";
+				goto abort_transaction;
+			}
+		}
 	}
 	err = xenbus_printf(xbt, dev->nodename,
 			    "event-channel", "%u", info->evtchn);
@@ -1338,6 +1390,9 @@ again:
 		goto destroy_blkring;
 	}
 
+	for (i = 0; i < BLK_RING_SIZE(info); i++)
+		info->shadow[i].req.u.rw.id = i+1;
+	info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
 	xenbus_switch_state(dev, XenbusStateInitialised);
 
 	return 0;
@@ -1361,7 +1416,7 @@ again:
 static int blkfront_probe(struct xenbus_device *dev,
 			  const struct xenbus_device_id *id)
 {
-	int err, vdevice, i;
+	int err, vdevice;
 	struct blkfront_info *info;
 
 	/* FIXME: Use dynamic device id if this is not set. */
@@ -1422,10 +1477,6 @@ static int blkfront_probe(struct xenbus_device *dev,
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 
-	for (i = 0; i < BLK_RING_SIZE; i++)
-		info->shadow[i].req.u.rw.id = i+1;
-	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
 	/* Front end dir is a number, which is used as the id. */
 	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
 	dev_set_drvdata(&dev->dev, info);
@@ -1469,10 +1520,10 @@ static int blkif_recover(struct blkfront_info *info)
 
 	/* Stage 2: Set up free list. */
 	memset(&info->shadow, 0, sizeof(info->shadow));
-	for (i = 0; i < BLK_RING_SIZE; i++)
+	for (i = 0; i < BLK_RING_SIZE(info); i++)
 		info->shadow[i].req.u.rw.id = i+1;
 	info->shadow_free = info->ring.req_prod_pvt;
-	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+	info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
 
 	rc = blkfront_setup_indirect(info);
 	if (rc) {
@@ -1484,7 +1535,7 @@ static int blkif_recover(struct blkfront_info *info)
 	blk_queue_max_segments(info->rq, segs);
 	bio_list_init(&bio_list);
 	INIT_LIST_HEAD(&requests);
-	for (i = 0; i < BLK_RING_SIZE; i++) {
+	for (i = 0; i < BLK_RING_SIZE(info); i++) {
 		/* Not in use? */
 		if (!copy[i].request)
 			continue;
@@ -1690,7 +1741,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 		segs = info->max_indirect_segments;
 	}
 
-	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
 	if (err)
 		goto out_of_memory;
 
@@ -1700,7 +1751,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 		 * grants, we need to allocate a set of pages that can be
 		 * used for mapping indirect grefs
 		 */
-		int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
+		int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info);
 
 		BUG_ON(!list_empty(&info->indirect_pages));
 		for (i = 0; i < num; i++) {
@@ -1711,7 +1762,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 		}
 	}
 
-	for (i = 0; i < BLK_RING_SIZE; i++) {
+	for (i = 0; i < BLK_RING_SIZE(info); i++) {
 		info->shadow[i].grants_used = kzalloc(
 			sizeof(info->shadow[i].grants_used[0]) * segs,
 			GFP_NOIO);
@@ -1733,7 +1784,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 	return 0;
 
 out_of_memory:
-	for (i = 0; i < BLK_RING_SIZE; i++) {
+	for (i = 0; i < BLK_RING_SIZE(info); i++) {
 		kfree(info->shadow[i].grants_used);
 		info->shadow[i].grants_used = NULL;
 		kfree(info->shadow[i].sg);
@@ -2089,6 +2140,12 @@ static int __init xlblk_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+		pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+			xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+		xen_blkif_max_ring_order = 0;
+	}
+
 	if (!xen_has_pv_disk_devices())
 		return -ENODEV;
 
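Both sides gain a 'max_ring_page_order' module parameter. A hypothetical way to allow 16-page rings (order 4), assuming the usual module and parameter naming, might be:

  # dom0 / backend
  modprobe xen-blkback max_ring_page_order=4

  # guest / frontend (built-in driver), on the kernel command line
  xen_blkfront.max_ring_page_order=4

The frontend's default is 0, so a single ring page is used unless the parameter is raised; the order actually negotiated is the minimum of the frontend's parameter and the backend's advertised max-ring-page-order.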