diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-11-11 21:39:18 -0500 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-11-11 21:59:40 -0500 |
commit | 42b36cc0ce717deeb10030141a43dede763a3ebe (patch) | |
tree | b2dc48b4f16c5dc59461ad24b027d631edda1da4 | |
parent | 1200e646ae238afc536be70257290eb33fb6e364 (diff) |
virtio: Force use of power-of-two for descriptor ring sizes
The virtio descriptor rings of size N-1 were nicely set up to be
aligned to an N-byte boundary. But as Anthony Liguori points out, the
free-running indices used by virtio require that the sizes be a power
of 2, otherwise we get problems on wrap (demonstrated with lguest).
So we replace the clever "2^n-1" scheme with a simple "align to page
boundary" scheme: this means that all virtio rings take at least two
pages, but it's safer than guessing cache alignment.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r-- | Documentation/lguest/lguest.c | 9 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 3 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 8 | ||||
-rw-r--r-- | include/linux/virtio_ring.h | 19 |
4 files changed, 25 insertions, 14 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 157f6a26b939..42008395534d 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -62,8 +62,8 @@ typedef uint8_t u8; | |||
62 | #endif | 62 | #endif |
63 | /* We can have up to 256 pages for devices. */ | 63 | /* We can have up to 256 pages for devices. */ |
64 | #define DEVICE_PAGES 256 | 64 | #define DEVICE_PAGES 256 |
65 | /* This fits nicely in a single 4096-byte page. */ | 65 | /* This will occupy 2 pages: it must be a power of 2. */ |
66 | #define VIRTQUEUE_NUM 127 | 66 | #define VIRTQUEUE_NUM 128 |
67 | 67 | ||
68 | /*L:120 verbose is both a global flag and a macro. The C preprocessor allows | 68 | /*L:120 verbose is both a global flag and a macro. The C preprocessor allows |
69 | * this, and although I wouldn't recommend it, it works quite nicely here. */ | 69 | * this, and although I wouldn't recommend it, it works quite nicely here. */ |
@@ -1036,7 +1036,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1036 | void *p; | 1036 | void *p; |
1037 | 1037 | ||
1038 | /* First we need some pages for this virtqueue. */ | 1038 | /* First we need some pages for this virtqueue. */ |
1039 | pages = (vring_size(num_descs) + getpagesize() - 1) / getpagesize(); | 1039 | pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) |
1040 | / getpagesize(); | ||
1040 | p = get_pages(pages); | 1041 | p = get_pages(pages); |
1041 | 1042 | ||
1042 | /* Initialize the configuration. */ | 1043 | /* Initialize the configuration. */ |
@@ -1045,7 +1046,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, | |||
1045 | vq->config.pfn = to_guest_phys(p) / getpagesize(); | 1046 | vq->config.pfn = to_guest_phys(p) / getpagesize(); |
1046 | 1047 | ||
1047 | /* Initialize the vring. */ | 1048 | /* Initialize the vring. */ |
1048 | vring_init(&vq->vring, num_descs, p); | 1049 | vring_init(&vq->vring, num_descs, p, getpagesize()); |
1049 | 1050 | ||
1050 | /* Add the configuration information to this device's descriptor. */ | 1051 | /* Add the configuration information to this device's descriptor. */ |
1051 | add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, | 1052 | add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, |
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 8904f72f97c6..66f38722253a 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c | |||
@@ -200,7 +200,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
200 | 200 | ||
201 | /* Figure out how many pages the ring will take, and map that memory */ | 201 | /* Figure out how many pages the ring will take, and map that memory */ |
202 | lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, | 202 | lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, |
203 | DIV_ROUND_UP(vring_size(lvq->config.num), | 203 | DIV_ROUND_UP(vring_size(lvq->config.num, |
204 | PAGE_SIZE), | ||
204 | PAGE_SIZE)); | 205 | PAGE_SIZE)); |
205 | if (!lvq->pages) { | 206 | if (!lvq->pages) { |
206 | err = -ENOMEM; | 207 | err = -ENOMEM; |
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 0e1bf053d8cd..1dc04b6684e6 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c | |||
@@ -277,11 +277,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, | |||
277 | struct vring_virtqueue *vq; | 277 | struct vring_virtqueue *vq; |
278 | unsigned int i; | 278 | unsigned int i; |
279 | 279 | ||
280 | /* We assume num is a power of 2. */ | ||
281 | if (num & (num - 1)) { | ||
282 | dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); | ||
283 | return NULL; | ||
284 | } | ||
285 | |||
280 | vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); | 286 | vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); |
281 | if (!vq) | 287 | if (!vq) |
282 | return NULL; | 288 | return NULL; |
283 | 289 | ||
284 | vring_init(&vq->vring, num, pages); | 290 | vring_init(&vq->vring, num, pages, PAGE_SIZE); |
285 | vq->vq.callback = callback; | 291 | vq->vq.callback = callback; |
286 | vq->vq.vdev = vdev; | 292 | vq->vq.vdev = vdev; |
287 | vq->vq.vq_ops = &vring_vq_ops; | 293 | vq->vq.vq_ops = &vring_vq_ops; |
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 5b88d215af50..1a4ed49f6478 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h | |||
@@ -67,7 +67,7 @@ struct vring { | |||
67 | }; | 67 | }; |
68 | 68 | ||
69 | /* The standard layout for the ring is a continuous chunk of memory which looks | 69 | /* The standard layout for the ring is a continuous chunk of memory which looks |
70 | * like this. The used fields will be aligned to a "num+1" boundary. | 70 | * like this. We assume num is a power of 2. |
71 | * | 71 | * |
72 | * struct vring | 72 | * struct vring |
73 | * { | 73 | * { |
@@ -79,8 +79,8 @@ struct vring { | |||
79 | * __u16 avail_idx; | 79 | * __u16 avail_idx; |
80 | * __u16 available[num]; | 80 | * __u16 available[num]; |
81 | * | 81 | * |
82 | * // Padding so a correctly-chosen num value will cache-align used_idx. | 82 | * // Padding to the next page boundary. |
83 | * char pad[sizeof(struct vring_desc) - sizeof(avail_flags)]; | 83 | * char pad[]; |
84 | * | 84 | * |
85 | * // A ring of used descriptor heads with free-running index. | 85 | * // A ring of used descriptor heads with free-running index. |
86 | * __u16 used_flags; | 86 | * __u16 used_flags; |
@@ -88,18 +88,21 @@ struct vring { | |||
88 | * struct vring_used_elem used[num]; | 88 | * struct vring_used_elem used[num]; |
89 | * }; | 89 | * }; |
90 | */ | 90 | */ |
91 | static inline void vring_init(struct vring *vr, unsigned int num, void *p) | 91 | static inline void vring_init(struct vring *vr, unsigned int num, void *p, |
92 | unsigned int pagesize) | ||
92 | { | 93 | { |
93 | vr->num = num; | 94 | vr->num = num; |
94 | vr->desc = p; | 95 | vr->desc = p; |
95 | vr->avail = p + num*sizeof(struct vring_desc); | 96 | vr->avail = p + num*sizeof(struct vring_desc); |
96 | vr->used = p + (num+1)*(sizeof(struct vring_desc) + sizeof(__u16)); | 97 | vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1) |
98 | & ~(pagesize - 1)); | ||
97 | } | 99 | } |
98 | 100 | ||
99 | static inline unsigned vring_size(unsigned int num) | 101 | static inline unsigned vring_size(unsigned int num, unsigned int pagesize) |
100 | { | 102 | { |
101 | return (num + 1) * (sizeof(struct vring_desc) + sizeof(__u16)) | 103 | return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) |
102 | + sizeof(__u32) + num * sizeof(struct vring_used_elem); | 104 | + pagesize - 1) & ~(pagesize - 1)) |
105 | + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; | ||
103 | } | 106 | } |
104 | 107 | ||
105 | #ifdef __KERNEL__ | 108 | #ifdef __KERNEL__ |