diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 08:04:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 08:04:56 -0400 |
commit | dc92b1f9ab1e1665dbbc56911782358e7f9a49f9 (patch) | |
tree | 965ccb4a0f2c24a8b24adce415f6506246d07a90 | |
parent | 5e090ed7af10729a396a25df43d69a236e789736 (diff) | |
parent | ca16f580a5db7e60bfafe59a50bb133bd3347491 (diff) |
Merge branch 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio changes from Rusty Russell:
"New workflow: same git trees pulled by linux-next get sent straight to
Linus. Git is awkward at shuffling patches compared with quilt or mq,
but that doesn't happen often once things get into my -next branch."
* 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (24 commits)
lguest: fix occasional crash in example launcher.
virtio-blk: Disable callback in virtblk_done()
virtio_mmio: Don't attempt to create empty virtqueues
virtio_mmio: fix off by one error allocating queue
drivers/virtio/virtio_pci.c: fix error return code
virtio: don't crash when device is buggy
virtio: remove CONFIG_VIRTIO_RING
virtio: add help to CONFIG_VIRTIO option.
virtio: support reserved vqs
virtio: introduce an API to set affinity for a virtqueue
virtio-ring: move queue_index to vring_virtqueue
virtio_balloon: not EXPERIMENTAL any more.
virtio-balloon: dependency fix
virtio-blk: fix NULL checking in virtblk_alloc_req()
virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
virtio-blk: Add bio-based IO path for virtio-blk
virtio: console: fix error handling in init() function
tools: Fix pthread flag for Makefile of trace-agent used by virtio-trace
tools: Add guest trace agent as a user tool
virtio/console: Allocate scatterlist according to the current pipe size
...
25 files changed, 1391 insertions, 106 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f9acddd9ace3..c8af429991d9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -656,7 +656,6 @@ config S390_GUEST | |||
656 | depends on 64BIT && EXPERIMENTAL | 656 | depends on 64BIT && EXPERIMENTAL |
657 | select VIRTUALIZATION | 657 | select VIRTUALIZATION |
658 | select VIRTIO | 658 | select VIRTIO |
659 | select VIRTIO_RING | ||
660 | select VIRTIO_CONSOLE | 659 | select VIRTIO_CONSOLE |
661 | help | 660 | help |
662 | Enabling this option adds support for virtio based paravirtual device | 661 | Enabling this option adds support for virtio based paravirtual device |
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 6e121a2a49e1..7872a3330fb5 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig | |||
@@ -4,7 +4,6 @@ config LGUEST_GUEST | |||
4 | depends on X86_32 | 4 | depends on X86_32 |
5 | select VIRTUALIZATION | 5 | select VIRTUALIZATION |
6 | select VIRTIO | 6 | select VIRTIO |
7 | select VIRTIO_RING | ||
8 | select VIRTIO_CONSOLE | 7 | select VIRTIO_CONSOLE |
9 | help | 8 | help |
10 | Lguest is a tiny in-kernel hypervisor. Selecting this will | 9 | Lguest is a tiny in-kernel hypervisor. Selecting this will |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c0bbeb470754..0bdde8fba397 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -14,6 +14,9 @@ | |||
14 | 14 | ||
15 | #define PART_BITS 4 | 15 | #define PART_BITS 4 |
16 | 16 | ||
17 | static bool use_bio; | ||
18 | module_param(use_bio, bool, S_IRUGO); | ||
19 | |||
17 | static int major; | 20 | static int major; |
18 | static DEFINE_IDA(vd_index_ida); | 21 | static DEFINE_IDA(vd_index_ida); |
19 | 22 | ||
@@ -23,6 +26,7 @@ struct virtio_blk | |||
23 | { | 26 | { |
24 | struct virtio_device *vdev; | 27 | struct virtio_device *vdev; |
25 | struct virtqueue *vq; | 28 | struct virtqueue *vq; |
29 | wait_queue_head_t queue_wait; | ||
26 | 30 | ||
27 | /* The disk structure for the kernel. */ | 31 | /* The disk structure for the kernel. */ |
28 | struct gendisk *disk; | 32 | struct gendisk *disk; |
@@ -51,53 +55,244 @@ struct virtio_blk | |||
51 | struct virtblk_req | 55 | struct virtblk_req |
52 | { | 56 | { |
53 | struct request *req; | 57 | struct request *req; |
58 | struct bio *bio; | ||
54 | struct virtio_blk_outhdr out_hdr; | 59 | struct virtio_blk_outhdr out_hdr; |
55 | struct virtio_scsi_inhdr in_hdr; | 60 | struct virtio_scsi_inhdr in_hdr; |
61 | struct work_struct work; | ||
62 | struct virtio_blk *vblk; | ||
63 | int flags; | ||
56 | u8 status; | 64 | u8 status; |
65 | struct scatterlist sg[]; | ||
66 | }; | ||
67 | |||
68 | enum { | ||
69 | VBLK_IS_FLUSH = 1, | ||
70 | VBLK_REQ_FLUSH = 2, | ||
71 | VBLK_REQ_DATA = 4, | ||
72 | VBLK_REQ_FUA = 8, | ||
57 | }; | 73 | }; |
58 | 74 | ||
59 | static void blk_done(struct virtqueue *vq) | 75 | static inline int virtblk_result(struct virtblk_req *vbr) |
76 | { | ||
77 | switch (vbr->status) { | ||
78 | case VIRTIO_BLK_S_OK: | ||
79 | return 0; | ||
80 | case VIRTIO_BLK_S_UNSUPP: | ||
81 | return -ENOTTY; | ||
82 | default: | ||
83 | return -EIO; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk, | ||
88 | gfp_t gfp_mask) | ||
60 | { | 89 | { |
61 | struct virtio_blk *vblk = vq->vdev->priv; | ||
62 | struct virtblk_req *vbr; | 90 | struct virtblk_req *vbr; |
63 | unsigned int len; | ||
64 | unsigned long flags; | ||
65 | 91 | ||
66 | spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); | 92 | vbr = mempool_alloc(vblk->pool, gfp_mask); |
67 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | 93 | if (!vbr) |
68 | int error; | 94 | return NULL; |
69 | 95 | ||
70 | switch (vbr->status) { | 96 | vbr->vblk = vblk; |
71 | case VIRTIO_BLK_S_OK: | 97 | if (use_bio) |
72 | error = 0; | 98 | sg_init_table(vbr->sg, vblk->sg_elems); |
73 | break; | 99 | |
74 | case VIRTIO_BLK_S_UNSUPP: | 100 | return vbr; |
75 | error = -ENOTTY; | 101 | } |
76 | break; | 102 | |
77 | default: | 103 | static void virtblk_add_buf_wait(struct virtio_blk *vblk, |
78 | error = -EIO; | 104 | struct virtblk_req *vbr, |
105 | unsigned long out, | ||
106 | unsigned long in) | ||
107 | { | ||
108 | DEFINE_WAIT(wait); | ||
109 | |||
110 | for (;;) { | ||
111 | prepare_to_wait_exclusive(&vblk->queue_wait, &wait, | ||
112 | TASK_UNINTERRUPTIBLE); | ||
113 | |||
114 | spin_lock_irq(vblk->disk->queue->queue_lock); | ||
115 | if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, | ||
116 | GFP_ATOMIC) < 0) { | ||
117 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
118 | io_schedule(); | ||
119 | } else { | ||
120 | virtqueue_kick(vblk->vq); | ||
121 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
79 | break; | 122 | break; |
80 | } | 123 | } |
81 | 124 | ||
82 | switch (vbr->req->cmd_type) { | 125 | } |
83 | case REQ_TYPE_BLOCK_PC: | 126 | |
84 | vbr->req->resid_len = vbr->in_hdr.residual; | 127 | finish_wait(&vblk->queue_wait, &wait); |
85 | vbr->req->sense_len = vbr->in_hdr.sense_len; | 128 | } |
86 | vbr->req->errors = vbr->in_hdr.errors; | 129 | |
87 | break; | 130 | static inline void virtblk_add_req(struct virtblk_req *vbr, |
88 | case REQ_TYPE_SPECIAL: | 131 | unsigned int out, unsigned int in) |
89 | vbr->req->errors = (error != 0); | 132 | { |
90 | break; | 133 | struct virtio_blk *vblk = vbr->vblk; |
91 | default: | 134 | |
92 | break; | 135 | spin_lock_irq(vblk->disk->queue->queue_lock); |
136 | if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, | ||
137 | GFP_ATOMIC) < 0)) { | ||
138 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
139 | virtblk_add_buf_wait(vblk, vbr, out, in); | ||
140 | return; | ||
141 | } | ||
142 | virtqueue_kick(vblk->vq); | ||
143 | spin_unlock_irq(vblk->disk->queue->queue_lock); | ||
144 | } | ||
145 | |||
146 | static int virtblk_bio_send_flush(struct virtblk_req *vbr) | ||
147 | { | ||
148 | unsigned int out = 0, in = 0; | ||
149 | |||
150 | vbr->flags |= VBLK_IS_FLUSH; | ||
151 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; | ||
152 | vbr->out_hdr.sector = 0; | ||
153 | vbr->out_hdr.ioprio = 0; | ||
154 | sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); | ||
155 | sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status)); | ||
156 | |||
157 | virtblk_add_req(vbr, out, in); | ||
158 | |||
159 | return 0; | ||
160 | } | ||
161 | |||
162 | static int virtblk_bio_send_data(struct virtblk_req *vbr) | ||
163 | { | ||
164 | struct virtio_blk *vblk = vbr->vblk; | ||
165 | unsigned int num, out = 0, in = 0; | ||
166 | struct bio *bio = vbr->bio; | ||
167 | |||
168 | vbr->flags &= ~VBLK_IS_FLUSH; | ||
169 | vbr->out_hdr.type = 0; | ||
170 | vbr->out_hdr.sector = bio->bi_sector; | ||
171 | vbr->out_hdr.ioprio = bio_prio(bio); | ||
172 | |||
173 | sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); | ||
174 | |||
175 | num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out); | ||
176 | |||
177 | sg_set_buf(&vbr->sg[num + out + in++], &vbr->status, | ||
178 | sizeof(vbr->status)); | ||
179 | |||
180 | if (num) { | ||
181 | if (bio->bi_rw & REQ_WRITE) { | ||
182 | vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; | ||
183 | out += num; | ||
184 | } else { | ||
185 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; | ||
186 | in += num; | ||
93 | } | 187 | } |
188 | } | ||
189 | |||
190 | virtblk_add_req(vbr, out, in); | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | static void virtblk_bio_send_data_work(struct work_struct *work) | ||
196 | { | ||
197 | struct virtblk_req *vbr; | ||
198 | |||
199 | vbr = container_of(work, struct virtblk_req, work); | ||
200 | |||
201 | virtblk_bio_send_data(vbr); | ||
202 | } | ||
203 | |||
204 | static void virtblk_bio_send_flush_work(struct work_struct *work) | ||
205 | { | ||
206 | struct virtblk_req *vbr; | ||
207 | |||
208 | vbr = container_of(work, struct virtblk_req, work); | ||
209 | |||
210 | virtblk_bio_send_flush(vbr); | ||
211 | } | ||
212 | |||
213 | static inline void virtblk_request_done(struct virtblk_req *vbr) | ||
214 | { | ||
215 | struct virtio_blk *vblk = vbr->vblk; | ||
216 | struct request *req = vbr->req; | ||
217 | int error = virtblk_result(vbr); | ||
218 | |||
219 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { | ||
220 | req->resid_len = vbr->in_hdr.residual; | ||
221 | req->sense_len = vbr->in_hdr.sense_len; | ||
222 | req->errors = vbr->in_hdr.errors; | ||
223 | } else if (req->cmd_type == REQ_TYPE_SPECIAL) { | ||
224 | req->errors = (error != 0); | ||
225 | } | ||
226 | |||
227 | __blk_end_request_all(req, error); | ||
228 | mempool_free(vbr, vblk->pool); | ||
229 | } | ||
230 | |||
231 | static inline void virtblk_bio_flush_done(struct virtblk_req *vbr) | ||
232 | { | ||
233 | struct virtio_blk *vblk = vbr->vblk; | ||
234 | |||
235 | if (vbr->flags & VBLK_REQ_DATA) { | ||
236 | /* Send out the actual write data */ | ||
237 | INIT_WORK(&vbr->work, virtblk_bio_send_data_work); | ||
238 | queue_work(virtblk_wq, &vbr->work); | ||
239 | } else { | ||
240 | bio_endio(vbr->bio, virtblk_result(vbr)); | ||
241 | mempool_free(vbr, vblk->pool); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | static inline void virtblk_bio_data_done(struct virtblk_req *vbr) | ||
246 | { | ||
247 | struct virtio_blk *vblk = vbr->vblk; | ||
94 | 248 | ||
95 | __blk_end_request_all(vbr->req, error); | 249 | if (unlikely(vbr->flags & VBLK_REQ_FUA)) { |
250 | /* Send out a flush before end the bio */ | ||
251 | vbr->flags &= ~VBLK_REQ_DATA; | ||
252 | INIT_WORK(&vbr->work, virtblk_bio_send_flush_work); | ||
253 | queue_work(virtblk_wq, &vbr->work); | ||
254 | } else { | ||
255 | bio_endio(vbr->bio, virtblk_result(vbr)); | ||
96 | mempool_free(vbr, vblk->pool); | 256 | mempool_free(vbr, vblk->pool); |
97 | } | 257 | } |
258 | } | ||
259 | |||
260 | static inline void virtblk_bio_done(struct virtblk_req *vbr) | ||
261 | { | ||
262 | if (unlikely(vbr->flags & VBLK_IS_FLUSH)) | ||
263 | virtblk_bio_flush_done(vbr); | ||
264 | else | ||
265 | virtblk_bio_data_done(vbr); | ||
266 | } | ||
267 | |||
268 | static void virtblk_done(struct virtqueue *vq) | ||
269 | { | ||
270 | struct virtio_blk *vblk = vq->vdev->priv; | ||
271 | bool bio_done = false, req_done = false; | ||
272 | struct virtblk_req *vbr; | ||
273 | unsigned long flags; | ||
274 | unsigned int len; | ||
275 | |||
276 | spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); | ||
277 | do { | ||
278 | virtqueue_disable_cb(vq); | ||
279 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | ||
280 | if (vbr->bio) { | ||
281 | virtblk_bio_done(vbr); | ||
282 | bio_done = true; | ||
283 | } else { | ||
284 | virtblk_request_done(vbr); | ||
285 | req_done = true; | ||
286 | } | ||
287 | } | ||
288 | } while (!virtqueue_enable_cb(vq)); | ||
98 | /* In case queue is stopped waiting for more buffers. */ | 289 | /* In case queue is stopped waiting for more buffers. */ |
99 | blk_start_queue(vblk->disk->queue); | 290 | if (req_done) |
291 | blk_start_queue(vblk->disk->queue); | ||
100 | spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); | 292 | spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); |
293 | |||
294 | if (bio_done) | ||
295 | wake_up(&vblk->queue_wait); | ||
101 | } | 296 | } |
102 | 297 | ||
103 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | 298 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, |
@@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
106 | unsigned long num, out = 0, in = 0; | 301 | unsigned long num, out = 0, in = 0; |
107 | struct virtblk_req *vbr; | 302 | struct virtblk_req *vbr; |
108 | 303 | ||
109 | vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); | 304 | vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); |
110 | if (!vbr) | 305 | if (!vbr) |
111 | /* When another request finishes we'll try again. */ | 306 | /* When another request finishes we'll try again. */ |
112 | return false; | 307 | return false; |
113 | 308 | ||
114 | vbr->req = req; | 309 | vbr->req = req; |
115 | 310 | vbr->bio = NULL; | |
116 | if (req->cmd_flags & REQ_FLUSH) { | 311 | if (req->cmd_flags & REQ_FLUSH) { |
117 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; | 312 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; |
118 | vbr->out_hdr.sector = 0; | 313 | vbr->out_hdr.sector = 0; |
@@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
172 | } | 367 | } |
173 | } | 368 | } |
174 | 369 | ||
175 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { | 370 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, |
371 | GFP_ATOMIC) < 0) { | ||
176 | mempool_free(vbr, vblk->pool); | 372 | mempool_free(vbr, vblk->pool); |
177 | return false; | 373 | return false; |
178 | } | 374 | } |
@@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
180 | return true; | 376 | return true; |
181 | } | 377 | } |
182 | 378 | ||
183 | static void do_virtblk_request(struct request_queue *q) | 379 | static void virtblk_request(struct request_queue *q) |
184 | { | 380 | { |
185 | struct virtio_blk *vblk = q->queuedata; | 381 | struct virtio_blk *vblk = q->queuedata; |
186 | struct request *req; | 382 | struct request *req; |
@@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q) | |||
203 | virtqueue_kick(vblk->vq); | 399 | virtqueue_kick(vblk->vq); |
204 | } | 400 | } |
205 | 401 | ||
402 | static void virtblk_make_request(struct request_queue *q, struct bio *bio) | ||
403 | { | ||
404 | struct virtio_blk *vblk = q->queuedata; | ||
405 | struct virtblk_req *vbr; | ||
406 | |||
407 | BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems); | ||
408 | |||
409 | vbr = virtblk_alloc_req(vblk, GFP_NOIO); | ||
410 | if (!vbr) { | ||
411 | bio_endio(bio, -ENOMEM); | ||
412 | return; | ||
413 | } | ||
414 | |||
415 | vbr->bio = bio; | ||
416 | vbr->flags = 0; | ||
417 | if (bio->bi_rw & REQ_FLUSH) | ||
418 | vbr->flags |= VBLK_REQ_FLUSH; | ||
419 | if (bio->bi_rw & REQ_FUA) | ||
420 | vbr->flags |= VBLK_REQ_FUA; | ||
421 | if (bio->bi_size) | ||
422 | vbr->flags |= VBLK_REQ_DATA; | ||
423 | |||
424 | if (unlikely(vbr->flags & VBLK_REQ_FLUSH)) | ||
425 | virtblk_bio_send_flush(vbr); | ||
426 | else | ||
427 | virtblk_bio_send_data(vbr); | ||
428 | } | ||
429 | |||
206 | /* return id (s/n) string for *disk to *id_str | 430 | /* return id (s/n) string for *disk to *id_str |
207 | */ | 431 | */ |
208 | static int virtblk_get_id(struct gendisk *disk, char *id_str) | 432 | static int virtblk_get_id(struct gendisk *disk, char *id_str) |
@@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk) | |||
360 | int err = 0; | 584 | int err = 0; |
361 | 585 | ||
362 | /* We expect one virtqueue, for output. */ | 586 | /* We expect one virtqueue, for output. */ |
363 | vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests"); | 587 | vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); |
364 | if (IS_ERR(vblk->vq)) | 588 | if (IS_ERR(vblk->vq)) |
365 | err = PTR_ERR(vblk->vq); | 589 | err = PTR_ERR(vblk->vq); |
366 | 590 | ||
@@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
477 | struct virtio_blk *vblk; | 701 | struct virtio_blk *vblk; |
478 | struct request_queue *q; | 702 | struct request_queue *q; |
479 | int err, index; | 703 | int err, index; |
704 | int pool_size; | ||
705 | |||
480 | u64 cap; | 706 | u64 cap; |
481 | u32 v, blk_size, sg_elems, opt_io_size; | 707 | u32 v, blk_size, sg_elems, opt_io_size; |
482 | u16 min_io_size; | 708 | u16 min_io_size; |
@@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
506 | goto out_free_index; | 732 | goto out_free_index; |
507 | } | 733 | } |
508 | 734 | ||
735 | init_waitqueue_head(&vblk->queue_wait); | ||
509 | vblk->vdev = vdev; | 736 | vblk->vdev = vdev; |
510 | vblk->sg_elems = sg_elems; | 737 | vblk->sg_elems = sg_elems; |
511 | sg_init_table(vblk->sg, vblk->sg_elems); | 738 | sg_init_table(vblk->sg, vblk->sg_elems); |
512 | mutex_init(&vblk->config_lock); | 739 | mutex_init(&vblk->config_lock); |
740 | |||
513 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); | 741 | INIT_WORK(&vblk->config_work, virtblk_config_changed_work); |
514 | vblk->config_enable = true; | 742 | vblk->config_enable = true; |
515 | 743 | ||
@@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
517 | if (err) | 745 | if (err) |
518 | goto out_free_vblk; | 746 | goto out_free_vblk; |
519 | 747 | ||
520 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); | 748 | pool_size = sizeof(struct virtblk_req); |
749 | if (use_bio) | ||
750 | pool_size += sizeof(struct scatterlist) * sg_elems; | ||
751 | vblk->pool = mempool_create_kmalloc_pool(1, pool_size); | ||
521 | if (!vblk->pool) { | 752 | if (!vblk->pool) { |
522 | err = -ENOMEM; | 753 | err = -ENOMEM; |
523 | goto out_free_vq; | 754 | goto out_free_vq; |
@@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
530 | goto out_mempool; | 761 | goto out_mempool; |
531 | } | 762 | } |
532 | 763 | ||
533 | q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL); | 764 | q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); |
534 | if (!q) { | 765 | if (!q) { |
535 | err = -ENOMEM; | 766 | err = -ENOMEM; |
536 | goto out_put_disk; | 767 | goto out_put_disk; |
537 | } | 768 | } |
538 | 769 | ||
770 | if (use_bio) | ||
771 | blk_queue_make_request(q, virtblk_make_request); | ||
539 | q->queuedata = vblk; | 772 | q->queuedata = vblk; |
540 | 773 | ||
541 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); | 774 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); |
@@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) | |||
620 | if (!err && opt_io_size) | 853 | if (!err && opt_io_size) |
621 | blk_queue_io_opt(q, blk_size * opt_io_size); | 854 | blk_queue_io_opt(q, blk_size * opt_io_size); |
622 | 855 | ||
623 | |||
624 | add_disk(vblk->disk); | 856 | add_disk(vblk->disk); |
625 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); | 857 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); |
626 | if (err) | 858 | if (err) |
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 060a672ebb7b..8ab9c3d4bf13 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/err.h> | 24 | #include <linux/err.h> |
25 | #include <linux/freezer.h> | 25 | #include <linux/freezer.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/splice.h> | ||
28 | #include <linux/pagemap.h> | ||
27 | #include <linux/init.h> | 29 | #include <linux/init.h> |
28 | #include <linux/list.h> | 30 | #include <linux/list.h> |
29 | #include <linux/poll.h> | 31 | #include <linux/poll.h> |
@@ -474,26 +476,53 @@ static ssize_t send_control_msg(struct port *port, unsigned int event, | |||
474 | return 0; | 476 | return 0; |
475 | } | 477 | } |
476 | 478 | ||
479 | struct buffer_token { | ||
480 | union { | ||
481 | void *buf; | ||
482 | struct scatterlist *sg; | ||
483 | } u; | ||
484 | /* If sgpages == 0 then buf is used, else sg is used */ | ||
485 | unsigned int sgpages; | ||
486 | }; | ||
487 | |||
488 | static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages) | ||
489 | { | ||
490 | int i; | ||
491 | struct page *page; | ||
492 | |||
493 | for (i = 0; i < nrpages; i++) { | ||
494 | page = sg_page(&sg[i]); | ||
495 | if (!page) | ||
496 | break; | ||
497 | put_page(page); | ||
498 | } | ||
499 | kfree(sg); | ||
500 | } | ||
501 | |||
477 | /* Callers must take the port->outvq_lock */ | 502 | /* Callers must take the port->outvq_lock */ |
478 | static void reclaim_consumed_buffers(struct port *port) | 503 | static void reclaim_consumed_buffers(struct port *port) |
479 | { | 504 | { |
480 | void *buf; | 505 | struct buffer_token *tok; |
481 | unsigned int len; | 506 | unsigned int len; |
482 | 507 | ||
483 | if (!port->portdev) { | 508 | if (!port->portdev) { |
484 | /* Device has been unplugged. vqs are already gone. */ | 509 | /* Device has been unplugged. vqs are already gone. */ |
485 | return; | 510 | return; |
486 | } | 511 | } |
487 | while ((buf = virtqueue_get_buf(port->out_vq, &len))) { | 512 | while ((tok = virtqueue_get_buf(port->out_vq, &len))) { |
488 | kfree(buf); | 513 | if (tok->sgpages) |
514 | reclaim_sg_pages(tok->u.sg, tok->sgpages); | ||
515 | else | ||
516 | kfree(tok->u.buf); | ||
517 | kfree(tok); | ||
489 | port->outvq_full = false; | 518 | port->outvq_full = false; |
490 | } | 519 | } |
491 | } | 520 | } |
492 | 521 | ||
493 | static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, | 522 | static ssize_t __send_to_port(struct port *port, struct scatterlist *sg, |
494 | bool nonblock) | 523 | int nents, size_t in_count, |
524 | struct buffer_token *tok, bool nonblock) | ||
495 | { | 525 | { |
496 | struct scatterlist sg[1]; | ||
497 | struct virtqueue *out_vq; | 526 | struct virtqueue *out_vq; |
498 | ssize_t ret; | 527 | ssize_t ret; |
499 | unsigned long flags; | 528 | unsigned long flags; |
@@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, | |||
505 | 534 | ||
506 | reclaim_consumed_buffers(port); | 535 | reclaim_consumed_buffers(port); |
507 | 536 | ||
508 | sg_init_one(sg, in_buf, in_count); | 537 | ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC); |
509 | ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC); | ||
510 | 538 | ||
511 | /* Tell Host to go! */ | 539 | /* Tell Host to go! */ |
512 | virtqueue_kick(out_vq); | 540 | virtqueue_kick(out_vq); |
@@ -544,6 +572,37 @@ done: | |||
544 | return in_count; | 572 | return in_count; |
545 | } | 573 | } |
546 | 574 | ||
575 | static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, | ||
576 | bool nonblock) | ||
577 | { | ||
578 | struct scatterlist sg[1]; | ||
579 | struct buffer_token *tok; | ||
580 | |||
581 | tok = kmalloc(sizeof(*tok), GFP_ATOMIC); | ||
582 | if (!tok) | ||
583 | return -ENOMEM; | ||
584 | tok->sgpages = 0; | ||
585 | tok->u.buf = in_buf; | ||
586 | |||
587 | sg_init_one(sg, in_buf, in_count); | ||
588 | |||
589 | return __send_to_port(port, sg, 1, in_count, tok, nonblock); | ||
590 | } | ||
591 | |||
592 | static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents, | ||
593 | size_t in_count, bool nonblock) | ||
594 | { | ||
595 | struct buffer_token *tok; | ||
596 | |||
597 | tok = kmalloc(sizeof(*tok), GFP_ATOMIC); | ||
598 | if (!tok) | ||
599 | return -ENOMEM; | ||
600 | tok->sgpages = nents; | ||
601 | tok->u.sg = sg; | ||
602 | |||
603 | return __send_to_port(port, sg, nents, in_count, tok, nonblock); | ||
604 | } | ||
605 | |||
547 | /* | 606 | /* |
548 | * Give out the data that's requested from the buffer that we have | 607 | * Give out the data that's requested from the buffer that we have |
549 | * queued up. | 608 | * queued up. |
@@ -665,6 +724,26 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, | |||
665 | return fill_readbuf(port, ubuf, count, true); | 724 | return fill_readbuf(port, ubuf, count, true); |
666 | } | 725 | } |
667 | 726 | ||
727 | static int wait_port_writable(struct port *port, bool nonblock) | ||
728 | { | ||
729 | int ret; | ||
730 | |||
731 | if (will_write_block(port)) { | ||
732 | if (nonblock) | ||
733 | return -EAGAIN; | ||
734 | |||
735 | ret = wait_event_freezable(port->waitqueue, | ||
736 | !will_write_block(port)); | ||
737 | if (ret < 0) | ||
738 | return ret; | ||
739 | } | ||
740 | /* Port got hot-unplugged. */ | ||
741 | if (!port->guest_connected) | ||
742 | return -ENODEV; | ||
743 | |||
744 | return 0; | ||
745 | } | ||
746 | |||
668 | static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, | 747 | static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, |
669 | size_t count, loff_t *offp) | 748 | size_t count, loff_t *offp) |
670 | { | 749 | { |
@@ -681,18 +760,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, | |||
681 | 760 | ||
682 | nonblock = filp->f_flags & O_NONBLOCK; | 761 | nonblock = filp->f_flags & O_NONBLOCK; |
683 | 762 | ||
684 | if (will_write_block(port)) { | 763 | ret = wait_port_writable(port, nonblock); |
685 | if (nonblock) | 764 | if (ret < 0) |
686 | return -EAGAIN; | 765 | return ret; |
687 | |||
688 | ret = wait_event_freezable(port->waitqueue, | ||
689 | !will_write_block(port)); | ||
690 | if (ret < 0) | ||
691 | return ret; | ||
692 | } | ||
693 | /* Port got hot-unplugged. */ | ||
694 | if (!port->guest_connected) | ||
695 | return -ENODEV; | ||
696 | 766 | ||
697 | count = min((size_t)(32 * 1024), count); | 767 | count = min((size_t)(32 * 1024), count); |
698 | 768 | ||
@@ -725,6 +795,93 @@ out: | |||
725 | return ret; | 795 | return ret; |
726 | } | 796 | } |
727 | 797 | ||
798 | struct sg_list { | ||
799 | unsigned int n; | ||
800 | unsigned int size; | ||
801 | size_t len; | ||
802 | struct scatterlist *sg; | ||
803 | }; | ||
804 | |||
805 | static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | ||
806 | struct splice_desc *sd) | ||
807 | { | ||
808 | struct sg_list *sgl = sd->u.data; | ||
809 | unsigned int offset, len; | ||
810 | |||
811 | if (sgl->n == sgl->size) | ||
812 | return 0; | ||
813 | |||
814 | /* Try lock this page */ | ||
815 | if (buf->ops->steal(pipe, buf) == 0) { | ||
816 | /* Get reference and unlock page for moving */ | ||
817 | get_page(buf->page); | ||
818 | unlock_page(buf->page); | ||
819 | |||
820 | len = min(buf->len, sd->len); | ||
821 | sg_set_page(&(sgl->sg[sgl->n]), buf->page, len, buf->offset); | ||
822 | } else { | ||
823 | /* Failback to copying a page */ | ||
824 | struct page *page = alloc_page(GFP_KERNEL); | ||
825 | char *src = buf->ops->map(pipe, buf, 1); | ||
826 | char *dst; | ||
827 | |||
828 | if (!page) | ||
829 | return -ENOMEM; | ||
830 | dst = kmap(page); | ||
831 | |||
832 | offset = sd->pos & ~PAGE_MASK; | ||
833 | |||
834 | len = sd->len; | ||
835 | if (len + offset > PAGE_SIZE) | ||
836 | len = PAGE_SIZE - offset; | ||
837 | |||
838 | memcpy(dst + offset, src + buf->offset, len); | ||
839 | |||
840 | kunmap(page); | ||
841 | buf->ops->unmap(pipe, buf, src); | ||
842 | |||
843 | sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); | ||
844 | } | ||
845 | sgl->n++; | ||
846 | sgl->len += len; | ||
847 | |||
848 | return len; | ||
849 | } | ||
850 | |||
851 | /* Faster zero-copy write by splicing */ | ||
852 | static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, | ||
853 | struct file *filp, loff_t *ppos, | ||
854 | size_t len, unsigned int flags) | ||
855 | { | ||
856 | struct port *port = filp->private_data; | ||
857 | struct sg_list sgl; | ||
858 | ssize_t ret; | ||
859 | struct splice_desc sd = { | ||
860 | .total_len = len, | ||
861 | .flags = flags, | ||
862 | .pos = *ppos, | ||
863 | .u.data = &sgl, | ||
864 | }; | ||
865 | |||
866 | ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); | ||
867 | if (ret < 0) | ||
868 | return ret; | ||
869 | |||
870 | sgl.n = 0; | ||
871 | sgl.len = 0; | ||
872 | sgl.size = pipe->nrbufs; | ||
873 | sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL); | ||
874 | if (unlikely(!sgl.sg)) | ||
875 | return -ENOMEM; | ||
876 | |||
877 | sg_init_table(sgl.sg, sgl.size); | ||
878 | ret = __splice_from_pipe(pipe, &sd, pipe_to_sg); | ||
879 | if (likely(ret > 0)) | ||
880 | ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true); | ||
881 | |||
882 | return ret; | ||
883 | } | ||
884 | |||
728 | static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | 885 | static unsigned int port_fops_poll(struct file *filp, poll_table *wait) |
729 | { | 886 | { |
730 | struct port *port; | 887 | struct port *port; |
@@ -856,6 +1013,7 @@ static const struct file_operations port_fops = { | |||
856 | .open = port_fops_open, | 1013 | .open = port_fops_open, |
857 | .read = port_fops_read, | 1014 | .read = port_fops_read, |
858 | .write = port_fops_write, | 1015 | .write = port_fops_write, |
1016 | .splice_write = port_fops_splice_write, | ||
859 | .poll = port_fops_poll, | 1017 | .poll = port_fops_poll, |
860 | .release = port_fops_release, | 1018 | .release = port_fops_release, |
861 | .fasync = port_fops_fasync, | 1019 | .fasync = port_fops_fasync, |
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 9e8388efd88e..fc92ccbd71dc 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c | |||
@@ -263,6 +263,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
263 | struct virtqueue *vq; | 263 | struct virtqueue *vq; |
264 | int err; | 264 | int err; |
265 | 265 | ||
266 | if (!name) | ||
267 | return NULL; | ||
268 | |||
266 | /* We must have this many virtqueues. */ | 269 | /* We must have this many virtqueues. */ |
267 | if (index >= ldev->desc->num_vq) | 270 | if (index >= ldev->desc->num_vq) |
268 | return ERR_PTR(-ENOENT); | 271 | return ERR_PTR(-ENOENT); |
@@ -296,7 +299,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, | |||
296 | * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu | 299 | * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu |
297 | * barriers. | 300 | * barriers. |
298 | */ | 301 | */ |
299 | vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev, | 302 | vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev, |
300 | true, lvq->pages, lg_notify, callback, name); | 303 | true, lvq->pages, lg_notify, callback, name); |
301 | if (!vq) { | 304 | if (!vq) { |
302 | err = -ENOMEM; | 305 | err = -ENOMEM; |
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 3541b4492f64..e7a4780e93db 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c | |||
@@ -84,6 +84,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, | |||
84 | if (id >= ARRAY_SIZE(rvdev->vring)) | 84 | if (id >= ARRAY_SIZE(rvdev->vring)) |
85 | return ERR_PTR(-EINVAL); | 85 | return ERR_PTR(-EINVAL); |
86 | 86 | ||
87 | if (!name) | ||
88 | return NULL; | ||
89 | |||
87 | ret = rproc_alloc_vring(rvdev, id); | 90 | ret = rproc_alloc_vring(rvdev, id); |
88 | if (ret) | 91 | if (ret) |
89 | return ERR_PTR(ret); | 92 | return ERR_PTR(ret); |
@@ -103,7 +106,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, | |||
103 | * Create the new vq, and tell virtio we're not interested in | 106 | * Create the new vq, and tell virtio we're not interested in |
104 | * the 'weak' smp barriers, since we're talking with a real device. | 107 | * the 'weak' smp barriers, since we're talking with a real device. |
105 | */ | 108 | */ |
106 | vq = vring_new_virtqueue(len, rvring->align, vdev, false, addr, | 109 | vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr, |
107 | rproc_virtio_notify, callback, name); | 110 | rproc_virtio_notify, callback, name); |
108 | if (!vq) { | 111 | if (!vq) { |
109 | dev_err(dev, "vring_new_virtqueue %s failed\n", name); | 112 | dev_err(dev, "vring_new_virtqueue %s failed\n", name); |
diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig index 32aead65735a..2bd911f12571 100644 --- a/drivers/rpmsg/Kconfig +++ b/drivers/rpmsg/Kconfig | |||
@@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)" | |||
4 | config RPMSG | 4 | config RPMSG |
5 | tristate | 5 | tristate |
6 | select VIRTIO | 6 | select VIRTIO |
7 | select VIRTIO_RING | ||
8 | depends on EXPERIMENTAL | 7 | depends on EXPERIMENTAL |
9 | 8 | ||
10 | endmenu | 9 | endmenu |
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 47cccd52aae8..7dabef624da3 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c | |||
@@ -190,6 +190,9 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, | |||
190 | if (index >= kdev->desc->num_vq) | 190 | if (index >= kdev->desc->num_vq) |
191 | return ERR_PTR(-ENOENT); | 191 | return ERR_PTR(-ENOENT); |
192 | 192 | ||
193 | if (!name) | ||
194 | return NULL; | ||
195 | |||
193 | config = kvm_vq_config(kdev->desc)+index; | 196 | config = kvm_vq_config(kdev->desc)+index; |
194 | 197 | ||
195 | err = vmem_add_mapping(config->address, | 198 | err = vmem_add_mapping(config->address, |
@@ -198,7 +201,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, | |||
198 | if (err) | 201 | if (err) |
199 | goto out; | 202 | goto out; |
200 | 203 | ||
201 | vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, | 204 | vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN, |
202 | vdev, true, (void *) config->address, | 205 | vdev, true, (void *) config->address, |
203 | kvm_notify, callback, name); | 206 | kvm_notify, callback, name); |
204 | if (!vq) { | 207 | if (!vq) { |
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index f38b17a86c35..8d5bddb56cb1 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig | |||
@@ -1,11 +1,9 @@ | |||
1 | # Virtio always gets selected by whoever wants it. | ||
2 | config VIRTIO | 1 | config VIRTIO |
3 | tristate | 2 | tristate |
4 | 3 | ---help--- | |
5 | # Similarly the virtio ring implementation. | 4 | This option is selected by any driver which implements the virtio |
6 | config VIRTIO_RING | 5 | bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST, |
7 | tristate | 6 | CONFIG_RPMSG or CONFIG_S390_GUEST. |
8 | depends on VIRTIO | ||
9 | 7 | ||
10 | menu "Virtio drivers" | 8 | menu "Virtio drivers" |
11 | 9 | ||
@@ -13,7 +11,6 @@ config VIRTIO_PCI | |||
13 | tristate "PCI driver for virtio devices (EXPERIMENTAL)" | 11 | tristate "PCI driver for virtio devices (EXPERIMENTAL)" |
14 | depends on PCI && EXPERIMENTAL | 12 | depends on PCI && EXPERIMENTAL |
15 | select VIRTIO | 13 | select VIRTIO |
16 | select VIRTIO_RING | ||
17 | ---help--- | 14 | ---help--- |
18 | This drivers provides support for virtio based paravirtual device | 15 | This drivers provides support for virtio based paravirtual device |
19 | drivers over PCI. This requires that your VMM has appropriate PCI | 16 | drivers over PCI. This requires that your VMM has appropriate PCI |
@@ -26,9 +23,8 @@ config VIRTIO_PCI | |||
26 | If unsure, say M. | 23 | If unsure, say M. |
27 | 24 | ||
28 | config VIRTIO_BALLOON | 25 | config VIRTIO_BALLOON |
29 | tristate "Virtio balloon driver (EXPERIMENTAL)" | 26 | tristate "Virtio balloon driver" |
30 | select VIRTIO | 27 | depends on VIRTIO |
31 | select VIRTIO_RING | ||
32 | ---help--- | 28 | ---help--- |
33 | This driver supports increasing and decreasing the amount | 29 | This driver supports increasing and decreasing the amount |
34 | of memory within a KVM guest. | 30 | of memory within a KVM guest. |
@@ -39,7 +35,6 @@ config VIRTIO_BALLOON | |||
39 | tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)" | 35 | tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)" |
40 | depends on HAS_IOMEM && EXPERIMENTAL | 36 | depends on HAS_IOMEM && EXPERIMENTAL |
41 | select VIRTIO | 37 | select VIRTIO |
42 | select VIRTIO_RING | ||
43 | ---help--- | 38 | ---help--- |
44 | This drivers provides support for memory mapped virtio | 39 | This drivers provides support for memory mapped virtio |
45 | platform device driver. | 40 | platform device driver. |
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 5a4c63cfd380..9076635697bb 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile | |||
@@ -1,5 +1,4 @@ | |||
1 | obj-$(CONFIG_VIRTIO) += virtio.o | 1 | obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o |
2 | obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o | ||
3 | obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o | 2 | obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o |
4 | obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o | 3 | obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o |
5 | obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o | 4 | obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o |
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index c3b3f7f0d9d1..1e8659ca27ef 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c | |||
@@ -159,7 +159,7 @@ static int virtio_dev_remove(struct device *_d) | |||
159 | drv->remove(dev); | 159 | drv->remove(dev); |
160 | 160 | ||
161 | /* Driver should have reset device. */ | 161 | /* Driver should have reset device. */ |
162 | BUG_ON(dev->config->get_status(dev)); | 162 | WARN_ON_ONCE(dev->config->get_status(dev)); |
163 | 163 | ||
164 | /* Acknowledge the device's existence again. */ | 164 | /* Acknowledge the device's existence again. */ |
165 | add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); | 165 | add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); |
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 453db0c403d8..6b1b7e184939 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c | |||
@@ -131,9 +131,6 @@ struct virtio_mmio_vq_info { | |||
131 | /* the number of entries in the queue */ | 131 | /* the number of entries in the queue */ |
132 | unsigned int num; | 132 | unsigned int num; |
133 | 133 | ||
134 | /* the index of the queue */ | ||
135 | int queue_index; | ||
136 | |||
137 | /* the virtual address of the ring queue */ | 134 | /* the virtual address of the ring queue */ |
138 | void *queue; | 135 | void *queue; |
139 | 136 | ||
@@ -225,11 +222,10 @@ static void vm_reset(struct virtio_device *vdev) | |||
225 | static void vm_notify(struct virtqueue *vq) | 222 | static void vm_notify(struct virtqueue *vq) |
226 | { | 223 | { |
227 | struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); | 224 | struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); |
228 | struct virtio_mmio_vq_info *info = vq->priv; | ||
229 | 225 | ||
230 | /* We write the queue's selector into the notification register to | 226 | /* We write the queue's selector into the notification register to |
231 | * signal the other end */ | 227 | * signal the other end */ |
232 | writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); | 228 | writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); |
233 | } | 229 | } |
234 | 230 | ||
235 | /* Notify all virtqueues on an interrupt. */ | 231 | /* Notify all virtqueues on an interrupt. */ |
@@ -270,6 +266,7 @@ static void vm_del_vq(struct virtqueue *vq) | |||
270 | struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); | 266 | struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); |
271 | struct virtio_mmio_vq_info *info = vq->priv; | 267 | struct virtio_mmio_vq_info *info = vq->priv; |
272 | unsigned long flags, size; | 268 | unsigned long flags, size; |
269 | unsigned int index = virtqueue_get_queue_index(vq); | ||
273 | 270 | ||
274 | spin_lock_irqsave(&vm_dev->lock, flags); | 271 | spin_lock_irqsave(&vm_dev->lock, flags); |
275 | list_del(&info->node); | 272 | list_del(&info->node); |
@@ -278,7 +275,7 @@ static void vm_del_vq(struct virtqueue *vq) | |||
278 | vring_del_virtqueue(vq); | 275 | vring_del_virtqueue(vq); |
279 | 276 | ||
280 | /* Select and deactivate the queue */ | 277 | /* Select and deactivate the queue */ |
281 | writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); | 278 | writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); |
282 | writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); | 279 | writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); |
283 | 280 | ||
284 | size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); | 281 | size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); |
@@ -309,6 +306,9 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, | |||
309 | unsigned long flags, size; | 306 | unsigned long flags, size; |
310 | int err; | 307 | int err; |
311 | 308 | ||
309 | if (!name) | ||
310 | return NULL; | ||
311 | |||
312 | /* Select the queue we're interested in */ | 312 | /* Select the queue we're interested in */ |
313 | writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); | 313 | writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); |
314 | 314 | ||
@@ -324,7 +324,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, | |||
324 | err = -ENOMEM; | 324 | err = -ENOMEM; |
325 | goto error_kmalloc; | 325 | goto error_kmalloc; |
326 | } | 326 | } |
327 | info->queue_index = index; | ||
328 | 327 | ||
329 | /* Allocate pages for the queue - start with a queue as big as | 328 | /* Allocate pages for the queue - start with a queue as big as |
330 | * possible (limited by maximum size allowed by device), drop down | 329 | * possible (limited by maximum size allowed by device), drop down |
@@ -332,11 +331,21 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, | |||
332 | * and two rings (which makes it "alignment_size * 2") | 331 | * and two rings (which makes it "alignment_size * 2") |
333 | */ | 332 | */ |
334 | info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX); | 333 | info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX); |
334 | |||
335 | /* If the device reports a 0 entry queue, we won't be able to | ||
336 | * use it to perform I/O, and vring_new_virtqueue() can't create | ||
337 | * empty queues anyway, so don't bother to set up the device. | ||
338 | */ | ||
339 | if (info->num == 0) { | ||
340 | err = -ENOENT; | ||
341 | goto error_alloc_pages; | ||
342 | } | ||
343 | |||
335 | while (1) { | 344 | while (1) { |
336 | size = PAGE_ALIGN(vring_size(info->num, | 345 | size = PAGE_ALIGN(vring_size(info->num, |
337 | VIRTIO_MMIO_VRING_ALIGN)); | 346 | VIRTIO_MMIO_VRING_ALIGN)); |
338 | /* Already smallest possible allocation? */ | 347 | /* Did the last iter shrink the queue below minimum size? */ |
339 | if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) { | 348 | if (size < VIRTIO_MMIO_VRING_ALIGN * 2) { |
340 | err = -ENOMEM; | 349 | err = -ENOMEM; |
341 | goto error_alloc_pages; | 350 | goto error_alloc_pages; |
342 | } | 351 | } |
@@ -356,7 +365,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, | |||
356 | vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); | 365 | vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); |
357 | 366 | ||
358 | /* Create the vring */ | 367 | /* Create the vring */ |
359 | vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, | 368 | vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, |
360 | true, info->queue, vm_notify, callback, name); | 369 | true, info->queue, vm_notify, callback, name); |
361 | if (!vq) { | 370 | if (!vq) { |
362 | err = -ENOMEM; | 371 | err = -ENOMEM; |
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 2e03d416b9af..c33aea36598a 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c | |||
@@ -48,6 +48,7 @@ struct virtio_pci_device | |||
48 | int msix_enabled; | 48 | int msix_enabled; |
49 | int intx_enabled; | 49 | int intx_enabled; |
50 | struct msix_entry *msix_entries; | 50 | struct msix_entry *msix_entries; |
51 | cpumask_var_t *msix_affinity_masks; | ||
51 | /* Name strings for interrupts. This size should be enough, | 52 | /* Name strings for interrupts. This size should be enough, |
52 | * and I'm too lazy to allocate each name separately. */ | 53 | * and I'm too lazy to allocate each name separately. */ |
53 | char (*msix_names)[256]; | 54 | char (*msix_names)[256]; |
@@ -79,9 +80,6 @@ struct virtio_pci_vq_info | |||
79 | /* the number of entries in the queue */ | 80 | /* the number of entries in the queue */ |
80 | int num; | 81 | int num; |
81 | 82 | ||
82 | /* the index of the queue */ | ||
83 | int queue_index; | ||
84 | |||
85 | /* the virtual address of the ring queue */ | 83 | /* the virtual address of the ring queue */ |
86 | void *queue; | 84 | void *queue; |
87 | 85 | ||
@@ -202,11 +200,11 @@ static void vp_reset(struct virtio_device *vdev) | |||
202 | static void vp_notify(struct virtqueue *vq) | 200 | static void vp_notify(struct virtqueue *vq) |
203 | { | 201 | { |
204 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); | 202 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); |
205 | struct virtio_pci_vq_info *info = vq->priv; | ||
206 | 203 | ||
207 | /* we write the queue's selector into the notification register to | 204 | /* we write the queue's selector into the notification register to |
208 | * signal the other end */ | 205 | * signal the other end */ |
209 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); | 206 | iowrite16(virtqueue_get_queue_index(vq), |
207 | vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); | ||
210 | } | 208 | } |
211 | 209 | ||
212 | /* Handle a configuration change: Tell driver if it wants to know. */ | 210 | /* Handle a configuration change: Tell driver if it wants to know. */ |
@@ -279,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev) | |||
279 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) | 277 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) |
280 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); | 278 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); |
281 | 279 | ||
280 | for (i = 0; i < vp_dev->msix_vectors; i++) | ||
281 | if (vp_dev->msix_affinity_masks[i]) | ||
282 | free_cpumask_var(vp_dev->msix_affinity_masks[i]); | ||
283 | |||
282 | if (vp_dev->msix_enabled) { | 284 | if (vp_dev->msix_enabled) { |
283 | /* Disable the vector used for configuration */ | 285 | /* Disable the vector used for configuration */ |
284 | iowrite16(VIRTIO_MSI_NO_VECTOR, | 286 | iowrite16(VIRTIO_MSI_NO_VECTOR, |
@@ -296,6 +298,8 @@ static void vp_free_vectors(struct virtio_device *vdev) | |||
296 | vp_dev->msix_names = NULL; | 298 | vp_dev->msix_names = NULL; |
297 | kfree(vp_dev->msix_entries); | 299 | kfree(vp_dev->msix_entries); |
298 | vp_dev->msix_entries = NULL; | 300 | vp_dev->msix_entries = NULL; |
301 | kfree(vp_dev->msix_affinity_masks); | ||
302 | vp_dev->msix_affinity_masks = NULL; | ||
299 | } | 303 | } |
300 | 304 | ||
301 | static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, | 305 | static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, |
@@ -314,6 +318,15 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, | |||
314 | GFP_KERNEL); | 318 | GFP_KERNEL); |
315 | if (!vp_dev->msix_names) | 319 | if (!vp_dev->msix_names) |
316 | goto error; | 320 | goto error; |
321 | vp_dev->msix_affinity_masks | ||
322 | = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, | ||
323 | GFP_KERNEL); | ||
324 | if (!vp_dev->msix_affinity_masks) | ||
325 | goto error; | ||
326 | for (i = 0; i < nvectors; ++i) | ||
327 | if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], | ||
328 | GFP_KERNEL)) | ||
329 | goto error; | ||
317 | 330 | ||
318 | for (i = 0; i < nvectors; ++i) | 331 | for (i = 0; i < nvectors; ++i) |
319 | vp_dev->msix_entries[i].entry = i; | 332 | vp_dev->msix_entries[i].entry = i; |
@@ -402,7 +415,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, | |||
402 | if (!info) | 415 | if (!info) |
403 | return ERR_PTR(-ENOMEM); | 416 | return ERR_PTR(-ENOMEM); |
404 | 417 | ||
405 | info->queue_index = index; | ||
406 | info->num = num; | 418 | info->num = num; |
407 | info->msix_vector = msix_vec; | 419 | info->msix_vector = msix_vec; |
408 | 420 | ||
@@ -418,7 +430,7 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, | |||
418 | vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); | 430 | vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); |
419 | 431 | ||
420 | /* create the vring */ | 432 | /* create the vring */ |
421 | vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev, | 433 | vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev, |
422 | true, info->queue, vp_notify, callback, name); | 434 | true, info->queue, vp_notify, callback, name); |
423 | if (!vq) { | 435 | if (!vq) { |
424 | err = -ENOMEM; | 436 | err = -ENOMEM; |
@@ -467,7 +479,8 @@ static void vp_del_vq(struct virtqueue *vq) | |||
467 | list_del(&info->node); | 479 | list_del(&info->node); |
468 | spin_unlock_irqrestore(&vp_dev->lock, flags); | 480 | spin_unlock_irqrestore(&vp_dev->lock, flags); |
469 | 481 | ||
470 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); | 482 | iowrite16(virtqueue_get_queue_index(vq), |
483 | vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); | ||
471 | 484 | ||
472 | if (vp_dev->msix_enabled) { | 485 | if (vp_dev->msix_enabled) { |
473 | iowrite16(VIRTIO_MSI_NO_VECTOR, | 486 | iowrite16(VIRTIO_MSI_NO_VECTOR, |
@@ -542,7 +555,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, | |||
542 | vp_dev->per_vq_vectors = per_vq_vectors; | 555 | vp_dev->per_vq_vectors = per_vq_vectors; |
543 | allocated_vectors = vp_dev->msix_used_vectors; | 556 | allocated_vectors = vp_dev->msix_used_vectors; |
544 | for (i = 0; i < nvqs; ++i) { | 557 | for (i = 0; i < nvqs; ++i) { |
545 | if (!callbacks[i] || !vp_dev->msix_enabled) | 558 | if (!names[i]) { |
559 | vqs[i] = NULL; | ||
560 | continue; | ||
561 | } else if (!callbacks[i] || !vp_dev->msix_enabled) | ||
546 | msix_vec = VIRTIO_MSI_NO_VECTOR; | 562 | msix_vec = VIRTIO_MSI_NO_VECTOR; |
547 | else if (vp_dev->per_vq_vectors) | 563 | else if (vp_dev->per_vq_vectors) |
548 | msix_vec = allocated_vectors++; | 564 | msix_vec = allocated_vectors++; |
@@ -609,6 +625,35 @@ static const char *vp_bus_name(struct virtio_device *vdev) | |||
609 | return pci_name(vp_dev->pci_dev); | 625 | return pci_name(vp_dev->pci_dev); |
610 | } | 626 | } |
611 | 627 | ||
628 | /* Setup the affinity for a virtqueue: | ||
629 | * - force the affinity for per vq vector | ||
630 | * - OR over all affinities for shared MSI | ||
631 | * - ignore the affinity request if we're using INTX | ||
632 | */ | ||
633 | static int vp_set_vq_affinity(struct virtqueue *vq, int cpu) | ||
634 | { | ||
635 | struct virtio_device *vdev = vq->vdev; | ||
636 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | ||
637 | struct virtio_pci_vq_info *info = vq->priv; | ||
638 | struct cpumask *mask; | ||
639 | unsigned int irq; | ||
640 | |||
641 | if (!vq->callback) | ||
642 | return -EINVAL; | ||
643 | |||
644 | if (vp_dev->msix_enabled) { | ||
645 | mask = vp_dev->msix_affinity_masks[info->msix_vector]; | ||
646 | irq = vp_dev->msix_entries[info->msix_vector].vector; | ||
647 | if (cpu == -1) | ||
648 | irq_set_affinity_hint(irq, NULL); | ||
649 | else { | ||
650 | cpumask_set_cpu(cpu, mask); | ||
651 | irq_set_affinity_hint(irq, mask); | ||
652 | } | ||
653 | } | ||
654 | return 0; | ||
655 | } | ||
656 | |||
612 | static struct virtio_config_ops virtio_pci_config_ops = { | 657 | static struct virtio_config_ops virtio_pci_config_ops = { |
613 | .get = vp_get, | 658 | .get = vp_get, |
614 | .set = vp_set, | 659 | .set = vp_set, |
@@ -620,6 +665,7 @@ static struct virtio_config_ops virtio_pci_config_ops = { | |||
620 | .get_features = vp_get_features, | 665 | .get_features = vp_get_features, |
621 | .finalize_features = vp_finalize_features, | 666 | .finalize_features = vp_finalize_features, |
622 | .bus_name = vp_bus_name, | 667 | .bus_name = vp_bus_name, |
668 | .set_vq_affinity = vp_set_vq_affinity, | ||
623 | }; | 669 | }; |
624 | 670 | ||
625 | static void virtio_pci_release_dev(struct device *_d) | 671 | static void virtio_pci_release_dev(struct device *_d) |
@@ -673,8 +719,10 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, | |||
673 | goto out_enable_device; | 719 | goto out_enable_device; |
674 | 720 | ||
675 | vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); | 721 | vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); |
676 | if (vp_dev->ioaddr == NULL) | 722 | if (vp_dev->ioaddr == NULL) { |
723 | err = -ENOMEM; | ||
677 | goto out_req_regions; | 724 | goto out_req_regions; |
725 | } | ||
678 | 726 | ||
679 | pci_set_drvdata(pci_dev, vp_dev); | 727 | pci_set_drvdata(pci_dev, vp_dev); |
680 | pci_set_master(pci_dev); | 728 | pci_set_master(pci_dev); |
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5aa43c3392a2..e639584b2dbd 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c | |||
@@ -106,6 +106,9 @@ struct vring_virtqueue | |||
106 | /* How to notify other side. FIXME: commonalize hcalls! */ | 106 | /* How to notify other side. FIXME: commonalize hcalls! */ |
107 | void (*notify)(struct virtqueue *vq); | 107 | void (*notify)(struct virtqueue *vq); |
108 | 108 | ||
109 | /* Index of the queue */ | ||
110 | int queue_index; | ||
111 | |||
109 | #ifdef DEBUG | 112 | #ifdef DEBUG |
110 | /* They're supposed to lock for us. */ | 113 | /* They're supposed to lock for us. */ |
111 | unsigned int in_use; | 114 | unsigned int in_use; |
@@ -171,6 +174,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq, | |||
171 | return head; | 174 | return head; |
172 | } | 175 | } |
173 | 176 | ||
177 | int virtqueue_get_queue_index(struct virtqueue *_vq) | ||
178 | { | ||
179 | struct vring_virtqueue *vq = to_vvq(_vq); | ||
180 | return vq->queue_index; | ||
181 | } | ||
182 | EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); | ||
183 | |||
174 | /** | 184 | /** |
175 | * virtqueue_add_buf - expose buffer to other end | 185 | * virtqueue_add_buf - expose buffer to other end |
176 | * @vq: the struct virtqueue we're talking about. | 186 | * @vq: the struct virtqueue we're talking about. |
@@ -616,7 +626,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq) | |||
616 | } | 626 | } |
617 | EXPORT_SYMBOL_GPL(vring_interrupt); | 627 | EXPORT_SYMBOL_GPL(vring_interrupt); |
618 | 628 | ||
619 | struct virtqueue *vring_new_virtqueue(unsigned int num, | 629 | struct virtqueue *vring_new_virtqueue(unsigned int index, |
630 | unsigned int num, | ||
620 | unsigned int vring_align, | 631 | unsigned int vring_align, |
621 | struct virtio_device *vdev, | 632 | struct virtio_device *vdev, |
622 | bool weak_barriers, | 633 | bool weak_barriers, |
@@ -647,6 +658,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, | |||
647 | vq->broken = false; | 658 | vq->broken = false; |
648 | vq->last_used_idx = 0; | 659 | vq->last_used_idx = 0; |
649 | vq->num_added = 0; | 660 | vq->num_added = 0; |
661 | vq->queue_index = index; | ||
650 | list_add_tail(&vq->vq.list, &vdev->vqs); | 662 | list_add_tail(&vq->vq.list, &vdev->vqs); |
651 | #ifdef DEBUG | 663 | #ifdef DEBUG |
652 | vq->in_use = false; | 664 | vq->in_use = false; |
diff --git a/include/linux/virtio.h b/include/linux/virtio.h index a1ba8bbd9fbe..533b1157f22e 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h | |||
@@ -50,6 +50,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); | |||
50 | 50 | ||
51 | unsigned int virtqueue_get_vring_size(struct virtqueue *vq); | 51 | unsigned int virtqueue_get_vring_size(struct virtqueue *vq); |
52 | 52 | ||
53 | int virtqueue_get_queue_index(struct virtqueue *vq); | ||
54 | |||
53 | /** | 55 | /** |
54 | * virtio_device - representation of a device using virtio | 56 | * virtio_device - representation of a device using virtio |
55 | * @index: unique position on the virtio bus | 57 | * @index: unique position on the virtio bus |
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index fc457f452f64..e2850a7ea276 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h | |||
@@ -84,7 +84,9 @@ | |||
84 | * nvqs: the number of virtqueues to find | 84 | * nvqs: the number of virtqueues to find |
85 | * vqs: on success, includes new virtqueues | 85 | * vqs: on success, includes new virtqueues |
86 | * callbacks: array of callbacks, for each virtqueue | 86 | * callbacks: array of callbacks, for each virtqueue |
87 | * include a NULL entry for vqs that do not need a callback | ||
87 | * names: array of virtqueue names (mainly for debugging) | 88 | * names: array of virtqueue names (mainly for debugging) |
89 | * include a NULL entry for vqs unused by driver | ||
88 | * Returns 0 on success or error status | 90 | * Returns 0 on success or error status |
89 | * @del_vqs: free virtqueues found by find_vqs(). | 91 | * @del_vqs: free virtqueues found by find_vqs(). |
90 | * @get_features: get the array of feature bits for this device. | 92 | * @get_features: get the array of feature bits for this device. |
@@ -98,6 +100,7 @@ | |||
98 | * vdev: the virtio_device | 100 | * vdev: the virtio_device |
99 | * This returns a pointer to the bus name a la pci_name from which | 101 | * This returns a pointer to the bus name a la pci_name from which |
100 | * the caller can then copy. | 102 | * the caller can then copy. |
103 | * @set_vq_affinity: set the affinity for a virtqueue. | ||
101 | */ | 104 | */ |
102 | typedef void vq_callback_t(struct virtqueue *); | 105 | typedef void vq_callback_t(struct virtqueue *); |
103 | struct virtio_config_ops { | 106 | struct virtio_config_ops { |
@@ -116,6 +119,7 @@ struct virtio_config_ops { | |||
116 | u32 (*get_features)(struct virtio_device *vdev); | 119 | u32 (*get_features)(struct virtio_device *vdev); |
117 | void (*finalize_features)(struct virtio_device *vdev); | 120 | void (*finalize_features)(struct virtio_device *vdev); |
118 | const char *(*bus_name)(struct virtio_device *vdev); | 121 | const char *(*bus_name)(struct virtio_device *vdev); |
122 | int (*set_vq_affinity)(struct virtqueue *vq, int cpu); | ||
119 | }; | 123 | }; |
120 | 124 | ||
121 | /* If driver didn't advertise the feature, it will never appear. */ | 125 | /* If driver didn't advertise the feature, it will never appear. */ |
@@ -190,5 +194,24 @@ const char *virtio_bus_name(struct virtio_device *vdev) | |||
190 | return vdev->config->bus_name(vdev); | 194 | return vdev->config->bus_name(vdev); |
191 | } | 195 | } |
192 | 196 | ||
197 | /** | ||
198 | * virtqueue_set_affinity - setting affinity for a virtqueue | ||
199 | * @vq: the virtqueue | ||
200 | * @cpu: the cpu no. | ||
201 | * | ||
202 | * Pay attention the function are best-effort: the affinity hint may not be set | ||
203 | * due to config support, irq type and sharing. | ||
204 | * | ||
205 | */ | ||
206 | static inline | ||
207 | int virtqueue_set_affinity(struct virtqueue *vq, int cpu) | ||
208 | { | ||
209 | struct virtio_device *vdev = vq->vdev; | ||
210 | if (vdev->config->set_vq_affinity) | ||
211 | return vdev->config->set_vq_affinity(vq, cpu); | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | |||
193 | #endif /* __KERNEL__ */ | 216 | #endif /* __KERNEL__ */ |
194 | #endif /* _LINUX_VIRTIO_CONFIG_H */ | 217 | #endif /* _LINUX_VIRTIO_CONFIG_H */ |
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e338730c2660..c2d793a06ad7 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h | |||
@@ -165,7 +165,8 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) | |||
165 | struct virtio_device; | 165 | struct virtio_device; |
166 | struct virtqueue; | 166 | struct virtqueue; |
167 | 167 | ||
168 | struct virtqueue *vring_new_virtqueue(unsigned int num, | 168 | struct virtqueue *vring_new_virtqueue(unsigned int index, |
169 | unsigned int num, | ||
169 | unsigned int vring_align, | 170 | unsigned int vring_align, |
170 | struct virtio_device *vdev, | 171 | struct virtio_device *vdev, |
171 | bool weak_barriers, | 172 | bool weak_barriers, |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cdcb59450b49..31e4f55773f1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -4200,12 +4200,6 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, | |||
4200 | buf->private = 0; | 4200 | buf->private = 0; |
4201 | } | 4201 | } |
4202 | 4202 | ||
4203 | static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe, | ||
4204 | struct pipe_buffer *buf) | ||
4205 | { | ||
4206 | return 1; | ||
4207 | } | ||
4208 | |||
4209 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, | 4203 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, |
4210 | struct pipe_buffer *buf) | 4204 | struct pipe_buffer *buf) |
4211 | { | 4205 | { |
@@ -4221,7 +4215,7 @@ static const struct pipe_buf_operations buffer_pipe_buf_ops = { | |||
4221 | .unmap = generic_pipe_buf_unmap, | 4215 | .unmap = generic_pipe_buf_unmap, |
4222 | .confirm = generic_pipe_buf_confirm, | 4216 | .confirm = generic_pipe_buf_confirm, |
4223 | .release = buffer_pipe_buf_release, | 4217 | .release = buffer_pipe_buf_release, |
4224 | .steal = buffer_pipe_buf_steal, | 4218 | .steal = generic_pipe_buf_steal, |
4225 | .get = buffer_pipe_buf_get, | 4219 | .get = buffer_pipe_buf_get, |
4226 | }; | 4220 | }; |
4227 | 4221 | ||
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index f759f4f097c7..fd2f9221b241 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c | |||
@@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type) | |||
1299 | dev->feature_len = 0; | 1299 | dev->feature_len = 0; |
1300 | dev->num_vq = 0; | 1300 | dev->num_vq = 0; |
1301 | dev->running = false; | 1301 | dev->running = false; |
1302 | dev->next = NULL; | ||
1302 | 1303 | ||
1303 | /* | 1304 | /* |
1304 | * Append to device list. Prepending to a single-linked list is | 1305 | * Append to device list. Prepending to a single-linked list is |
diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile new file mode 100644 index 000000000000..0d2381633475 --- /dev/null +++ b/tools/virtio/virtio-trace/Makefile | |||
@@ -0,0 +1,13 @@ | |||
1 | CC = gcc | ||
2 | CFLAGS = -O2 -Wall -pthread | ||
3 | |||
4 | all: trace-agent | ||
5 | |||
6 | .c.o: | ||
7 | $(CC) $(CFLAGS) -c $^ -o $@ | ||
8 | |||
9 | trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o | ||
10 | $(CC) $(CFLAGS) -o $@ $^ | ||
11 | |||
12 | clean: | ||
13 | rm -f *.o trace-agent | ||
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README new file mode 100644 index 000000000000..b64845b823ab --- /dev/null +++ b/tools/virtio/virtio-trace/README | |||
@@ -0,0 +1,118 @@ | |||
1 | Trace Agent for virtio-trace | ||
2 | ============================ | ||
3 | |||
4 | Trace agent is a user tool for sending trace data of a guest to a host with low | ||
5 | overhead. The trace agent has the following functions: | ||
6 | - splice a page of ring-buffer to read_pipe without memory copying | ||
7 | - splice the page from write_pipe to virtio-console without memory copying | ||
8 | - write trace data to stdout by using -o option | ||
9 | - controlled by start/stop orders from a Host | ||
10 | |||
11 | The trace agent operates as follows: | ||
12 | 1) Initialize all structures. | ||
13 | 2) Create a read/write thread per CPU. Each thread is bound to a CPU. | ||
14 | The read/write threads hold it. | ||
15 | 3) A controller thread does poll() for a start order of a host. | ||
16 | 4) After the controller of the trace agent receives a start order from a host, | ||
17 | the controller wakes the read/write threads. | ||
18 | 5) The read/write threads start to read trace data from ring-buffers and | ||
19 | write the data to virtio-serial. | ||
20 | 6) If the controller receives a stop order from a host, the read/write threads | ||
21 | stop to read trace data. | ||
22 | |||
23 | |||
24 | Files | ||
25 | ===== | ||
26 | |||
27 | README: this file | ||
28 | Makefile: Makefile of trace agent for virtio-trace | ||
29 | trace-agent.c: includes main function, sets up for operating trace agent | ||
30 | trace-agent.h: includes all structures and some macros | ||
31 | trace-agent-ctl.c: includes controller function for read/write threads | ||
32 | trace-agent-rw.c: includes read/write threads function | ||
33 | |||
34 | |||
35 | Setup | ||
36 | ===== | ||
37 | |||
38 | To use this trace agent for virtio-trace, we need to prepare some virtio-serial | ||
39 | I/Fs. | ||
40 | |||
41 | 1) Make FIFO in a host | ||
42 | virtio-trace uses virtio-serial pipe as trace data paths as to the number | ||
43 | of CPUs and a control path, so FIFO (named pipe) should be created as follows: | ||
44 | # mkdir /tmp/virtio-trace/ | ||
45 | # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out} | ||
46 | # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out} | ||
47 | |||
48 | For example, if a guest uses three CPUs, the names are | ||
49 | trace-path-cpu{0,1,2}.{in,out} | ||
50 | and | ||
51 | agent-ctl-path.{in,out}. | ||
52 | |||
53 | 2) Set up of virtio-serial pipe in a host | ||
54 | Add qemu option to use virtio-serial pipe. | ||
55 | |||
56 | ##virtio-serial device## | ||
57 | -device virtio-serial-pci,id=virtio-serial0\ | ||
58 | ##control path## | ||
59 | -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\ | ||
60 | -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\ | ||
61 | id=channel0,name=agent-ctl-path\ | ||
62 | ##data path## | ||
63 | -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\ | ||
64 | -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\ | ||
65 | id=channel1,name=trace-path-cpu0\ | ||
66 | ... | ||
67 | |||
68 | If you manage guests with libvirt, add the following tags to domain XML files. | ||
69 | Then, libvirt passes the same command option to qemu. | ||
70 | |||
71 | <channel type='pipe'> | ||
72 | <source path='/tmp/virtio-trace/agent-ctl-path'/> | ||
73 | <target type='virtio' name='agent-ctl-path'/> | ||
74 | <address type='virtio-serial' controller='0' bus='0' port='0'/> | ||
75 | </channel> | ||
76 | <channel type='pipe'> | ||
77 | <source path='/tmp/virtio-trace/trace-path-cpu0'/> | ||
78 | <target type='virtio' name='trace-path-cpu0'/> | ||
79 | <address type='virtio-serial' controller='0' bus='0' port='1'/> | ||
80 | </channel> | ||
81 | ... | ||
82 | Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For | ||
83 | example, if a guest uses three CPUs, chardev names should be trace-path-cpu0, | ||
84 | trace-path-cpu1, trace-path-cpu2, and agent-ctl-path. | ||
85 | |||
86 | 3) Boot the guest | ||
87 | You can find some chardev in /dev/virtio-ports/ in the guest. | ||
88 | |||
89 | |||
90 | Run | ||
91 | === | ||
92 | |||
93 | 0) Build trace agent in a guest | ||
94 | $ make | ||
95 | |||
96 | 1) Enable ftrace in the guest | ||
97 | <Example> | ||
98 | # echo 1 > /sys/kernel/debug/tracing/events/sched/enable | ||
99 | |||
100 | 2) Run trace agent in the guest | ||
101 | This agent must be operated as root. | ||
102 | # ./trace-agent | ||
103 | read/write threads in the agent wait for start order from host. If you add -o | ||
104 | option, trace data are output via stdout in the guest. | ||
105 | |||
106 | 3) Open FIFO in a host | ||
107 | # cat /tmp/virtio-trace/trace-path-cpu0.out | ||
108 | If a host does not open these, trace data get stuck in buffers of virtio. Then, | ||
109 | the guest will stop by specification of chardev in QEMU. This blocking mode may | ||
110 | be solved in the future. | ||
111 | |||
112 | 4) Start to read trace data by ordering from a host | ||
113 | A host injects read start order to the guest via virtio-serial. | ||
114 | # echo 1 > /tmp/virtio-trace/agent-ctl-path.in | ||
115 | |||
116 | 5) Stop to read trace data by ordering from a host | ||
117 | A host injects read stop order to the guest via virtio-serial. | ||
118 | # echo 0 > /tmp/virtio-trace/agent-ctl-path.in | ||
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c new file mode 100644 index 000000000000..a2d0403c4f94 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent-ctl.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * Controller of read/write threads for virtio-trace | ||
3 | * | ||
4 | * Copyright (C) 2012 Hitachi, Ltd. | ||
5 | * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> | ||
6 | * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> | ||
7 | * | ||
8 | * Licensed under GPL version 2 only. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #define _GNU_SOURCE | ||
13 | #include <fcntl.h> | ||
14 | #include <poll.h> | ||
15 | #include <signal.h> | ||
16 | #include <stdio.h> | ||
17 | #include <stdlib.h> | ||
18 | #include <unistd.h> | ||
19 | #include "trace-agent.h" | ||
20 | |||
21 | #define HOST_MSG_SIZE 256 | ||
22 | #define EVENT_WAIT_MSEC 100 | ||
23 | |||
24 | static volatile sig_atomic_t global_signal_val; | ||
25 | bool global_sig_receive; /* default false */ | ||
26 | bool global_run_operation; /* default false*/ | ||
27 | |||
28 | /* Handle SIGTERM/SIGINT/SIGQUIT to exit */ | ||
29 | static void signal_handler(int sig) | ||
30 | { | ||
31 | global_signal_val = sig; | ||
32 | } | ||
33 | |||
34 | int rw_ctl_init(const char *ctl_path) | ||
35 | { | ||
36 | int ctl_fd; | ||
37 | |||
38 | ctl_fd = open(ctl_path, O_RDONLY); | ||
39 | if (ctl_fd == -1) { | ||
40 | pr_err("Cannot open ctl_fd\n"); | ||
41 | goto error; | ||
42 | } | ||
43 | |||
44 | return ctl_fd; | ||
45 | |||
46 | error: | ||
47 | exit(EXIT_FAILURE); | ||
48 | } | ||
49 | |||
50 | static int wait_order(int ctl_fd) | ||
51 | { | ||
52 | struct pollfd poll_fd; | ||
53 | int ret = 0; | ||
54 | |||
55 | while (!global_sig_receive) { | ||
56 | poll_fd.fd = ctl_fd; | ||
57 | poll_fd.events = POLLIN; | ||
58 | |||
59 | ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC); | ||
60 | |||
61 | if (global_signal_val) { | ||
62 | global_sig_receive = true; | ||
63 | pr_info("Receive interrupt %d\n", global_signal_val); | ||
64 | |||
65 | /* Wakes rw-threads when they are sleeping */ | ||
66 | if (!global_run_operation) | ||
67 | pthread_cond_broadcast(&cond_wakeup); | ||
68 | |||
69 | ret = -1; | ||
70 | break; | ||
71 | } | ||
72 | |||
73 | if (ret < 0) { | ||
74 | pr_err("Polling error\n"); | ||
75 | goto error; | ||
76 | } | ||
77 | |||
78 | if (ret) | ||
79 | break; | ||
80 | }; | ||
81 | |||
82 | return ret; | ||
83 | |||
84 | error: | ||
85 | exit(EXIT_FAILURE); | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * contol read/write threads by handling global_run_operation | ||
90 | */ | ||
91 | void *rw_ctl_loop(int ctl_fd) | ||
92 | { | ||
93 | ssize_t rlen; | ||
94 | char buf[HOST_MSG_SIZE]; | ||
95 | int ret; | ||
96 | |||
97 | /* Setup signal handlers */ | ||
98 | signal(SIGTERM, signal_handler); | ||
99 | signal(SIGINT, signal_handler); | ||
100 | signal(SIGQUIT, signal_handler); | ||
101 | |||
102 | while (!global_sig_receive) { | ||
103 | |||
104 | ret = wait_order(ctl_fd); | ||
105 | if (ret < 0) | ||
106 | break; | ||
107 | |||
108 | rlen = read(ctl_fd, buf, sizeof(buf)); | ||
109 | if (rlen < 0) { | ||
110 | pr_err("read data error in ctl thread\n"); | ||
111 | goto error; | ||
112 | } | ||
113 | |||
114 | if (rlen == 2 && buf[0] == '1') { | ||
115 | /* | ||
116 | * If host writes '1' to a control path, | ||
117 | * this controller wakes all read/write threads. | ||
118 | */ | ||
119 | global_run_operation = true; | ||
120 | pthread_cond_broadcast(&cond_wakeup); | ||
121 | pr_debug("Wake up all read/write threads\n"); | ||
122 | } else if (rlen == 2 && buf[0] == '0') { | ||
123 | /* | ||
124 | * If host writes '0' to a control path, read/write | ||
125 | * threads will wait for notification from Host. | ||
126 | */ | ||
127 | global_run_operation = false; | ||
128 | pr_debug("Stop all read/write threads\n"); | ||
129 | } else | ||
130 | pr_info("Invalid host notification: %s\n", buf); | ||
131 | } | ||
132 | |||
133 | return NULL; | ||
134 | |||
135 | error: | ||
136 | exit(EXIT_FAILURE); | ||
137 | } | ||
diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c new file mode 100644 index 000000000000..3aace5ea4842 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent-rw.c | |||
@@ -0,0 +1,192 @@ | |||
1 | /* | ||
2 | * Read/write thread of a guest agent for virtio-trace | ||
3 | * | ||
4 | * Copyright (C) 2012 Hitachi, Ltd. | ||
5 | * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> | ||
6 | * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> | ||
7 | * | ||
8 | * Licensed under GPL version 2 only. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #define _GNU_SOURCE | ||
13 | #include <fcntl.h> | ||
14 | #include <stdio.h> | ||
15 | #include <stdlib.h> | ||
16 | #include <unistd.h> | ||
17 | #include <sys/syscall.h> | ||
18 | #include "trace-agent.h" | ||
19 | |||
20 | #define READ_WAIT_USEC 100000 | ||
21 | |||
22 | void *rw_thread_info_new(void) | ||
23 | { | ||
24 | struct rw_thread_info *rw_ti; | ||
25 | |||
26 | rw_ti = zalloc(sizeof(struct rw_thread_info)); | ||
27 | if (rw_ti == NULL) { | ||
28 | pr_err("rw_thread_info zalloc error\n"); | ||
29 | exit(EXIT_FAILURE); | ||
30 | } | ||
31 | |||
32 | rw_ti->cpu_num = -1; | ||
33 | rw_ti->in_fd = -1; | ||
34 | rw_ti->out_fd = -1; | ||
35 | rw_ti->read_pipe = -1; | ||
36 | rw_ti->write_pipe = -1; | ||
37 | rw_ti->pipe_size = PIPE_INIT; | ||
38 | |||
39 | return rw_ti; | ||
40 | } | ||
41 | |||
42 | void *rw_thread_init(int cpu, const char *in_path, const char *out_path, | ||
43 | bool stdout_flag, unsigned long pipe_size, | ||
44 | struct rw_thread_info *rw_ti) | ||
45 | { | ||
46 | int data_pipe[2]; | ||
47 | |||
48 | rw_ti->cpu_num = cpu; | ||
49 | |||
50 | /* set read(input) fd */ | ||
51 | rw_ti->in_fd = open(in_path, O_RDONLY); | ||
52 | if (rw_ti->in_fd == -1) { | ||
53 | pr_err("Could not open in_fd (CPU:%d)\n", cpu); | ||
54 | goto error; | ||
55 | } | ||
56 | |||
57 | /* set write(output) fd */ | ||
58 | if (!stdout_flag) { | ||
59 | /* virtio-serial output mode */ | ||
60 | rw_ti->out_fd = open(out_path, O_WRONLY); | ||
61 | if (rw_ti->out_fd == -1) { | ||
62 | pr_err("Could not open out_fd (CPU:%d)\n", cpu); | ||
63 | goto error; | ||
64 | } | ||
65 | } else | ||
66 | /* stdout mode */ | ||
67 | rw_ti->out_fd = STDOUT_FILENO; | ||
68 | |||
69 | if (pipe2(data_pipe, O_NONBLOCK) < 0) { | ||
70 | pr_err("Could not create pipe in rw-thread(%d)\n", cpu); | ||
71 | goto error; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Size of pipe is 64kB in default based on fs/pipe.c. | ||
76 | * To read/write trace data speedy, pipe size is changed. | ||
77 | */ | ||
78 | if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) { | ||
79 | pr_err("Could not change pipe size in rw-thread(%d)\n", cpu); | ||
80 | goto error; | ||
81 | } | ||
82 | |||
83 | rw_ti->read_pipe = data_pipe[1]; | ||
84 | rw_ti->write_pipe = data_pipe[0]; | ||
85 | rw_ti->pipe_size = pipe_size; | ||
86 | |||
87 | return NULL; | ||
88 | |||
89 | error: | ||
90 | exit(EXIT_FAILURE); | ||
91 | } | ||
92 | |||
93 | /* Bind a thread to a cpu */ | ||
94 | static void bind_cpu(int cpu_num) | ||
95 | { | ||
96 | cpu_set_t mask; | ||
97 | |||
98 | CPU_ZERO(&mask); | ||
99 | CPU_SET(cpu_num, &mask); | ||
100 | |||
101 | /* bind my thread to cpu_num by assigning zero to the first argument */ | ||
102 | if (sched_setaffinity(0, sizeof(mask), &mask) == -1) | ||
103 | pr_err("Could not set CPU#%d affinity\n", (int)cpu_num); | ||
104 | } | ||
105 | |||
106 | static void *rw_thread_main(void *thread_info) | ||
107 | { | ||
108 | ssize_t rlen, wlen; | ||
109 | ssize_t ret; | ||
110 | struct rw_thread_info *ts = (struct rw_thread_info *)thread_info; | ||
111 | |||
112 | bind_cpu(ts->cpu_num); | ||
113 | |||
114 | while (1) { | ||
115 | /* Wait for a read order of trace data by Host OS */ | ||
116 | if (!global_run_operation) { | ||
117 | pthread_mutex_lock(&mutex_notify); | ||
118 | pthread_cond_wait(&cond_wakeup, &mutex_notify); | ||
119 | pthread_mutex_unlock(&mutex_notify); | ||
120 | } | ||
121 | |||
122 | if (global_sig_receive) | ||
123 | break; | ||
124 | |||
125 | /* | ||
126 | * Each thread read trace_pipe_raw of each cpu bounding the | ||
127 | * thread, so contention of multi-threads does not occur. | ||
128 | */ | ||
129 | rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL, | ||
130 | ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE); | ||
131 | |||
132 | if (rlen < 0) { | ||
133 | pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num); | ||
134 | goto error; | ||
135 | } else if (rlen == 0) { | ||
136 | /* | ||
137 | * If trace data do not exist or are unreadable not | ||
138 | * for exceeding the page size, splice_read returns | ||
139 | * NULL. Then, this waits for being filled the data in a | ||
140 | * ring-buffer. | ||
141 | */ | ||
142 | usleep(READ_WAIT_USEC); | ||
143 | pr_debug("Read retry(cpu:%d)\n", ts->cpu_num); | ||
144 | continue; | ||
145 | } | ||
146 | |||
147 | wlen = 0; | ||
148 | |||
149 | do { | ||
150 | ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL, | ||
151 | rlen - wlen, | ||
152 | SPLICE_F_MOVE | SPLICE_F_MORE); | ||
153 | |||
154 | if (ret < 0) { | ||
155 | pr_err("Splice_write in rw-thread(%d)\n", | ||
156 | ts->cpu_num); | ||
157 | goto error; | ||
158 | } else if (ret == 0) | ||
159 | /* | ||
160 | * When host reader is not in time for reading | ||
161 | * trace data, guest will be stopped. This is | ||
162 | * because char dev in QEMU is not supported | ||
163 | * non-blocking mode. Then, writer might be | ||
164 | * sleep in that case. | ||
165 | * This sleep will be removed by supporting | ||
166 | * non-blocking mode. | ||
167 | */ | ||
168 | sleep(1); | ||
169 | wlen += ret; | ||
170 | } while (wlen < rlen); | ||
171 | } | ||
172 | |||
173 | return NULL; | ||
174 | |||
175 | error: | ||
176 | exit(EXIT_FAILURE); | ||
177 | } | ||
178 | |||
179 | |||
180 | pthread_t rw_thread_run(struct rw_thread_info *rw_ti) | ||
181 | { | ||
182 | int ret; | ||
183 | pthread_t rw_thread_per_cpu; | ||
184 | |||
185 | ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti); | ||
186 | if (ret != 0) { | ||
187 | pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num); | ||
188 | exit(EXIT_FAILURE); | ||
189 | } | ||
190 | |||
191 | return rw_thread_per_cpu; | ||
192 | } | ||
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c new file mode 100644 index 000000000000..0a0a7dd4eff7 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent.c | |||
@@ -0,0 +1,270 @@ | |||
1 | /* | ||
2 | * Guest agent for virtio-trace | ||
3 | * | ||
4 | * Copyright (C) 2012 Hitachi, Ltd. | ||
5 | * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> | ||
6 | * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> | ||
7 | * | ||
8 | * Licensed under GPL version 2 only. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #define _GNU_SOURCE | ||
13 | #include <limits.h> | ||
14 | #include <stdio.h> | ||
15 | #include <stdlib.h> | ||
16 | #include <unistd.h> | ||
17 | #include "trace-agent.h" | ||
18 | |||
19 | #define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) | ||
20 | #define PIPE_DEF_BUFS 16 | ||
21 | #define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) | ||
22 | #define PIPE_MAX_SIZE (1024*1024) | ||
23 | #define READ_PATH_FMT \ | ||
24 | "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" | ||
25 | #define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" | ||
26 | #define CTL_PATH "/dev/virtio-ports/agent-ctl-path" | ||
27 | |||
28 | pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER; | ||
29 | pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER; | ||
30 | |||
31 | static int get_total_cpus(void) | ||
32 | { | ||
33 | int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF); | ||
34 | |||
35 | if (nr_cpus <= 0) { | ||
36 | pr_err("Could not read cpus\n"); | ||
37 | goto error; | ||
38 | } else if (nr_cpus > MAX_CPUS) { | ||
39 | pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS); | ||
40 | goto error; | ||
41 | } | ||
42 | |||
43 | return nr_cpus; | ||
44 | |||
45 | error: | ||
46 | exit(EXIT_FAILURE); | ||
47 | } | ||
48 | |||
49 | static void *agent_info_new(void) | ||
50 | { | ||
51 | struct agent_info *s; | ||
52 | int i; | ||
53 | |||
54 | s = zalloc(sizeof(struct agent_info)); | ||
55 | if (s == NULL) { | ||
56 | pr_err("agent_info zalloc error\n"); | ||
57 | exit(EXIT_FAILURE); | ||
58 | } | ||
59 | |||
60 | s->pipe_size = PIPE_INIT; | ||
61 | s->use_stdout = false; | ||
62 | s->cpus = get_total_cpus(); | ||
63 | s->ctl_fd = -1; | ||
64 | |||
65 | /* read/write threads init */ | ||
66 | for (i = 0; i < s->cpus; i++) | ||
67 | s->rw_ti[i] = rw_thread_info_new(); | ||
68 | |||
69 | return s; | ||
70 | } | ||
71 | |||
72 | static unsigned long parse_size(const char *arg) | ||
73 | { | ||
74 | unsigned long value, round; | ||
75 | char *ptr; | ||
76 | |||
77 | value = strtoul(arg, &ptr, 10); | ||
78 | switch (*ptr) { | ||
79 | case 'K': case 'k': | ||
80 | value <<= 10; | ||
81 | break; | ||
82 | case 'M': case 'm': | ||
83 | value <<= 20; | ||
84 | break; | ||
85 | default: | ||
86 | break; | ||
87 | } | ||
88 | |||
89 | if (value > PIPE_MAX_SIZE) { | ||
90 | pr_err("Pipe size must be less than 1MB\n"); | ||
91 | goto error; | ||
92 | } else if (value < PIPE_MIN_SIZE) { | ||
93 | pr_err("Pipe size must be over 64KB\n"); | ||
94 | goto error; | ||
95 | } | ||
96 | |||
97 | /* Align buffer size with page unit */ | ||
98 | round = value & (PAGE_SIZE - 1); | ||
99 | value = value - round; | ||
100 | |||
101 | return value; | ||
102 | error: | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static void usage(char const *prg) | ||
107 | { | ||
108 | pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg); | ||
109 | } | ||
110 | |||
111 | static const char *make_path(int cpu_num, bool this_is_write_path) | ||
112 | { | ||
113 | int ret; | ||
114 | char *buf; | ||
115 | |||
116 | buf = zalloc(PATH_MAX); | ||
117 | if (buf == NULL) { | ||
118 | pr_err("Could not allocate buffer\n"); | ||
119 | goto error; | ||
120 | } | ||
121 | |||
122 | if (this_is_write_path) | ||
123 | /* write(output) path */ | ||
124 | ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num); | ||
125 | else | ||
126 | /* read(input) path */ | ||
127 | ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num); | ||
128 | |||
129 | if (ret <= 0) { | ||
130 | pr_err("Failed to generate %s path(CPU#%d):%d\n", | ||
131 | this_is_write_path ? "read" : "write", cpu_num, ret); | ||
132 | goto error; | ||
133 | } | ||
134 | |||
135 | return buf; | ||
136 | |||
137 | error: | ||
138 | free(buf); | ||
139 | return NULL; | ||
140 | } | ||
141 | |||
142 | static const char *make_input_path(int cpu_num) | ||
143 | { | ||
144 | return make_path(cpu_num, false); | ||
145 | } | ||
146 | |||
147 | static const char *make_output_path(int cpu_num) | ||
148 | { | ||
149 | return make_path(cpu_num, true); | ||
150 | } | ||
151 | |||
152 | static void *agent_info_init(struct agent_info *s) | ||
153 | { | ||
154 | int cpu; | ||
155 | const char *in_path = NULL; | ||
156 | const char *out_path = NULL; | ||
157 | |||
158 | /* init read/write threads */ | ||
159 | for (cpu = 0; cpu < s->cpus; cpu++) { | ||
160 | /* set read(input) path per read/write thread */ | ||
161 | in_path = make_input_path(cpu); | ||
162 | if (in_path == NULL) | ||
163 | goto error; | ||
164 | |||
165 | /* set write(output) path per read/write thread*/ | ||
166 | if (!s->use_stdout) { | ||
167 | out_path = make_output_path(cpu); | ||
168 | if (out_path == NULL) | ||
169 | goto error; | ||
170 | } else | ||
171 | /* stdout mode */ | ||
172 | pr_debug("stdout mode\n"); | ||
173 | |||
174 | rw_thread_init(cpu, in_path, out_path, s->use_stdout, | ||
175 | s->pipe_size, s->rw_ti[cpu]); | ||
176 | } | ||
177 | |||
178 | /* init controller of read/write threads */ | ||
179 | s->ctl_fd = rw_ctl_init((const char *)CTL_PATH); | ||
180 | |||
181 | return NULL; | ||
182 | |||
183 | error: | ||
184 | exit(EXIT_FAILURE); | ||
185 | } | ||
186 | |||
187 | static void *parse_args(int argc, char *argv[], struct agent_info *s) | ||
188 | { | ||
189 | int cmd; | ||
190 | unsigned long size; | ||
191 | |||
192 | while ((cmd = getopt(argc, argv, "hos:")) != -1) { | ||
193 | switch (cmd) { | ||
194 | /* stdout mode */ | ||
195 | case 'o': | ||
196 | s->use_stdout = true; | ||
197 | break; | ||
198 | /* size of pipe */ | ||
199 | case 's': | ||
200 | size = parse_size(optarg); | ||
201 | if (size == 0) | ||
202 | goto error; | ||
203 | s->pipe_size = size; | ||
204 | break; | ||
205 | case 'h': | ||
206 | default: | ||
207 | usage(argv[0]); | ||
208 | goto error; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | agent_info_init(s); | ||
213 | |||
214 | return NULL; | ||
215 | |||
216 | error: | ||
217 | exit(EXIT_FAILURE); | ||
218 | } | ||
219 | |||
220 | static void agent_main_loop(struct agent_info *s) | ||
221 | { | ||
222 | int cpu; | ||
223 | pthread_t rw_thread_per_cpu[MAX_CPUS]; | ||
224 | |||
225 | /* Start all read/write threads */ | ||
226 | for (cpu = 0; cpu < s->cpus; cpu++) | ||
227 | rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]); | ||
228 | |||
229 | rw_ctl_loop(s->ctl_fd); | ||
230 | |||
231 | /* Finish all read/write threads */ | ||
232 | for (cpu = 0; cpu < s->cpus; cpu++) { | ||
233 | int ret; | ||
234 | |||
235 | ret = pthread_join(rw_thread_per_cpu[cpu], NULL); | ||
236 | if (ret != 0) { | ||
237 | pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu); | ||
238 | exit(EXIT_FAILURE); | ||
239 | } | ||
240 | } | ||
241 | } | ||
242 | |||
243 | static void agent_info_free(struct agent_info *s) | ||
244 | { | ||
245 | int i; | ||
246 | |||
247 | close(s->ctl_fd); | ||
248 | for (i = 0; i < s->cpus; i++) { | ||
249 | close(s->rw_ti[i]->in_fd); | ||
250 | close(s->rw_ti[i]->out_fd); | ||
251 | close(s->rw_ti[i]->read_pipe); | ||
252 | close(s->rw_ti[i]->write_pipe); | ||
253 | free(s->rw_ti[i]); | ||
254 | } | ||
255 | free(s); | ||
256 | } | ||
257 | |||
258 | int main(int argc, char *argv[]) | ||
259 | { | ||
260 | struct agent_info *s = NULL; | ||
261 | |||
262 | s = agent_info_new(); | ||
263 | parse_args(argc, argv, s); | ||
264 | |||
265 | agent_main_loop(s); | ||
266 | |||
267 | agent_info_free(s); | ||
268 | |||
269 | return 0; | ||
270 | } | ||
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h new file mode 100644 index 000000000000..8de79bfeaa73 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent.h | |||
@@ -0,0 +1,75 @@ | |||
1 | #ifndef __TRACE_AGENT_H__ | ||
2 | #define __TRACE_AGENT_H__ | ||
3 | #include <pthread.h> | ||
4 | #include <stdbool.h> | ||
5 | |||
6 | #define MAX_CPUS 256 | ||
7 | #define PIPE_INIT (1024*1024) | ||
8 | |||
9 | /* | ||
10 | * agent_info - structure managing total information of guest agent | ||
11 | * @pipe_size: size of pipe (default 1MB) | ||
12 | * @use_stdout: set to true when o option is added (default false) | ||
13 | * @cpus: total number of CPUs | ||
14 | * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path | ||
15 | * @rw_ti: structure managing information of read/write threads | ||
16 | */ | ||
17 | struct agent_info { | ||
18 | unsigned long pipe_size; | ||
19 | bool use_stdout; | ||
20 | int cpus; | ||
21 | int ctl_fd; | ||
22 | struct rw_thread_info *rw_ti[MAX_CPUS]; | ||
23 | }; | ||
24 | |||
25 | /* | ||
26 | * rw_thread_info - structure managing a read/write thread a cpu | ||
27 | * @cpu_num: cpu number operating this read/write thread | ||
28 | * @in_fd: fd of reading trace data path in cpu_num | ||
29 | * @out_fd: fd of writing trace data path in cpu_num | ||
30 | * @read_pipe: fd of read pipe | ||
31 | * @write_pipe: fd of write pipe | ||
32 | * @pipe_size: size of pipe (default 1MB) | ||
33 | */ | ||
34 | struct rw_thread_info { | ||
35 | int cpu_num; | ||
36 | int in_fd; | ||
37 | int out_fd; | ||
38 | int read_pipe; | ||
39 | int write_pipe; | ||
40 | unsigned long pipe_size; | ||
41 | }; | ||
42 | |||
43 | /* use for stopping rw threads */ | ||
44 | extern bool global_sig_receive; | ||
45 | |||
46 | /* use for notification */ | ||
47 | extern bool global_run_operation; | ||
48 | extern pthread_mutex_t mutex_notify; | ||
49 | extern pthread_cond_t cond_wakeup; | ||
50 | |||
51 | /* for controller of read/write threads */ | ||
52 | extern int rw_ctl_init(const char *ctl_path); | ||
53 | extern void *rw_ctl_loop(int ctl_fd); | ||
54 | |||
55 | /* for trace read/write thread */ | ||
56 | extern void *rw_thread_info_new(void); | ||
57 | extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path, | ||
58 | bool stdout_flag, unsigned long pipe_size, | ||
59 | struct rw_thread_info *rw_ti); | ||
60 | extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti); | ||
61 | |||
62 | static inline void *zalloc(size_t size) | ||
63 | { | ||
64 | return calloc(1, size); | ||
65 | } | ||
66 | |||
67 | #define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__) | ||
68 | #define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__) | ||
69 | #ifdef DEBUG | ||
70 | #define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__) | ||
71 | #else | ||
72 | #define pr_debug(format, ...) do {} while (0) | ||
73 | #endif | ||
74 | |||
75 | #endif /*__TRACE_AGENT_H__*/ | ||