author		Linus Torvalds <torvalds@linux-foundation.org>	2012-10-07 08:04:56 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-07 08:04:56 -0400
commit		dc92b1f9ab1e1665dbbc56911782358e7f9a49f9 (patch)
tree		965ccb4a0f2c24a8b24adce415f6506246d07a90
parent		5e090ed7af10729a396a25df43d69a236e789736 (diff)
parent		ca16f580a5db7e60bfafe59a50bb133bd3347491 (diff)
Merge branch 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio changes from Rusty Russell:
 "New workflow: same git trees pulled by linux-next get sent straight
  to Linus.  Git is awkward at shuffling patches compared with quilt
  or mq, but that doesn't happen often once things get into my -next
  branch."

* 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (24 commits)
  lguest: fix occasional crash in example launcher.
  virtio-blk: Disable callback in virtblk_done()
  virtio_mmio: Don't attempt to create empty virtqueues
  virtio_mmio: fix off by one error allocating queue
  drivers/virtio/virtio_pci.c: fix error return code
  virtio: don't crash when device is buggy
  virtio: remove CONFIG_VIRTIO_RING
  virtio: add help to CONFIG_VIRTIO option.
  virtio: support reserved vqs
  virtio: introduce an API to set affinity for a virtqueue
  virtio-ring: move queue_index to vring_virtqueue
  virtio_balloon: not EXPERIMENTAL any more.
  virtio-balloon: dependency fix
  virtio-blk: fix NULL checking in virtblk_alloc_req()
  virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
  virtio-blk: Add bio-based IO path for virtio-blk
  virtio: console: fix error handling in init() function
  tools: Fix pthread flag for Makefile of trace-agent used by virtio-trace
  tools: Add guest trace agent as a user tool
  virtio/console: Allocate scatterlist according to the current pipe size
  ...
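Among the changes pulled here, drivers gain a best-effort way to steer a
virtqueue's interrupt to a CPU through the new virtqueue_set_affinity()
helper (added below in include/linux/virtio_config.h). As a minimal sketch
of how a multiqueue driver might use it; the function name and the vqs[]
array are hypothetical, assumed to come from an earlier find_vqs() call:

	#include <linux/cpumask.h>
	#include <linux/virtio.h>
	#include <linux/virtio_config.h>

	/* Spread virtqueue interrupts round-robin over the online CPUs.
	 * virtqueue_set_affinity() is a no-op unless the transport
	 * implements the new set_vq_affinity config op (virtio_pci here).
	 */
	static void example_spread_vq_irqs(struct virtqueue *vqs[],
					   unsigned int nvqs)
	{
		unsigned int i;
		int cpu = cpumask_first(cpu_online_mask);

		for (i = 0; i < nvqs; i++) {
			virtqueue_set_affinity(vqs[i], cpu);
			cpu = cpumask_next(cpu, cpu_online_mask);
			if (cpu >= nr_cpu_ids)
				cpu = cpumask_first(cpu_online_mask);
		}
	}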
 arch/s390/Kconfig                           |   1
 arch/x86/lguest/Kconfig                     |   1
 drivers/block/virtio_blk.c                  | 306
 drivers/char/virtio_console.c               | 198
 drivers/lguest/lguest_device.c              |   5
 drivers/remoteproc/remoteproc_virtio.c      |   5
 drivers/rpmsg/Kconfig                       |   1
 drivers/s390/kvm/kvm_virtio.c               |   5
 drivers/virtio/Kconfig                      |  17
 drivers/virtio/Makefile                     |   3
 drivers/virtio/virtio.c                     |   2
 drivers/virtio/virtio_mmio.c                |  29
 drivers/virtio/virtio_pci.c                 |  68
 drivers/virtio/virtio_ring.c                |  14
 include/linux/virtio.h                      |   2
 include/linux/virtio_config.h               |  23
 include/linux/virtio_ring.h                 |   3
 kernel/trace/trace.c                        |   8
 tools/lguest/lguest.c                       |   1
 tools/virtio/virtio-trace/Makefile          |  13
 tools/virtio/virtio-trace/README            | 118
 tools/virtio/virtio-trace/trace-agent-ctl.c | 137
 tools/virtio/virtio-trace/trace-agent-rw.c  | 192
 tools/virtio/virtio-trace/trace-agent.c     | 270
 tools/virtio/virtio-trace/trace-agent.h     |  75
 25 files changed, 1391 insertions(+), 106 deletions(-)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f9acddd9ace3..c8af429991d9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -656,7 +656,6 @@ config S390_GUEST
 	depends on 64BIT && EXPERIMENTAL
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Enabling this option adds support for virtio based paravirtual device
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 6e121a2a49e1..7872a3330fb5 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -4,7 +4,6 @@ config LGUEST_GUEST
 	depends on X86_32
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Lguest is a tiny in-kernel hypervisor. Selecting this will
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c0bbeb470754..0bdde8fba397 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -14,6 +14,9 @@
 
 #define PART_BITS 4
 
+static bool use_bio;
+module_param(use_bio, bool, S_IRUGO);
+
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
@@ -23,6 +26,7 @@ struct virtio_blk
 {
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
+	wait_queue_head_t queue_wait;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
@@ -51,53 +55,244 @@ struct virtio_blk
 struct virtblk_req
 {
 	struct request *req;
+	struct bio *bio;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
+	struct work_struct work;
+	struct virtio_blk *vblk;
+	int flags;
 	u8 status;
+	struct scatterlist sg[];
+};
+
+enum {
+	VBLK_IS_FLUSH	= 1,
+	VBLK_REQ_FLUSH	= 2,
+	VBLK_REQ_DATA	= 4,
+	VBLK_REQ_FUA	= 8,
 };
 
-static void blk_done(struct virtqueue *vq)
+static inline int virtblk_result(struct virtblk_req *vbr)
+{
+	switch (vbr->status) {
+	case VIRTIO_BLK_S_OK:
+		return 0;
+	case VIRTIO_BLK_S_UNSUPP:
+		return -ENOTTY;
+	default:
+		return -EIO;
+	}
+}
+
+static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
+						    gfp_t gfp_mask)
 {
-	struct virtio_blk *vblk = vq->vdev->priv;
 	struct virtblk_req *vbr;
-	unsigned int len;
-	unsigned long flags;
 
-	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
-	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-		int error;
+	vbr = mempool_alloc(vblk->pool, gfp_mask);
+	if (!vbr)
+		return NULL;
 
-		switch (vbr->status) {
-		case VIRTIO_BLK_S_OK:
-			error = 0;
-			break;
-		case VIRTIO_BLK_S_UNSUPP:
-			error = -ENOTTY;
-			break;
-		default:
-			error = -EIO;
+	vbr->vblk = vblk;
+	if (use_bio)
+		sg_init_table(vbr->sg, vblk->sg_elems);
+
+	return vbr;
+}
+
+static void virtblk_add_buf_wait(struct virtio_blk *vblk,
+				 struct virtblk_req *vbr,
+				 unsigned long out,
+				 unsigned long in)
+{
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
+					  TASK_UNINTERRUPTIBLE);
+
+		spin_lock_irq(vblk->disk->queue->queue_lock);
+		if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+				      GFP_ATOMIC) < 0) {
+			spin_unlock_irq(vblk->disk->queue->queue_lock);
+			io_schedule();
+		} else {
+			virtqueue_kick(vblk->vq);
+			spin_unlock_irq(vblk->disk->queue->queue_lock);
 			break;
 		}
 
-		switch (vbr->req->cmd_type) {
-		case REQ_TYPE_BLOCK_PC:
-			vbr->req->resid_len = vbr->in_hdr.residual;
-			vbr->req->sense_len = vbr->in_hdr.sense_len;
-			vbr->req->errors = vbr->in_hdr.errors;
-			break;
-		case REQ_TYPE_SPECIAL:
-			vbr->req->errors = (error != 0);
-			break;
-		default:
-			break;
+	}
+
+	finish_wait(&vblk->queue_wait, &wait);
+}
+
+static inline void virtblk_add_req(struct virtblk_req *vbr,
+				   unsigned int out, unsigned int in)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+
+	spin_lock_irq(vblk->disk->queue->queue_lock);
+	if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+					GFP_ATOMIC) < 0)) {
+		spin_unlock_irq(vblk->disk->queue->queue_lock);
+		virtblk_add_buf_wait(vblk, vbr, out, in);
+		return;
+	}
+	virtqueue_kick(vblk->vq);
+	spin_unlock_irq(vblk->disk->queue->queue_lock);
+}
+
+static int virtblk_bio_send_flush(struct virtblk_req *vbr)
+{
+	unsigned int out = 0, in = 0;
+
+	vbr->flags |= VBLK_IS_FLUSH;
+	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+	vbr->out_hdr.sector = 0;
+	vbr->out_hdr.ioprio = 0;
+	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+	sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
+
+	virtblk_add_req(vbr, out, in);
+
+	return 0;
+}
+
+static int virtblk_bio_send_data(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+	unsigned int num, out = 0, in = 0;
+	struct bio *bio = vbr->bio;
+
+	vbr->flags &= ~VBLK_IS_FLUSH;
+	vbr->out_hdr.type = 0;
+	vbr->out_hdr.sector = bio->bi_sector;
+	vbr->out_hdr.ioprio = bio_prio(bio);
+
+	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+
+	num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
+
+	sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
+		   sizeof(vbr->status));
+
+	if (num) {
+		if (bio->bi_rw & REQ_WRITE) {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			out += num;
+		} else {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			in += num;
 		}
+	}
+
+	virtblk_add_req(vbr, out, in);
+
+	return 0;
+}
+
+static void virtblk_bio_send_data_work(struct work_struct *work)
+{
+	struct virtblk_req *vbr;
+
+	vbr = container_of(work, struct virtblk_req, work);
+
+	virtblk_bio_send_data(vbr);
+}
+
+static void virtblk_bio_send_flush_work(struct work_struct *work)
+{
+	struct virtblk_req *vbr;
+
+	vbr = container_of(work, struct virtblk_req, work);
+
+	virtblk_bio_send_flush(vbr);
+}
+
+static inline void virtblk_request_done(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+	struct request *req = vbr->req;
+	int error = virtblk_result(vbr);
+
+	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
+		req->resid_len = vbr->in_hdr.residual;
+		req->sense_len = vbr->in_hdr.sense_len;
+		req->errors = vbr->in_hdr.errors;
+	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+		req->errors = (error != 0);
+	}
+
+	__blk_end_request_all(req, error);
+	mempool_free(vbr, vblk->pool);
+}
+
+static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+
+	if (vbr->flags & VBLK_REQ_DATA) {
+		/* Send out the actual write data */
+		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
+		queue_work(virtblk_wq, &vbr->work);
+	} else {
+		bio_endio(vbr->bio, virtblk_result(vbr));
+		mempool_free(vbr, vblk->pool);
+	}
+}
+
+static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
 
-		__blk_end_request_all(vbr->req, error);
+	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
+		/* Send out a flush before end the bio */
+		vbr->flags &= ~VBLK_REQ_DATA;
+		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
+		queue_work(virtblk_wq, &vbr->work);
+	} else {
+		bio_endio(vbr->bio, virtblk_result(vbr));
 		mempool_free(vbr, vblk->pool);
 	}
+}
+
+static inline void virtblk_bio_done(struct virtblk_req *vbr)
+{
+	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
+		virtblk_bio_flush_done(vbr);
+	else
+		virtblk_bio_data_done(vbr);
+}
+
+static void virtblk_done(struct virtqueue *vq)
+{
+	struct virtio_blk *vblk = vq->vdev->priv;
+	bool bio_done = false, req_done = false;
+	struct virtblk_req *vbr;
+	unsigned long flags;
+	unsigned int len;
+
+	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+	do {
+		virtqueue_disable_cb(vq);
+		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+			if (vbr->bio) {
+				virtblk_bio_done(vbr);
+				bio_done = true;
+			} else {
+				virtblk_request_done(vbr);
+				req_done = true;
+			}
+		}
+	} while (!virtqueue_enable_cb(vq));
 	/* In case queue is stopped waiting for more buffers. */
-	blk_start_queue(vblk->disk->queue);
+	if (req_done)
+		blk_start_queue(vblk->disk->queue);
 	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
+
+	if (bio_done)
+		wake_up(&vblk->queue_wait);
 }
 
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	unsigned long num, out = 0, in = 0;
 	struct virtblk_req *vbr;
 
-	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
 	if (!vbr)
 		/* When another request finishes we'll try again. */
 		return false;
 
 	vbr->req = req;
-
+	vbr->bio = NULL;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
+	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
+			      GFP_ATOMIC) < 0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
@@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	return true;
 }
 
-static void do_virtblk_request(struct request_queue *q)
+static void virtblk_request(struct request_queue *q)
 {
 	struct virtio_blk *vblk = q->queuedata;
 	struct request *req;
@@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q)
 	virtqueue_kick(vblk->vq);
 }
 
+static void virtblk_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct virtio_blk *vblk = q->queuedata;
+	struct virtblk_req *vbr;
+
+	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
+
+	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
+	if (!vbr) {
+		bio_endio(bio, -ENOMEM);
+		return;
+	}
+
+	vbr->bio = bio;
+	vbr->flags = 0;
+	if (bio->bi_rw & REQ_FLUSH)
+		vbr->flags |= VBLK_REQ_FLUSH;
+	if (bio->bi_rw & REQ_FUA)
+		vbr->flags |= VBLK_REQ_FUA;
+	if (bio->bi_size)
+		vbr->flags |= VBLK_REQ_DATA;
+
+	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
+		virtblk_bio_send_flush(vbr);
+	else
+		virtblk_bio_send_data(vbr);
+}
+
 /* return id (s/n) string for *disk to *id_str
  */
 static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk)
 	int err = 0;
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
 	if (IS_ERR(vblk->vq))
 		err = PTR_ERR(vblk->vq);
 
@@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	struct virtio_blk *vblk;
 	struct request_queue *q;
 	int err, index;
+	int pool_size;
+
 	u64 cap;
 	u32 v, blk_size, sg_elems, opt_io_size;
 	u16 min_io_size;
@@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_free_index;
 	}
 
+	init_waitqueue_head(&vblk->queue_wait);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
 	sg_init_table(vblk->sg, vblk->sg_elems);
 	mutex_init(&vblk->config_lock);
+
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 	vblk->config_enable = true;
 
@@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_free_vblk;
 
-	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
+	pool_size = sizeof(struct virtblk_req);
+	if (use_bio)
+		pool_size += sizeof(struct scatterlist) * sg_elems;
+	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
 	if (!vblk->pool) {
 		err = -ENOMEM;
 		goto out_free_vq;
@@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_mempool;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
+	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
+	if (use_bio)
+		blk_queue_make_request(q, virtblk_make_request);
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
-
 	add_disk(vblk->disk);
 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
 	if (err)
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 060a672ebb7b..8ab9c3d4bf13 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -24,6 +24,8 @@
 #include <linux/err.h>
 #include <linux/freezer.h>
 #include <linux/fs.h>
+#include <linux/splice.h>
+#include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/poll.h>
@@ -474,26 +476,53 @@ static ssize_t send_control_msg(struct port *port, unsigned int event,
 	return 0;
 }
 
+struct buffer_token {
+	union {
+		void *buf;
+		struct scatterlist *sg;
+	} u;
+	/* If sgpages == 0 then buf is used, else sg is used */
+	unsigned int sgpages;
+};
+
+static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages)
+{
+	int i;
+	struct page *page;
+
+	for (i = 0; i < nrpages; i++) {
+		page = sg_page(&sg[i]);
+		if (!page)
+			break;
+		put_page(page);
+	}
+	kfree(sg);
+}
+
 /* Callers must take the port->outvq_lock */
 static void reclaim_consumed_buffers(struct port *port)
 {
-	void *buf;
+	struct buffer_token *tok;
 	unsigned int len;
 
 	if (!port->portdev) {
 		/* Device has been unplugged. vqs are already gone. */
 		return;
 	}
-	while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
-		kfree(buf);
+	while ((tok = virtqueue_get_buf(port->out_vq, &len))) {
+		if (tok->sgpages)
+			reclaim_sg_pages(tok->u.sg, tok->sgpages);
+		else
+			kfree(tok->u.buf);
+		kfree(tok);
 		port->outvq_full = false;
 	}
 }
 
-static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
-			bool nonblock)
+static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
+			      int nents, size_t in_count,
+			      struct buffer_token *tok, bool nonblock)
 {
-	struct scatterlist sg[1];
 	struct virtqueue *out_vq;
 	ssize_t ret;
 	unsigned long flags;
@@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
 
 	reclaim_consumed_buffers(port);
 
-	sg_init_one(sg, in_buf, in_count);
-	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
+	ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
@@ -544,6 +572,37 @@ done:
 	return in_count;
 }
 
+static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
+			bool nonblock)
+{
+	struct scatterlist sg[1];
+	struct buffer_token *tok;
+
+	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+	if (!tok)
+		return -ENOMEM;
+	tok->sgpages = 0;
+	tok->u.buf = in_buf;
+
+	sg_init_one(sg, in_buf, in_count);
+
+	return __send_to_port(port, sg, 1, in_count, tok, nonblock);
+}
+
+static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents,
+			  size_t in_count, bool nonblock)
+{
+	struct buffer_token *tok;
+
+	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+	if (!tok)
+		return -ENOMEM;
+	tok->sgpages = nents;
+	tok->u.sg = sg;
+
+	return __send_to_port(port, sg, nents, in_count, tok, nonblock);
+}
+
 /*
  * Give out the data that's requested from the buffer that we have
  * queued up.
@@ -665,6 +724,26 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
 	return fill_readbuf(port, ubuf, count, true);
 }
 
+static int wait_port_writable(struct port *port, bool nonblock)
+{
+	int ret;
+
+	if (will_write_block(port)) {
+		if (nonblock)
+			return -EAGAIN;
+
+		ret = wait_event_freezable(port->waitqueue,
+					   !will_write_block(port));
+		if (ret < 0)
+			return ret;
+	}
+	/* Port got hot-unplugged. */
+	if (!port->guest_connected)
+		return -ENODEV;
+
+	return 0;
+}
+
 static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
 			       size_t count, loff_t *offp)
 {
@@ -681,18 +760,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
 
 	nonblock = filp->f_flags & O_NONBLOCK;
 
-	if (will_write_block(port)) {
-		if (nonblock)
-			return -EAGAIN;
-
-		ret = wait_event_freezable(port->waitqueue,
-					   !will_write_block(port));
-		if (ret < 0)
-			return ret;
-	}
-	/* Port got hot-unplugged. */
-	if (!port->guest_connected)
-		return -ENODEV;
+	ret = wait_port_writable(port, nonblock);
+	if (ret < 0)
+		return ret;
 
 	count = min((size_t)(32 * 1024), count);
 
@@ -725,6 +795,93 @@ out:
 	return ret;
 }
 
+struct sg_list {
+	unsigned int n;
+	unsigned int size;
+	size_t len;
+	struct scatterlist *sg;
+};
+
+static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			struct splice_desc *sd)
+{
+	struct sg_list *sgl = sd->u.data;
+	unsigned int offset, len;
+
+	if (sgl->n == sgl->size)
+		return 0;
+
+	/* Try lock this page */
+	if (buf->ops->steal(pipe, buf) == 0) {
+		/* Get reference and unlock page for moving */
+		get_page(buf->page);
+		unlock_page(buf->page);
+
+		len = min(buf->len, sd->len);
+		sg_set_page(&(sgl->sg[sgl->n]), buf->page, len, buf->offset);
+	} else {
+		/* Failback to copying a page */
+		struct page *page = alloc_page(GFP_KERNEL);
+		char *src = buf->ops->map(pipe, buf, 1);
+		char *dst;
+
+		if (!page)
+			return -ENOMEM;
+		dst = kmap(page);
+
+		offset = sd->pos & ~PAGE_MASK;
+
+		len = sd->len;
+		if (len + offset > PAGE_SIZE)
+			len = PAGE_SIZE - offset;
+
+		memcpy(dst + offset, src + buf->offset, len);
+
+		kunmap(page);
+		buf->ops->unmap(pipe, buf, src);
+
+		sg_set_page(&(sgl->sg[sgl->n]), page, len, offset);
+	}
+	sgl->n++;
+	sgl->len += len;
+
+	return len;
+}
+
+/* Faster zero-copy write by splicing */
+static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
+				      struct file *filp, loff_t *ppos,
+				      size_t len, unsigned int flags)
+{
+	struct port *port = filp->private_data;
+	struct sg_list sgl;
+	ssize_t ret;
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.data = &sgl,
+	};
+
+	ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
+	if (ret < 0)
+		return ret;
+
+	sgl.n = 0;
+	sgl.len = 0;
+	sgl.size = pipe->nrbufs;
+	sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL);
+	if (unlikely(!sgl.sg))
+		return -ENOMEM;
+
+	sg_init_table(sgl.sg, sgl.size);
+	ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
+	if (likely(ret > 0))
+		ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true);
+
+	return ret;
+}
+
 static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
 {
 	struct port *port;
@@ -856,6 +1013,7 @@ static const struct file_operations port_fops = {
 	.open = port_fops_open,
 	.read = port_fops_read,
 	.write = port_fops_write,
+	.splice_write = port_fops_splice_write,
 	.poll = port_fops_poll,
 	.release = port_fops_release,
 	.fasync = port_fops_fasync,
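With the .splice_write hook added above, user space can move pipe contents
into a virtio-serial port via splice(2), landing in port_fops_splice_write()
without an intermediate copy through a user buffer; the virtio-trace agent
added later in this series relies on exactly this path. A minimal user-space
sketch, not part of this commit: the port name is an example and error
handling is abbreviated:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int pfd[2];
		ssize_t n;
		/* Any virtio-serial port exposed in the guest. */
		int port = open("/dev/virtio-ports/example-port", O_WRONLY);

		if (port < 0 || pipe(pfd) < 0)
			return 1;

		/* Fill the pipe from stdin (works when stdin is spliceable),
		 * then push the data on into the port with no user copy. */
		n = splice(0, NULL, pfd[1], NULL, 65536, SPLICE_F_MOVE);
		if (n > 0)
			n = splice(pfd[0], NULL, port, NULL, n, SPLICE_F_MOVE);
		return n < 0;
	}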
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 9e8388efd88e..fc92ccbd71dc 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -263,6 +263,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	struct virtqueue *vq;
 	int err;
 
+	if (!name)
+		return NULL;
+
 	/* We must have this many virtqueues. */
 	if (index >= ldev->desc->num_vq)
 		return ERR_PTR(-ENOENT);
@@ -296,7 +299,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	 * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu
 	 * barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+	vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev,
 				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 3541b4492f64..e7a4780e93db 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -84,6 +84,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	if (id >= ARRAY_SIZE(rvdev->vring))
 		return ERR_PTR(-EINVAL);
 
+	if (!name)
+		return NULL;
+
 	ret = rproc_alloc_vring(rvdev, id);
 	if (ret)
 		return ERR_PTR(ret);
@@ -103,7 +106,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	 * Create the new vq, and tell virtio we're not interested in
 	 * the 'weak' smp barriers, since we're talking with a real device.
 	 */
-	vq = vring_new_virtqueue(len, rvring->align, vdev, false, addr,
+	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
 				 rproc_virtio_notify, callback, name);
 	if (!vq) {
 		dev_err(dev, "vring_new_virtqueue %s failed\n", name);
diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig
index 32aead65735a..2bd911f12571 100644
--- a/drivers/rpmsg/Kconfig
+++ b/drivers/rpmsg/Kconfig
@@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)"
 config RPMSG
 	tristate
 	select VIRTIO
-	select VIRTIO_RING
 	depends on EXPERIMENTAL
 
 endmenu
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 47cccd52aae8..7dabef624da3 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -190,6 +190,9 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 	if (index >= kdev->desc->num_vq)
 		return ERR_PTR(-ENOENT);
 
+	if (!name)
+		return NULL;
+
 	config = kvm_vq_config(kdev->desc)+index;
 
 	err = vmem_add_mapping(config->address,
@@ -198,7 +201,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 	if (err)
 		goto out;
 
-	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
+	vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
 				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index f38b17a86c35..8d5bddb56cb1 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -1,11 +1,9 @@
-# Virtio always gets selected by whoever wants it.
 config VIRTIO
 	tristate
-
-# Similarly the virtio ring implementation.
-config VIRTIO_RING
-	tristate
-	depends on VIRTIO
+	---help---
+	  This option is selected by any driver which implements the virtio
+	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
+	  CONFIG_RPMSG or CONFIG_S390_GUEST.
 
 menu "Virtio drivers"
 
@@ -13,7 +11,6 @@ config VIRTIO_PCI
 	tristate "PCI driver for virtio devices (EXPERIMENTAL)"
 	depends on PCI && EXPERIMENTAL
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	  This drivers provides support for virtio based paravirtual device
 	  drivers over PCI. This requires that your VMM has appropriate PCI
@@ -26,9 +23,8 @@ config VIRTIO_PCI
 	  If unsure, say M.
 
 config VIRTIO_BALLOON
-	tristate "Virtio balloon driver (EXPERIMENTAL)"
-	select VIRTIO
-	select VIRTIO_RING
+	tristate "Virtio balloon driver"
+	depends on VIRTIO
 	---help---
 	  This driver supports increasing and decreasing the amount
 	  of memory within a KVM guest.
@@ -39,7 +35,6 @@ config VIRTIO_BALLOON
 	tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
 	depends on HAS_IOMEM && EXPERIMENTAL
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	  This drivers provides support for memory mapped virtio
 	  platform device driver.
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 5a4c63cfd380..9076635697bb 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,4 @@
-obj-$(CONFIG_VIRTIO) += virtio.o
-obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index c3b3f7f0d9d1..1e8659ca27ef 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -159,7 +159,7 @@ static int virtio_dev_remove(struct device *_d)
 	drv->remove(dev);
 
 	/* Driver should have reset device. */
-	BUG_ON(dev->config->get_status(dev));
+	WARN_ON_ONCE(dev->config->get_status(dev));
 
 	/* Acknowledge the device's existence again. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 453db0c403d8..6b1b7e184939 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -131,9 +131,6 @@ struct virtio_mmio_vq_info {
 	/* the number of entries in the queue */
 	unsigned int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -225,11 +222,10 @@ static void vm_reset(struct virtio_device *vdev)
 static void vm_notify(struct virtqueue *vq)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
-	struct virtio_mmio_vq_info *info = vq->priv;
 
 	/* We write the queue's selector into the notification register to
 	 * signal the other end */
-	writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+	writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
 }
 
 /* Notify all virtqueues on an interrupt. */
@@ -270,6 +266,7 @@ static void vm_del_vq(struct virtqueue *vq)
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
 	struct virtio_mmio_vq_info *info = vq->priv;
 	unsigned long flags, size;
+	unsigned int index = virtqueue_get_queue_index(vq);
 
 	spin_lock_irqsave(&vm_dev->lock, flags);
 	list_del(&info->node);
@@ -278,7 +275,7 @@ static void vm_del_vq(struct virtqueue *vq)
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
-	writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
+	writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
 	writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN));
@@ -309,6 +306,9 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 	unsigned long flags, size;
 	int err;
 
+	if (!name)
+		return NULL;
+
 	/* Select the queue we're interested in */
 	writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
 
@@ -324,7 +324,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 		err = -ENOMEM;
 		goto error_kmalloc;
 	}
-	info->queue_index = index;
 
 	/* Allocate pages for the queue - start with a queue as big as
 	 * possible (limited by maximum size allowed by device), drop down
@@ -332,11 +331,21 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 	 * and two rings (which makes it "alignment_size * 2")
 	 */
 	info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
+
+	/* If the device reports a 0 entry queue, we won't be able to
+	 * use it to perform I/O, and vring_new_virtqueue() can't create
+	 * empty queues anyway, so don't bother to set up the device.
+	 */
+	if (info->num == 0) {
+		err = -ENOENT;
+		goto error_alloc_pages;
+	}
+
 	while (1) {
 		size = PAGE_ALIGN(vring_size(info->num,
 				VIRTIO_MMIO_VRING_ALIGN));
-		/* Already smallest possible allocation? */
-		if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) {
+		/* Did the last iter shrink the queue below minimum size? */
+		if (size < VIRTIO_MMIO_VRING_ALIGN * 2) {
 			err = -ENOMEM;
 			goto error_alloc_pages;
 		}
@@ -356,7 +365,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+	vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
 				 true, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2e03d416b9af..c33aea36598a 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -48,6 +48,7 @@ struct virtio_pci_device
 	int msix_enabled;
 	int intx_enabled;
 	struct msix_entry *msix_entries;
+	cpumask_var_t *msix_affinity_masks;
 	/* Name strings for interrupts. This size should be enough,
 	 * and I'm too lazy to allocate each name separately. */
 	char (*msix_names)[256];
@@ -79,9 +80,6 @@ struct virtio_pci_vq_info
 	/* the number of entries in the queue */
 	int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -202,11 +200,11 @@ static void vp_reset(struct virtio_device *vdev)
 static void vp_notify(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
-	struct virtio_pci_vq_info *info = vq->priv;
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(virtqueue_get_queue_index(vq),
+		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -279,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
 		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
 
+	for (i = 0; i < vp_dev->msix_vectors; i++)
+		if (vp_dev->msix_affinity_masks[i])
+			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+
 	if (vp_dev->msix_enabled) {
 		/* Disable the vector used for configuration */
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -296,6 +298,8 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	vp_dev->msix_names = NULL;
 	kfree(vp_dev->msix_entries);
 	vp_dev->msix_entries = NULL;
+	kfree(vp_dev->msix_affinity_masks);
+	vp_dev->msix_affinity_masks = NULL;
 }
 
 static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
@@ -314,6 +318,15 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 				     GFP_KERNEL);
 	if (!vp_dev->msix_names)
 		goto error;
+	vp_dev->msix_affinity_masks
+		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+			  GFP_KERNEL);
+	if (!vp_dev->msix_affinity_masks)
+		goto error;
+	for (i = 0; i < nvectors; ++i)
+		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+					GFP_KERNEL))
+			goto error;
 
 	for (i = 0; i < nvectors; ++i)
 		vp_dev->msix_entries[i].entry = i;
@@ -402,7 +415,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	info->queue_index = index;
 	info->num = num;
 	info->msix_vector = msix_vec;
 
@@ -418,7 +430,7 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+	vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
 				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -467,7 +479,8 @@ static void vp_del_vq(struct virtqueue *vq)
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(virtqueue_get_queue_index(vq),
+		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -542,7 +555,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 	vp_dev->per_vq_vectors = per_vq_vectors;
 	allocated_vectors = vp_dev->msix_used_vectors;
 	for (i = 0; i < nvqs; ++i) {
-		if (!callbacks[i] || !vp_dev->msix_enabled)
+		if (!names[i]) {
+			vqs[i] = NULL;
+			continue;
+		} else if (!callbacks[i] || !vp_dev->msix_enabled)
 			msix_vec = VIRTIO_MSI_NO_VECTOR;
 		else if (vp_dev->per_vq_vectors)
 			msix_vec = allocated_vectors++;
@@ -609,6 +625,35 @@ static const char *vp_bus_name(struct virtio_device *vdev)
 	return pci_name(vp_dev->pci_dev);
 }
 
+/* Setup the affinity for a virtqueue:
+ * - force the affinity for per vq vector
+ * - OR over all affinities for shared MSI
+ * - ignore the affinity request if we're using INTX
+ */
+static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+	struct cpumask *mask;
+	unsigned int irq;
+
+	if (!vq->callback)
+		return -EINVAL;
+
+	if (vp_dev->msix_enabled) {
+		mask = vp_dev->msix_affinity_masks[info->msix_vector];
+		irq = vp_dev->msix_entries[info->msix_vector].vector;
+		if (cpu == -1)
+			irq_set_affinity_hint(irq, NULL);
+		else {
+			cpumask_set_cpu(cpu, mask);
+			irq_set_affinity_hint(irq, mask);
+		}
+	}
+	return 0;
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get = vp_get,
 	.set = vp_set,
@@ -620,6 +665,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.get_features = vp_get_features,
 	.finalize_features = vp_finalize_features,
 	.bus_name = vp_bus_name,
+	.set_vq_affinity = vp_set_vq_affinity,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
@@ -673,8 +719,10 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
 		goto out_enable_device;
 
 	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
-	if (vp_dev->ioaddr == NULL)
+	if (vp_dev->ioaddr == NULL) {
+		err = -ENOMEM;
 		goto out_req_regions;
+	}
 
 	pci_set_drvdata(pci_dev, vp_dev);
 	pci_set_master(pci_dev);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5aa43c3392a2..e639584b2dbd 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -106,6 +106,9 @@ struct vring_virtqueue
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
+	/* Index of the queue */
+	int queue_index;
+
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -171,6 +174,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
+int virtqueue_get_queue_index(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	return vq->queue_index;
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
+
 /**
  * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
@@ -616,7 +626,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 }
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
-struct virtqueue *vring_new_virtqueue(unsigned int num,
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+				      unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
@@ -647,6 +658,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
+	vq->queue_index = index;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index a1ba8bbd9fbe..533b1157f22e 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -50,6 +50,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
+int virtqueue_get_queue_index(struct virtqueue *vq);
+
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index fc457f452f64..e2850a7ea276 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -84,7 +84,9 @@
  *	nvqs: the number of virtqueues to find
  *	vqs: on success, includes new virtqueues
  *	callbacks: array of callbacks, for each virtqueue
+ *		include a NULL entry for vqs that do not need a callback
  *	names: array of virtqueue names (mainly for debugging)
+ *		include a NULL entry for vqs unused by driver
  *	Returns 0 on success or error status
  * @del_vqs: free virtqueues found by find_vqs().
  * @get_features: get the array of feature bits for this device.
@@ -98,6 +100,7 @@
  *	vdev: the virtio_device
  *	This returns a pointer to the bus name a la pci_name from which
  *	the caller can then copy.
+ * @set_vq_affinity: set the affinity for a virtqueue.
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -116,6 +119,7 @@ struct virtio_config_ops {
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
+	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -190,5 +194,24 @@ const char *virtio_bus_name(struct virtio_device *vdev)
 	return vdev->config->bus_name(vdev);
 }
 
+/**
+ * virtqueue_set_affinity - setting affinity for a virtqueue
+ * @vq: the virtqueue
+ * @cpu: the cpu no.
+ *
+ * Pay attention the function are best-effort: the affinity hint may not be set
+ * due to config support, irq type and sharing.
+ *
+ */
+static inline
+int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
+{
+	struct virtio_device *vdev = vq->vdev;
+	if (vdev->config->set_vq_affinity)
+		return vdev->config->set_vq_affinity(vq, cpu);
+	return 0;
+}
+
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index e338730c2660..c2d793a06ad7 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -165,7 +165,8 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
 struct virtio_device;
 struct virtqueue;
 
-struct virtqueue *vring_new_virtqueue(unsigned int num,
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+				      unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cdcb59450b49..31e4f55773f1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4200,12 +4200,6 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
 	buf->private = 0;
 }
 
-static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
-				 struct pipe_buffer *buf)
-{
-	return 1;
-}
-
 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 				struct pipe_buffer *buf)
 {
@@ -4221,7 +4215,7 @@ static const struct pipe_buf_operations buffer_pipe_buf_ops = {
 	.unmap = generic_pipe_buf_unmap,
 	.confirm = generic_pipe_buf_confirm,
 	.release = buffer_pipe_buf_release,
-	.steal = buffer_pipe_buf_steal,
+	.steal = generic_pipe_buf_steal,
 	.get = buffer_pipe_buf_get,
 };
 
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index f759f4f097c7..fd2f9221b241 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type)
 	dev->feature_len = 0;
 	dev->num_vq = 0;
 	dev->running = false;
+	dev->next = NULL;
 
 	/*
 	 * Append to device list. Prepending to a single-linked list is
diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile
new file mode 100644
index 000000000000..0d2381633475
--- /dev/null
+++ b/tools/virtio/virtio-trace/Makefile
@@ -0,0 +1,13 @@
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+	$(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+	$(CC) $(CFLAGS) -o $@ $^
+
+clean:
+	rm -f *.o trace-agent
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
new file mode 100644
index 000000000000..b64845b823ab
--- /dev/null
+++ b/tools/virtio/virtio-trace/README
@@ -0,0 +1,118 @@
1Trace Agent for virtio-trace
2============================
3
4Trace agent is a user tool for sending trace data from a guest to a host with
5low overhead. The trace agent has the following functions:
6 - splice a page of the ring buffer to read_pipe without copying memory
7 - splice the page from write_pipe to virtio-console without copying memory
8 - write trace data to stdout when the -o option is given
9 - start/stop control via orders from the host
10
11The trace agent operates as follows:
12 1) Initialize all structures.
13 2) Create a read/write thread per CPU; each read/write thread is bound to
14    its own CPU.
15 3) A controller thread polls for a start order from the host.
16 4) When the controller receives a start order from the host, it wakes the
17    read/write threads (a minimal sketch of this wake-up protocol follows).
18 5) The read/write threads read trace data from the ring buffers and
19    write the data to virtio-serial.
20 6) When the controller receives a stop order from the host, the read/write
21    threads stop reading trace data.
22
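Steps 3)-5) hinge on a pthread condition variable: the controller broadcasts
on it, and the read/write threads sleep on it until woken. The following is a
minimal, self-contained sketch of that wake-up protocol, not the agent itself;
lock, wake and running stand in for the agent's mutex_notify, cond_wakeup and
global_run_operation (build with gcc -pthread):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
	static bool running;	/* the "start order" flag */

	static void *worker(void *arg)
	{
		pthread_mutex_lock(&lock);
		while (!running)	/* sleep until the start order */
			pthread_cond_wait(&wake, &lock);
		pthread_mutex_unlock(&lock);
		printf("worker: reading trace data\n");
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, worker, NULL);
		sleep(1);	/* stands in for poll()ing the control path */
		pthread_mutex_lock(&lock);
		running = true;	/* the controller saw a start order */
		pthread_cond_broadcast(&wake);	/* wake all workers */
		pthread_mutex_unlock(&lock);
		pthread_join(t, NULL);
		return 0;
	}
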
23
24Files
25=====
26
27README: this file
28Makefile: Makefile of the trace agent for virtio-trace
29trace-agent.c: main function; sets up and runs the trace agent
30trace-agent.h: all structures and some macros
31trace-agent-ctl.c: controller functions for the read/write threads
32trace-agent-rw.c: read/write thread functions
33
34
35Setup
36=====
37
38To use this trace agent for virtio-trace, we need to prepare some virtio-serial
39interfaces.
40
411) Make FIFOs on the host
42 virtio-trace uses one virtio-serial pipe per guest CPU as a trace data path,
43plus one control path, so FIFOs (named pipes) should be created as follows:
44 # mkdir /tmp/virtio-trace/
45 # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
46 # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
47
48For example, if a guest uses three CPUs, the names are
49 trace-path-cpu{0,1,2}.{in,out}
50and
51 agent-ctl-path.{in,out}.
52
532) Set up the virtio-serial pipes on the host
54 Add the following qemu options to use the virtio-serial pipes.
55
56 ##virtio-serial device##
57 -device virtio-serial-pci,id=virtio-serial0\
58 ##control path##
59 -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
60 -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
61 id=channel0,name=agent-ctl-path\
62 ##data path##
63 -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
 64 -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
65 id=channel1,name=trace-path-cpu0\
66 ...
67
68If you manage guests with libvirt, add the following tags to the domain XML
69files; libvirt then passes the same command-line options to qemu.
70
71 <channel type='pipe'>
72 <source path='/tmp/virtio-trace/agent-ctl-path'/>
73 <target type='virtio' name='agent-ctl-path'/>
74 <address type='virtio-serial' controller='0' bus='0' port='0'/>
75 </channel>
76 <channel type='pipe'>
77 <source path='/tmp/virtio-trace/trace-path-cpu0'/>
78 <target type='virtio' name='trace-path-cpu0'/>
79 <address type='virtio-serial' controller='0' bus='0' port='1'/>
80 </channel>
81 ...
82Here, the chardev names are restricted to trace-path-cpuX and agent-ctl-path.
83For example, if a guest uses three CPUs, the chardev names should be
84trace-path-cpu0, trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
85
863) Boot the guest
87 The character devices then show up under /dev/virtio-ports/ in the guest.
88
89
90Run
91===
92
930) Build the trace agent in the guest
94 $ make
95
961) Enable ftrace in the guest
97 <Example>
98 # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
99
1002) Run the trace agent in the guest
101 The agent must be run as root.
102 # ./trace-agent
103The read/write threads in the agent wait for a start order from the host. If
104the -o option is given, trace data are also written to stdout in the guest.
105
1063) Open the FIFOs on the host
107 # cat /tmp/virtio-trace/trace-path-cpu0.out
108If the host does not open these FIFOs, trace data get stuck in the virtio
109buffers and the guest stalls, because the chardev in QEMU works in blocking
110mode. This limitation may be lifted in the future.
111
1124) Start reading trace data by an order from the host
113 The host injects a read start order into the guest via virtio-serial.
114 # echo 1 > /tmp/virtio-trace/agent-ctl-path.in
115
1165) Stop reading trace data by an order from the host
117 The host injects a read stop order into the guest via virtio-serial.
118 # echo 0 > /tmp/virtio-trace/agent-ctl-path.in
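
The orders in 4) and 5) can also be sent programmatically. The following is a
minimal host-side sketch in C, assuming the guest and the agent are already
running and using the FIFO paths created in the Setup section (error handling
shortened). The agent accepts exactly the two-byte messages "1\n" and "0\n",
matching the rlen == 2 checks in trace-agent-ctl.c:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int ctl = open("/tmp/virtio-trace/agent-ctl-path.in", O_WRONLY);
		int data = open("/tmp/virtio-trace/trace-path-cpu0.out", O_RDONLY);

		if (ctl < 0 || data < 0)
			return 1;
		write(ctl, "1\n", 2);			/* start order */
		n = read(data, buf, sizeof(buf));	/* drain one chunk of CPU0 data */
		if (n > 0)
			fwrite(buf, 1, (size_t)n, stdout);
		write(ctl, "0\n", 2);			/* stop order */
		close(data);
		close(ctl);
		return 0;
	}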
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c
new file mode 100644
index 000000000000..a2d0403c4f94
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -0,0 +1,137 @@
1/*
2 * Controller of read/write threads for virtio-trace
3 *
4 * Copyright (C) 2012 Hitachi, Ltd.
5 * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
6 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
7 *
8 * Licensed under GPL version 2 only.
9 *
10 */
11
12#define _GNU_SOURCE
13#include <fcntl.h>
14#include <poll.h>
15#include <signal.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <unistd.h>
19#include "trace-agent.h"
20
21#define HOST_MSG_SIZE 256
22#define EVENT_WAIT_MSEC 100
23
24static volatile sig_atomic_t global_signal_val;
25bool global_sig_receive; /* default false */
26bool global_run_operation; /* default false*/
27
28/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
29static void signal_handler(int sig)
30{
31 global_signal_val = sig;
32}
33
34int rw_ctl_init(const char *ctl_path)
35{
36 int ctl_fd;
37
38 ctl_fd = open(ctl_path, O_RDONLY);
39 if (ctl_fd == -1) {
40 pr_err("Cannot open ctl_fd\n");
41 goto error;
42 }
43
44 return ctl_fd;
45
46error:
47 exit(EXIT_FAILURE);
48}
49
50static int wait_order(int ctl_fd)
51{
52 struct pollfd poll_fd;
53 int ret = 0;
54
55 while (!global_sig_receive) {
56 poll_fd.fd = ctl_fd;
57 poll_fd.events = POLLIN;
58
59 ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
60
61 if (global_signal_val) {
62 global_sig_receive = true;
 63			pr_info("Received interrupt %d\n", global_signal_val);
 64
 65			/* Wake the rw-threads if they are sleeping */
66 if (!global_run_operation)
67 pthread_cond_broadcast(&cond_wakeup);
68
69 ret = -1;
70 break;
71 }
72
73 if (ret < 0) {
74 pr_err("Polling error\n");
75 goto error;
76 }
77
78 if (ret)
79 break;
 80	}
81
82 return ret;
83
84error:
85 exit(EXIT_FAILURE);
86}
87
 88/*
 89 * control the read/write threads by toggling global_run_operation
 90 */
91void *rw_ctl_loop(int ctl_fd)
92{
93 ssize_t rlen;
94 char buf[HOST_MSG_SIZE];
95 int ret;
96
97 /* Setup signal handlers */
98 signal(SIGTERM, signal_handler);
99 signal(SIGINT, signal_handler);
100 signal(SIGQUIT, signal_handler);
101
102 while (!global_sig_receive) {
103
104 ret = wait_order(ctl_fd);
105 if (ret < 0)
106 break;
107
108 rlen = read(ctl_fd, buf, sizeof(buf));
109 if (rlen < 0) {
110 pr_err("read data error in ctl thread\n");
111 goto error;
112 }
113
114 if (rlen == 2 && buf[0] == '1') {
115 /*
116 * If host writes '1' to a control path,
117 * this controller wakes all read/write threads.
118 */
119 global_run_operation = true;
120 pthread_cond_broadcast(&cond_wakeup);
121 pr_debug("Wake up all read/write threads\n");
122 } else if (rlen == 2 && buf[0] == '0') {
123 /*
124 * If host writes '0' to a control path, read/write
125 * threads will wait for notification from Host.
126 */
127 global_run_operation = false;
128 pr_debug("Stop all read/write threads\n");
129 } else
 130			pr_info("Invalid host notification: %.*s\n", (int)rlen, buf);
131 }
132
133 return NULL;
134
135error:
136 exit(EXIT_FAILURE);
137}
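
Note the 100 ms timeout (EVENT_WAIT_MSEC) passed to poll() in wait_order():
polling with a short timeout lets the loop notice a signal flagged by
signal_handler() promptly, instead of blocking forever in a read. A standalone
sketch of the same pattern, watching stdin instead of the control path:

	#include <poll.h>
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static volatile sig_atomic_t got_signal;

	static void on_signal(int sig)
	{
		got_signal = sig;
	}

	int main(void)
	{
		struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };

		signal(SIGINT, on_signal);
		for (;;) {
			int ret = poll(&pfd, 1, 100);	/* 100 ms, as EVENT_WAIT_MSEC */

			if (got_signal) {	/* noticed at most ~100 ms later */
				fprintf(stderr, "interrupted by signal %d\n",
					(int)got_signal);
				return 1;
			}
			if (ret < 0)		/* polling error (EINTR included) */
				return 1;
			if (ret > 0)		/* an "order" arrived on stdin */
				break;
		}
		printf("order received\n");
		return 0;
	}
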
diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c
new file mode 100644
index 000000000000..3aace5ea4842
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-rw.c
@@ -0,0 +1,192 @@
1/*
2 * Read/write thread of a guest agent for virtio-trace
3 *
4 * Copyright (C) 2012 Hitachi, Ltd.
5 * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
6 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
7 *
8 * Licensed under GPL version 2 only.
9 *
10 */
11
12#define _GNU_SOURCE
13#include <fcntl.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <sys/syscall.h>
18#include "trace-agent.h"
19
20#define READ_WAIT_USEC 100000
21
22void *rw_thread_info_new(void)
23{
24 struct rw_thread_info *rw_ti;
25
26 rw_ti = zalloc(sizeof(struct rw_thread_info));
27 if (rw_ti == NULL) {
28 pr_err("rw_thread_info zalloc error\n");
29 exit(EXIT_FAILURE);
30 }
31
32 rw_ti->cpu_num = -1;
33 rw_ti->in_fd = -1;
34 rw_ti->out_fd = -1;
35 rw_ti->read_pipe = -1;
36 rw_ti->write_pipe = -1;
37 rw_ti->pipe_size = PIPE_INIT;
38
39 return rw_ti;
40}
41
42void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
43 bool stdout_flag, unsigned long pipe_size,
44 struct rw_thread_info *rw_ti)
45{
46 int data_pipe[2];
47
48 rw_ti->cpu_num = cpu;
49
50 /* set read(input) fd */
51 rw_ti->in_fd = open(in_path, O_RDONLY);
52 if (rw_ti->in_fd == -1) {
53 pr_err("Could not open in_fd (CPU:%d)\n", cpu);
54 goto error;
55 }
56
57 /* set write(output) fd */
58 if (!stdout_flag) {
59 /* virtio-serial output mode */
60 rw_ti->out_fd = open(out_path, O_WRONLY);
61 if (rw_ti->out_fd == -1) {
62 pr_err("Could not open out_fd (CPU:%d)\n", cpu);
63 goto error;
64 }
65 } else
66 /* stdout mode */
67 rw_ti->out_fd = STDOUT_FILENO;
68
69 if (pipe2(data_pipe, O_NONBLOCK) < 0) {
70 pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
71 goto error;
72 }
73
 74	/*
 75	 * The default pipe size is 64KB (see fs/pipe.c). Enlarge it so
 76	 * that trace data can be moved in larger chunks.
 77	 */
78 if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
79 pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
80 goto error;
81 }
82
 83	rw_ti->read_pipe = data_pipe[1];	/* write end; filled from in_fd */
 84	rw_ti->write_pipe = data_pipe[0];	/* read end; drained into out_fd */
85 rw_ti->pipe_size = pipe_size;
86
87 return NULL;
88
89error:
90 exit(EXIT_FAILURE);
91}
92
93/* Bind a thread to a cpu */
94static void bind_cpu(int cpu_num)
95{
96 cpu_set_t mask;
97
98 CPU_ZERO(&mask);
99 CPU_SET(cpu_num, &mask);
100
 101	/* a pid of zero binds the calling thread to cpu_num */
102 if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
103 pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
104}
105
106static void *rw_thread_main(void *thread_info)
107{
108 ssize_t rlen, wlen;
109 ssize_t ret;
110 struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
111
112 bind_cpu(ts->cpu_num);
113
114 while (1) {
115 /* Wait for a read order of trace data by Host OS */
116 if (!global_run_operation) {
117 pthread_mutex_lock(&mutex_notify);
118 pthread_cond_wait(&cond_wakeup, &mutex_notify);
119 pthread_mutex_unlock(&mutex_notify);
120 }
121
122 if (global_sig_receive)
123 break;
124
 125		/*
 126		 * Each thread reads the trace_pipe_raw of the CPU it is
 127		 * bound to, so there is no contention between threads.
 128		 */
129 rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
130 ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
131
132 if (rlen < 0) {
133 pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
134 goto error;
 135		} else if (rlen == 0) {
 136			/*
 137			 * splice() returns 0 when no trace data are
 138			 * available, or when the available data do not
 139			 * yet fill a page. Wait for the ring buffer to
 140			 * fill up, then retry.
 141			 */
142 usleep(READ_WAIT_USEC);
143 pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
144 continue;
145 }
146
147 wlen = 0;
148
149 do {
150 ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
151 rlen - wlen,
152 SPLICE_F_MOVE | SPLICE_F_MORE);
153
154 if (ret < 0) {
155 pr_err("Splice_write in rw-thread(%d)\n",
156 ts->cpu_num);
157 goto error;
158 } else if (ret == 0)
 159				/*
 160				 * If the host reader cannot keep up with the
 161				 * trace data, the guest stalls, because the
 162				 * char device in QEMU does not support
 163				 * non-blocking mode. The writer therefore
 164				 * sleeps for a while and retries.
 165				 * This sleep can be removed once non-blocking
 166				 * mode is supported.
 167				 */
168 sleep(1);
169 wlen += ret;
170 } while (wlen < rlen);
171 }
172
173 return NULL;
174
175error:
176 exit(EXIT_FAILURE);
177}
178
179
180pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
181{
182 int ret;
183 pthread_t rw_thread_per_cpu;
184
185 ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
186 if (ret != 0) {
187 pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
188 exit(EXIT_FAILURE);
189 }
190
191 return rw_thread_per_cpu;
192}
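
rw_thread_main() above relays trace data with two splice() calls through a
pipe, so pages move from trace_pipe_raw to virtio-serial without being copied
through user space. A standalone sketch of the same zero-copy pattern, relaying
an ordinary file to stdout (Linux-only; redirect stdout to a file, since
splice() needs a splice-capable destination):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char *argv[])
	{
		int pfd[2];
		ssize_t n;
		int in;

		if (argc != 2 || pipe(pfd) < 0) {
			fprintf(stderr, "usage: %s <file> > <outfile>\n", argv[0]);
			return 1;
		}
		in = open(argv[1], O_RDONLY);
		if (in < 0)
			return 1;
		/* file -> pipe, then pipe -> stdout; pages are moved, not copied */
		while ((n = splice(in, NULL, pfd[1], NULL, 65536,
				   SPLICE_F_MOVE | SPLICE_F_MORE)) > 0) {
			ssize_t done = 0;

			while (done < n) {
				ssize_t w = splice(pfd[0], NULL, STDOUT_FILENO,
						   NULL, n - done,
						   SPLICE_F_MOVE | SPLICE_F_MORE);
				if (w <= 0)
					return 1;
				done += w;
			}
		}
		return 0;
	}
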
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 000000000000..0a0a7dd4eff7
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,270 @@
1/*
2 * Guest agent for virtio-trace
3 *
4 * Copyright (C) 2012 Hitachi, Ltd.
5 * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
6 * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
7 *
8 * Licensed under GPL version 2 only.
9 *
10 */
11
12#define _GNU_SOURCE
13#include <limits.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include "trace-agent.h"
18
19#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
20#define PIPE_DEF_BUFS 16
21#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
22#define PIPE_MAX_SIZE (1024*1024)
23#define READ_PATH_FMT \
24 "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
25#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
26#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
27
28pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
29pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
30
31static int get_total_cpus(void)
32{
33 int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
34
35 if (nr_cpus <= 0) {
 36		pr_err("Could not read the number of cpus\n");
37 goto error;
38 } else if (nr_cpus > MAX_CPUS) {
 39		pr_err("Exceeded the max number of cpus (%d)\n", (int)MAX_CPUS);
40 goto error;
41 }
42
43 return nr_cpus;
44
45error:
46 exit(EXIT_FAILURE);
47}
48
49static void *agent_info_new(void)
50{
51 struct agent_info *s;
52 int i;
53
54 s = zalloc(sizeof(struct agent_info));
55 if (s == NULL) {
56 pr_err("agent_info zalloc error\n");
57 exit(EXIT_FAILURE);
58 }
59
60 s->pipe_size = PIPE_INIT;
61 s->use_stdout = false;
62 s->cpus = get_total_cpus();
63 s->ctl_fd = -1;
64
65 /* read/write threads init */
66 for (i = 0; i < s->cpus; i++)
67 s->rw_ti[i] = rw_thread_info_new();
68
69 return s;
70}
71
72static unsigned long parse_size(const char *arg)
73{
74 unsigned long value, round;
75 char *ptr;
76
77 value = strtoul(arg, &ptr, 10);
78 switch (*ptr) {
79 case 'K': case 'k':
80 value <<= 10;
81 break;
82 case 'M': case 'm':
83 value <<= 20;
84 break;
85 default:
86 break;
87 }
88
89 if (value > PIPE_MAX_SIZE) {
 90		pr_err("Pipe size must not exceed 1MB\n");
91 goto error;
92 } else if (value < PIPE_MIN_SIZE) {
 93		pr_err("Pipe size must be at least 64KB\n");
94 goto error;
95 }
96
97 /* Align buffer size with page unit */
98 round = value & (PAGE_SIZE - 1);
99 value = value - round;
100
101 return value;
102error:
103 return 0;
104}
105
106static void usage(char const *prg)
107{
108 pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
109}
110
111static const char *make_path(int cpu_num, bool this_is_write_path)
112{
113 int ret;
114 char *buf;
115
116 buf = zalloc(PATH_MAX);
117 if (buf == NULL) {
118 pr_err("Could not allocate buffer\n");
119 goto error;
120 }
121
122 if (this_is_write_path)
123 /* write(output) path */
124 ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
125 else
126 /* read(input) path */
127 ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
128
129 if (ret <= 0) {
130 pr_err("Failed to generate %s path(CPU#%d):%d\n",
 131			this_is_write_path ? "write" : "read", cpu_num, ret);
132 goto error;
133 }
134
135 return buf;
136
137error:
138 free(buf);
139 return NULL;
140}
141
142static const char *make_input_path(int cpu_num)
143{
144 return make_path(cpu_num, false);
145}
146
147static const char *make_output_path(int cpu_num)
148{
149 return make_path(cpu_num, true);
150}
151
152static void *agent_info_init(struct agent_info *s)
153{
154 int cpu;
155 const char *in_path = NULL;
156 const char *out_path = NULL;
157
158 /* init read/write threads */
159 for (cpu = 0; cpu < s->cpus; cpu++) {
160 /* set read(input) path per read/write thread */
161 in_path = make_input_path(cpu);
162 if (in_path == NULL)
163 goto error;
164
165 /* set write(output) path per read/write thread*/
166 if (!s->use_stdout) {
167 out_path = make_output_path(cpu);
168 if (out_path == NULL)
169 goto error;
170 } else
171 /* stdout mode */
172 pr_debug("stdout mode\n");
173
174 rw_thread_init(cpu, in_path, out_path, s->use_stdout,
175 s->pipe_size, s->rw_ti[cpu]);
176 }
177
178 /* init controller of read/write threads */
179 s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
180
181 return NULL;
182
183error:
184 exit(EXIT_FAILURE);
185}
186
187static void *parse_args(int argc, char *argv[], struct agent_info *s)
188{
189 int cmd;
190 unsigned long size;
191
192 while ((cmd = getopt(argc, argv, "hos:")) != -1) {
193 switch (cmd) {
194 /* stdout mode */
195 case 'o':
196 s->use_stdout = true;
197 break;
198 /* size of pipe */
199 case 's':
200 size = parse_size(optarg);
201 if (size == 0)
202 goto error;
203 s->pipe_size = size;
204 break;
205 case 'h':
206 default:
207 usage(argv[0]);
208 goto error;
209 }
210 }
211
212 agent_info_init(s);
213
214 return NULL;
215
216error:
217 exit(EXIT_FAILURE);
218}
219
220static void agent_main_loop(struct agent_info *s)
221{
222 int cpu;
223 pthread_t rw_thread_per_cpu[MAX_CPUS];
224
225 /* Start all read/write threads */
226 for (cpu = 0; cpu < s->cpus; cpu++)
227 rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
228
229 rw_ctl_loop(s->ctl_fd);
230
231 /* Finish all read/write threads */
232 for (cpu = 0; cpu < s->cpus; cpu++) {
233 int ret;
234
235 ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
236 if (ret != 0) {
237 pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
238 exit(EXIT_FAILURE);
239 }
240 }
241}
242
243static void agent_info_free(struct agent_info *s)
244{
245 int i;
246
247 close(s->ctl_fd);
248 for (i = 0; i < s->cpus; i++) {
249 close(s->rw_ti[i]->in_fd);
250 close(s->rw_ti[i]->out_fd);
251 close(s->rw_ti[i]->read_pipe);
252 close(s->rw_ti[i]->write_pipe);
253 free(s->rw_ti[i]);
254 }
255 free(s);
256}
257
258int main(int argc, char *argv[])
259{
260 struct agent_info *s = NULL;
261
262 s = agent_info_new();
263 parse_args(argc, argv, s);
264
265 agent_main_loop(s);
266
267 agent_info_free(s);
268
269 return 0;
270}
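
The alignment step at the end of parse_size() masks off the bits below the
page size, rounding the requested pipe size down to a page multiple. A tiny
standalone demonstration of that arithmetic (the input value is an arbitrary
example):

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		unsigned long page = (unsigned long)sysconf(_SC_PAGE_SIZE);
		unsigned long value = 200 * 1024 + 123;	/* as if "-s 200K" plus change */
		unsigned long round = value & (page - 1);	/* bits below the page size */

		value -= round;	/* round down to a page multiple */
		printf("aligned size: %lu\n", value);	/* 204800 with 4KB pages */
		return 0;
	}
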
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 000000000000..8de79bfeaa73
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,75 @@
1#ifndef __TRACE_AGENT_H__
2#define __TRACE_AGENT_H__
3#include <pthread.h>
4#include <stdbool.h>
5
6#define MAX_CPUS 256
7#define PIPE_INIT (1024*1024)
8
9/*
10 * agent_info - structure managing total information of guest agent
11 * @pipe_size: size of pipe (default 1MB)
 12 * @use_stdout:	set to true when the -o option is given (default false)
13 * @cpus: total number of CPUs
14 * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
15 * @rw_ti: structure managing information of read/write threads
16 */
17struct agent_info {
18 unsigned long pipe_size;
19 bool use_stdout;
20 int cpus;
21 int ctl_fd;
22 struct rw_thread_info *rw_ti[MAX_CPUS];
23};
24
25/*
 26 * rw_thread_info - structure managing the read/write thread for one cpu
 27 * @cpu_num:	the cpu this read/write thread operates on
 28 * @in_fd:	fd for reading the trace data path of cpu_num
 29 * @out_fd:	fd for writing the trace data path of cpu_num
 30 * @read_pipe:	fd of the pipe's write end (filled from in_fd)
 31 * @write_pipe:	fd of the pipe's read end (drained into out_fd)
 32 * @pipe_size:	size of the pipe (default 1MB)
33 */
34struct rw_thread_info {
35 int cpu_num;
36 int in_fd;
37 int out_fd;
38 int read_pipe;
39 int write_pipe;
40 unsigned long pipe_size;
41};
42
43/* use for stopping rw threads */
44extern bool global_sig_receive;
45
46/* use for notification */
47extern bool global_run_operation;
48extern pthread_mutex_t mutex_notify;
49extern pthread_cond_t cond_wakeup;
50
51/* for controller of read/write threads */
52extern int rw_ctl_init(const char *ctl_path);
53extern void *rw_ctl_loop(int ctl_fd);
54
55/* for trace read/write thread */
56extern void *rw_thread_info_new(void);
57extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
58 bool stdout_flag, unsigned long pipe_size,
59 struct rw_thread_info *rw_ti);
60extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
61
62static inline void *zalloc(size_t size)
63{
 64	return calloc(1, size);	/* callers include <stdlib.h> beforehand */
65}
66
67#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
68#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
69#ifdef DEBUG
70#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
71#else
72#define pr_debug(format, ...) do {} while (0)
73#endif
74
75#endif /*__TRACE_AGENT_H__*/
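
One usage note: the pr_debug() messages above compile to no-ops unless DEBUG
is defined. Because the Makefile assigns CFLAGS with '=', GNU make's
command-line override can enable them without editing the Makefile:

	$ make CFLAGS="-O2 -Wall -pthread -DDEBUG"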