path: root/drivers/vhost/vhost.c
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r--	drivers/vhost/vhost.c	232
1 file changed, 198 insertions, 34 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0b99783083f6..e05557d52999 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -17,12 +17,13 @@
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/cgroup.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -37,8 +38,6 @@ enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
-static struct workqueue_struct *vhost_workqueue;
-
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -52,23 +51,31 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 			     void *key)
 {
-	struct vhost_poll *poll;
-	poll = container_of(wait, struct vhost_poll, wait);
+	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+
 	if (!((unsigned long)key & poll->mask))
 		return 0;
 
-	queue_work(vhost_workqueue, &poll->work);
+	vhost_poll_queue(poll);
 	return 0;
 }
 
 /* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
-		     unsigned long mask)
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev)
 {
-	INIT_WORK(&poll->work, func);
+	struct vhost_work *work = &poll->work;
+
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
+	poll->dev = dev;
+
+	INIT_LIST_HEAD(&work->node);
+	work->fn = fn;
+	init_waitqueue_head(&work->done);
+	work->flushing = 0;
+	work->queue_seq = work->done_seq = 0;
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
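
Each vhost_poll now embeds a vhost_work instead of a workqueue work_struct. The companion definitions live in vhost.h and are not part of this file's diff; from the fields initialized above, a sketch of what that header side looks like (reconstructed, not quoted from the patch):

struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);

struct vhost_work {
	struct list_head  node;       /* entry on dev->work_list */
	vhost_work_fn_t   fn;         /* run by the device's worker thread */
	wait_queue_head_t done;       /* flushers wait here */
	int               flushing;   /* count of concurrent flushers */
	unsigned          queue_seq;  /* bumped each time the work is queued */
	unsigned          done_seq;   /* last queue_seq the worker completed */
};
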
@@ -92,12 +99,40 @@ void vhost_poll_stop(struct vhost_poll *poll)
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	flush_work(&poll->work);
+	struct vhost_work *work = &poll->work;
+	unsigned seq;
+	int left;
+	int flushing;
+
+	spin_lock_irq(&poll->dev->work_lock);
+	seq = work->queue_seq;
+	work->flushing++;
+	spin_unlock_irq(&poll->dev->work_lock);
+	wait_event(work->done, ({
+		   spin_lock_irq(&poll->dev->work_lock);
+		   left = seq - work->done_seq <= 0;
+		   spin_unlock_irq(&poll->dev->work_lock);
+		   left;
+	}));
+	spin_lock_irq(&poll->dev->work_lock);
+	flushing = --work->flushing;
+	spin_unlock_irq(&poll->dev->work_lock);
+	BUG_ON(flushing < 0);
 }
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	queue_work(vhost_workqueue, &poll->work);
+	struct vhost_dev *dev = poll->dev;
+	struct vhost_work *work = &poll->work;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->work_lock, flags);
+	if (list_empty(&work->node)) {
+		list_add_tail(&work->node, &dev->work_list);
+		work->queue_seq++;
+		wake_up_process(dev->worker);
+	}
+	spin_unlock_irqrestore(&dev->work_lock, flags);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
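
The open-coded flush replaces flush_work(): a flusher snapshots the work's queue_seq and sleeps until the worker's done_seq catches up, so it waits only for instances queued before the flush began, not for later requeues. A minimal userspace model of the same sequence-counter handshake, with pthreads standing in for the kernel spinlock and waitqueue (all names illustrative, not from vhost):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static unsigned queue_seq, done_seq;    /* bumped on queue / on completion */

static void flush(void)
{
	pthread_mutex_lock(&lock);
	unsigned seq = queue_seq;       /* snapshot: everything queued so far */
	/* signed comparison tolerates counter wrap-around */
	while ((int)(seq - done_seq) > 0)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while ((int)(queue_seq - done_seq) > 0) {
		pthread_mutex_unlock(&lock);
		/* ... run one queued work item here ... */
		pthread_mutex_lock(&lock);
		done_seq++;
		pthread_cond_broadcast(&done);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	queue_seq = 3;                  /* pretend three items were queued */
	pthread_create(&t, NULL, worker, NULL);
	flush();                        /* returns once done_seq catches up */
	pthread_join(t, NULL);
	printf("flushed at seq %u\n", done_seq);
	return 0;
}
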
@@ -114,7 +149,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
-	vq->hdr_size = 0;
+	vq->vhost_hlen = 0;
+	vq->sock_hlen = 0;
 	vq->private_data = NULL;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
@@ -125,10 +161,51 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_ctx = NULL;
 }
 
+static int vhost_worker(void *data)
+{
+	struct vhost_dev *dev = data;
+	struct vhost_work *work = NULL;
+	unsigned uninitialized_var(seq);
+
+	for (;;) {
+		/* mb paired w/ kthread_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		spin_lock_irq(&dev->work_lock);
+		if (work) {
+			work->done_seq = seq;
+			if (work->flushing)
+				wake_up_all(&work->done);
+		}
+
+		if (kthread_should_stop()) {
+			spin_unlock_irq(&dev->work_lock);
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		if (!list_empty(&dev->work_list)) {
+			work = list_first_entry(&dev->work_list,
+						struct vhost_work, node);
+			list_del_init(&work->node);
+			seq = work->queue_seq;
+		} else
+			work = NULL;
+		spin_unlock_irq(&dev->work_lock);
+
+		if (work) {
+			__set_current_state(TASK_RUNNING);
+			work->fn(work);
+		} else
+			schedule();
+
+	}
+}
+
 long vhost_dev_init(struct vhost_dev *dev,
 		    struct vhost_virtqueue *vqs, int nvqs)
 {
 	int i;
+
 	dev->vqs = vqs;
 	dev->nvqs = nvqs;
 	mutex_init(&dev->mutex);
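
Note the ordering in the worker loop: the thread marks itself TASK_INTERRUPTIBLE before testing for stop or pending work, so a wake_up_process() racing with the test merely turns the later schedule() into a no-op instead of being lost, and the previous item's completion (done_seq, wake_up_all) is reported under the same lock hold that picks the next item. A stripped-down toy module showing the same kthread lifecycle and sleep pattern (an illustration of the idiom, not vhost code):

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *worker;

static int toy_worker(void *data)
{
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			return 0;
		}
		/* ... pop and run one queued item here, or: */
		schedule();             /* sleep until woken or stopped */
	}
}

static int __init toy_init(void)
{
	worker = kthread_create(toy_worker, NULL, "toy-worker");
	if (IS_ERR(worker))
		return PTR_ERR(worker);
	wake_up_process(worker);
	return 0;
}

static void __exit toy_exit(void)
{
	kthread_stop(worker);   /* sets the stop flag, wakes, waits for exit */
}

module_init(toy_init);
module_exit(toy_exit);
MODULE_LICENSE("GPL");
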
@@ -136,6 +213,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
+	spin_lock_init(&dev->work_lock);
+	INIT_LIST_HEAD(&dev->work_list);
+	dev->worker = NULL;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].dev = dev;
@@ -143,9 +223,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 		vhost_vq_reset(dev, dev->vqs + i);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
-					dev->vqs[i].handle_kick,
-					POLLIN);
+					dev->vqs[i].handle_kick, POLLIN, dev);
 	}
+
 	return 0;
 }
 
@@ -159,12 +239,36 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 /* Caller should have device mutex */
 static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
+	struct task_struct *worker;
+	int err;
 	/* Is there an owner already? */
-	if (dev->mm)
-		return -EBUSY;
+	if (dev->mm) {
+		err = -EBUSY;
+		goto err_mm;
+	}
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
+	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+	if (IS_ERR(worker)) {
+		err = PTR_ERR(worker);
+		goto err_worker;
+	}
+
+	dev->worker = worker;
+	err = cgroup_attach_task_current_cg(worker);
+	if (err)
+		goto err_cgroup;
+	wake_up_process(worker);	/* avoid contributing to loadavg */
+
 	return 0;
+err_cgroup:
+	kthread_stop(worker);
+err_worker:
+	if (dev->mm)
+		mmput(dev->mm);
+	dev->mm = NULL;
+err_mm:
+	return err;
 }
 
 /* Caller should have device mutex */
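
Two details worth noting here: kthread_create() (rather than kthread_run()) leaves the thread stopped so it can be attached to the owner's cgroup before it ever runs, and only then is it woken; and the error path uses the kernel's usual goto-unwind ladder, where each label releases exactly what was acquired before the failure. The shape of that ladder as a self-contained userspace illustration (resource names made up):

#include <stdlib.h>

/* Acquire a, then b; on failure release, in reverse order,
 * only the resources already held. */
static int setup(void)
{
	int err;
	char *a, *b;

	a = malloc(64);
	if (!a) {
		err = -1;
		goto err_a;             /* nothing held yet */
	}
	b = malloc(64);
	if (!b) {
		err = -2;
		goto err_b;             /* must release a */
	}
	free(b);
	free(a);
	return 0;

err_b:
	free(a);
err_a:
	return err;
}

int main(void)
{
	return setup() ? 1 : 0;
}
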
@@ -217,6 +321,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
+
+	WARN_ON(!list_empty(&dev->work_list));
+	kthread_stop(dev->worker);
 }
 
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -237,8 +344,8 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
 {
 	int i;
 
-        if (!mem)
-                return 0;
+	if (!mem)
+		return 0;
 
 	for (i = 0; i < mem->nregions; ++i) {
 		struct vhost_memory_region *m = mem->regions + i;
@@ -995,9 +1102,9 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 }
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
 {
-	vq->last_avail_idx--;
+	vq->last_avail_idx -= n;
 }
 
 /* After we've used one of their buffers, we tell them about it. We'll then
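
vhost_discard_vq_desc() now takes a count, so a caller can back out a whole batch of descriptors at once. last_avail_idx is a free-running 16-bit index, reduced modulo the ring size only at the point of use, so plain subtraction rewinds correctly even across numeric wrap. A standalone demonstration (the u16 width is assumed from vhost.h, which is not shown here):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t last_avail_idx = 2;   /* wrapped past 65535 recently */
	int n = 5;                     /* batch of 5 descriptors to un-get */

	last_avail_idx -= n;           /* wraps back to 65533 */
	printf("%u\n", (unsigned)last_avail_idx);
	return 0;
}
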
@@ -1042,6 +1149,67 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 	return 0;
 }
 
+static int __vhost_add_used_n(struct vhost_virtqueue *vq,
+			      struct vring_used_elem *heads,
+			      unsigned count)
+{
+	struct vring_used_elem __user *used;
+	int start;
+
+	start = vq->last_used_idx % vq->num;
+	used = vq->used->ring + start;
+	if (copy_to_user(used, heads, count * sizeof *used)) {
+		vq_err(vq, "Failed to write used");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Make sure data is seen before log. */
+		smp_wmb();
+		/* Log used ring entry write. */
+		log_write(vq->log_base,
+			  vq->log_addr +
+			   ((void __user *)used - (void __user *)vq->used),
+			  count * sizeof *used);
+	}
+	vq->last_used_idx += count;
+	return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     unsigned count)
+{
+	int start, n, r;
+
+	start = vq->last_used_idx % vq->num;
+	n = vq->num - start;
+	if (n < count) {
+		r = __vhost_add_used_n(vq, heads, n);
+		if (r < 0)
+			return r;
+		heads += n;
+		count -= n;
+	}
+	r = __vhost_add_used_n(vq, heads, count);
+
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (put_user(vq->last_used_idx, &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Log used index update. */
+		log_write(vq->log_base,
+			  vq->log_addr + offsetof(struct vring_used, idx),
+			  sizeof vq->used->idx);
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return r;
+}
+
 /* This actually signals the guest, using eventfd. */
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
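
vhost_add_used_n() publishes a batch of used elements in at most two copy_to_user() calls, splitting where the batch would run past the end of the ring, and only afterwards advances the guest-visible used->idx behind a write barrier. The split logic, modeled in standalone C with plain arrays in place of the guest ring (names illustrative):

#include <stdio.h>
#include <string.h>

#define QNUM 8

static unsigned ring[QNUM];
static unsigned last_used_idx;          /* free-running, reduced mod QNUM */

static void add_used_n(const unsigned *heads, unsigned count)
{
	unsigned start = last_used_idx % QNUM;
	unsigned n = QNUM - start;      /* room left before the wrap point */

	if (n < count) {                /* first piece: up to the ring end */
		memcpy(ring + start, heads, n * sizeof *ring);
		heads += n;
		count -= n;
		last_used_idx += n;
		start = 0;
	}
	memcpy(ring + start, heads, count * sizeof *ring);
	last_used_idx += count;
}

int main(void)
{
	unsigned heads[5] = { 10, 11, 12, 13, 14 };

	last_used_idx = 6;              /* two slots left before the wrap */
	add_used_n(heads, 5);           /* writes 2 at [6..7], then 3 at [0..2] */
	for (int i = 0; i < QNUM; i++)
		printf("%u ", ring[i]);
	printf("\nidx=%u\n", last_used_idx);
	return 0;
}
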
@@ -1076,6 +1244,15 @@ void vhost_add_used_and_signal(struct vhost_dev *dev,
 	vhost_signal(dev, vq);
 }
 
+/* multi-buffer version of vhost_add_used_and_signal */
+void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+				 struct vhost_virtqueue *vq,
+				 struct vring_used_elem *heads, unsigned count)
+{
+	vhost_add_used_n(vq, heads, count);
+	vhost_signal(dev, vq);
+}
+
 /* OK, now we need to know about added descriptors. */
 bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
@@ -1100,7 +1277,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 		return false;
 	}
 
-	return avail_idx != vq->last_avail_idx;
+	return avail_idx != vq->avail_idx;
 }
 
 /* We don't need to be notified again. */
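
On the notify fix: vq->avail_idx caches the guest's producer index as of vhost's last read, while last_avail_idx tracks how far vhost has actually consumed. After re-enabling notifications the question is whether the guest has produced anything vhost has not yet seen, which is a comparison against the cached read, not the consumption pointer; with batched, partial consumption the old test could keep reporting "new buffers" for entries already known but deliberately left unprocessed. In toy numbers (illustrative values only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t cached_avail = 9;   /* vq->avail_idx: guest idx at last read */
	uint16_t last_avail   = 7;   /* vq->last_avail_idx: consumed so far */
	uint16_t fresh        = 9;   /* avail->idx just re-read from guest */

	printf("old test: %d\n", fresh != last_avail);   /* 1: spurious */
	printf("new test: %d\n", fresh != cached_avail); /* 0: nothing new */
	return 0;
}
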
@@ -1115,16 +1292,3 @@ void vhost_disable_notify(struct vhost_virtqueue *vq)
 		vq_err(vq, "Failed to enable notification at %p: %d\n",
 		       &vq->used->flags, r);
 }
-
-int vhost_init(void)
-{
-	vhost_workqueue = create_singlethread_workqueue("vhost");
-	if (!vhost_workqueue)
-		return -ENOMEM;
-	return 0;
-}
-
-void vhost_cleanup(void)
-{
-	destroy_workqueue(vhost_workqueue);
-}