Diffstat (limited to 'drivers/vhost/vhost.c')

-rw-r--r--  drivers/vhost/vhost.c  232
1 file changed, 198 insertions, 34 deletions

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0b99783083f6..e05557d52999 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -17,12 +17,13 @@
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/cgroup.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -37,8 +38,6 @@ enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
-static struct workqueue_struct *vhost_workqueue;
-
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -52,23 +51,31 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 			     void *key)
 {
-	struct vhost_poll *poll;
-	poll = container_of(wait, struct vhost_poll, wait);
+	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+
 	if (!((unsigned long)key & poll->mask))
 		return 0;
 
-	queue_work(vhost_workqueue, &poll->work);
+	vhost_poll_queue(poll);
 	return 0;
 }
 
 /* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
-		     unsigned long mask)
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev)
 {
-	INIT_WORK(&poll->work, func);
+	struct vhost_work *work = &poll->work;
+
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
+	poll->dev = dev;
+
+	INIT_LIST_HEAD(&work->node);
+	work->fn = fn;
+	init_waitqueue_head(&work->done);
+	work->flushing = 0;
+	work->queue_seq = work->done_seq = 0;
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
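
Note: this diff is limited to vhost.c, so the matching vhost.h change is not shown. A sketch of the struct vhost_work fields the new init code assumes, reconstructed from their uses in this patch (layout and comments are inferred, not copied from the header):

	typedef void (*vhost_work_fn_t)(struct vhost_work *work);

	struct vhost_work {
		struct list_head  node;       /* entry on dev->work_list while queued */
		vhost_work_fn_t   fn;         /* callback run by the worker thread */
		wait_queue_head_t done;       /* flushers wait here */
		int               flushing;   /* number of waiting flushers */
		unsigned          queue_seq;  /* incremented on every queueing */
		unsigned          done_seq;   /* queue_seq of the last completed item */
	};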
@@ -92,12 +99,40 @@ void vhost_poll_stop(struct vhost_poll *poll)
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	flush_work(&poll->work);
+	struct vhost_work *work = &poll->work;
+	unsigned seq;
+	int left;
+	int flushing;
+
+	spin_lock_irq(&poll->dev->work_lock);
+	seq = work->queue_seq;
+	work->flushing++;
+	spin_unlock_irq(&poll->dev->work_lock);
+	wait_event(work->done, ({
+		   spin_lock_irq(&poll->dev->work_lock);
+		   left = seq - work->done_seq <= 0;
+		   spin_unlock_irq(&poll->dev->work_lock);
+		   left;
+	}));
+	spin_lock_irq(&poll->dev->work_lock);
+	flushing = --work->flushing;
+	spin_unlock_irq(&poll->dev->work_lock);
+	BUG_ON(flushing < 0);
 }
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	queue_work(vhost_workqueue, &poll->work);
+	struct vhost_dev *dev = poll->dev;
+	struct vhost_work *work = &poll->work;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->work_lock, flags);
+	if (list_empty(&work->node)) {
+		list_add_tail(&work->node, &dev->work_list);
+		work->queue_seq++;
+		wake_up_process(dev->worker);
+	}
+	spin_unlock_irqrestore(&dev->work_lock, flags);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
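
A worked trace of the flush handshake above, with invented values: vhost_poll_flush() samples seq = queue_seq = 5 and bumps flushing; when the worker completes that item it sets done_seq = 5 and wakes work->done; the wait condition seq - work->done_seq <= 0 evaluates as 5 - 5 == 0 and the flush returns. Every item queued before the flush began has therefore already run, which is the same guarantee flush_work() used to provide.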
@@ -114,7 +149,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
-	vq->hdr_size = 0;
+	vq->vhost_hlen = 0;
+	vq->sock_hlen = 0;
 	vq->private_data = NULL;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
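
Note: the split of the old hdr_size is not explained in this hunk; the assumed meaning, from the mergeable-receive-buffer work this appears to support, is that vhost_hlen is the virtio_net_hdr size vhost itself supplies to or strips for the guest, while sock_hlen is the header size already present in data read from or written to the backend socket. Keeping the two separate would let the guest-visible header format differ from what the socket provides.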
@@ -125,10 +161,51 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_ctx = NULL;
 }
 
+static int vhost_worker(void *data)
+{
+	struct vhost_dev *dev = data;
+	struct vhost_work *work = NULL;
+	unsigned uninitialized_var(seq);
+
+	for (;;) {
+		/* mb paired w/ kthread_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		spin_lock_irq(&dev->work_lock);
+		if (work) {
+			work->done_seq = seq;
+			if (work->flushing)
+				wake_up_all(&work->done);
+		}
+
+		if (kthread_should_stop()) {
+			spin_unlock_irq(&dev->work_lock);
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		if (!list_empty(&dev->work_list)) {
+			work = list_first_entry(&dev->work_list,
+						struct vhost_work, node);
+			list_del_init(&work->node);
+			seq = work->queue_seq;
+		} else
+			work = NULL;
+		spin_unlock_irq(&dev->work_lock);
+
+		if (work) {
+			__set_current_state(TASK_RUNNING);
+			work->fn(work);
+		} else
+			schedule();
+
+	}
+}
+
 long vhost_dev_init(struct vhost_dev *dev,
 		    struct vhost_virtqueue *vqs, int nvqs)
 {
 	int i;
+
 	dev->vqs = vqs;
 	dev->nvqs = nvqs;
 	mutex_init(&dev->mutex);
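
vhost_worker() follows the standard missed-wakeup-safe kthread idiom: the task state is set to TASK_INTERRUPTIBLE before the work list is checked, so a concurrent wake_up_process() either sees the sleeping state and wakes the thread, or the thread sees the newly queued work. A minimal generic sketch of the pattern (work_pending() and do_one_item() are hypothetical stand-ins for the locked list handling above):

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);	/* publish intent to sleep */
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		if (work_pending()) {			/* hypothetical check */
			__set_current_state(TASK_RUNNING);
			do_one_item();			/* hypothetical handler */
		} else {
			schedule();			/* sleep until wake_up_process() */
		}
	}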
@@ -136,6 +213,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
+	spin_lock_init(&dev->work_lock);
+	INIT_LIST_HEAD(&dev->work_list);
+	dev->worker = NULL;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].dev = dev;
@@ -143,9 +223,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 		vhost_vq_reset(dev, dev->vqs + i);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
-					dev->vqs[i].handle_kick,
-					POLLIN);
+					dev->vqs[i].handle_kick, POLLIN, dev);
 	}
+
 	return 0;
 }
 
@@ -159,12 +239,36 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 /* Caller should have device mutex */
 static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
+	struct task_struct *worker;
+	int err;
 	/* Is there an owner already? */
-	if (dev->mm)
-		return -EBUSY;
+	if (dev->mm) {
+		err = -EBUSY;
+		goto err_mm;
+	}
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
+	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+	if (IS_ERR(worker)) {
+		err = PTR_ERR(worker);
+		goto err_worker;
+	}
+
+	dev->worker = worker;
+	err = cgroup_attach_task_current_cg(worker);
+	if (err)
+		goto err_cgroup;
+	wake_up_process(worker);	/* avoid contributing to loadavg */
+
 	return 0;
+err_cgroup:
+	kthread_stop(worker);
+err_worker:
+	if (dev->mm)
+		mmput(dev->mm);
+	dev->mm = NULL;
+err_mm:
+	return err;
 }
 
 /* Caller should have device mutex */
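
The ordering in vhost_dev_set_owner() relies on kthread_create() returning a thread that has not yet been woken: the task exists and can be attached to the owner's cgroup, and only the explicit wake_up_process() lets it start executing vhost_worker(). A generic sketch of the idiom (threadfn, data, and the name are illustrative):

	struct task_struct *t = kthread_create(threadfn, data, "example");
	if (IS_ERR(t))
		return PTR_ERR(t);
	/* thread is not running yet; configure it here */
	wake_up_process(t);	/* threadfn(data) starts only now */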
@@ -217,6 +321,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
+
+	WARN_ON(!list_empty(&dev->work_list));
+	kthread_stop(dev->worker);
 }
 
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -237,8 +344,8 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
 {
 	int i;
 
-        if (!mem)
-                return 0;
+	if (!mem)
+		return 0;
 
 	for (i = 0; i < mem->nregions; ++i) {
 		struct vhost_memory_region *m = mem->regions + i;
@@ -995,9 +1102,9 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 }
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
 {
-	vq->last_avail_idx--;
+	vq->last_avail_idx -= n;
 }
 
 /* After we've used one of their buffers, we tell them about it. We'll then
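
The new count argument presumably serves callers that consume several descriptors for one packet, as with mergeable receive buffers; a hedged usage sketch, where get_rx_bufs() and receive_failed() are invented stand-ins for such a caller:

	/* hypothetical caller: reserve several descriptors, then back out */
	int headcount = get_rx_bufs(vq);	/* invented helper: consumes N heads */
	if (headcount > 0 && receive_failed(sock))
		vhost_discard_vq_desc(vq, headcount);	/* roll all N back */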
@@ -1042,6 +1149,67 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 	return 0;
 }
 
+static int __vhost_add_used_n(struct vhost_virtqueue *vq,
+			      struct vring_used_elem *heads,
+			      unsigned count)
+{
+	struct vring_used_elem __user *used;
+	int start;
+
+	start = vq->last_used_idx % vq->num;
+	used = vq->used->ring + start;
+	if (copy_to_user(used, heads, count * sizeof *used)) {
+		vq_err(vq, "Failed to write used");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Make sure data is seen before log. */
+		smp_wmb();
+		/* Log used ring entry write. */
+		log_write(vq->log_base,
+			  vq->log_addr +
+			   ((void __user *)used - (void __user *)vq->used),
+			  count * sizeof *used);
+	}
+	vq->last_used_idx += count;
+	return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     unsigned count)
+{
+	int start, n, r;
+
+	start = vq->last_used_idx % vq->num;
+	n = vq->num - start;
+	if (n < count) {
+		r = __vhost_add_used_n(vq, heads, n);
+		if (r < 0)
+			return r;
+		heads += n;
+		count -= n;
+	}
+	r = __vhost_add_used_n(vq, heads, count);
+
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (put_user(vq->last_used_idx, &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Log used index update. */
+		log_write(vq->log_base,
+			  vq->log_addr + offsetof(struct vring_used, idx),
+			  sizeof vq->used->idx);
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return r;
+}
+
 /* This actually signals the guest, using eventfd. */
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
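
A worked example of the wrap-around split in vhost_add_used_n(), with invented numbers: for vq->num = 256, last_used_idx = 250 and count = 10, start = 250 and n = 256 - 250 = 6 < 10, so the first __vhost_add_used_n() call writes six entries into ring slots 250..255 and the second writes the remaining four into slots 0..3; last_used_idx ends up at 260, and 260 % 256 = 4 is the next free slot. The index itself is free-running; only ring addressing takes it modulo vq->num.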
@@ -1076,6 +1244,15 @@ void vhost_add_used_and_signal(struct vhost_dev *dev,
 	vhost_signal(dev, vq);
 }
 
+/* multi-buffer version of vhost_add_used_and_signal */
+void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+				 struct vhost_virtqueue *vq,
+				 struct vring_used_elem *heads, unsigned count)
+{
+	vhost_add_used_n(vq, heads, count);
+	vhost_signal(dev, vq);
+}
+
 /* OK, now we need to know about added descriptors. */
 bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
@@ -1100,7 +1277,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 		return false;
 	}
 
-	return avail_idx != vq->last_avail_idx;
+	return avail_idx != vq->avail_idx;
 }
 
 /* We don't need to be notified again. */
@@ -1115,16 +1292,3 @@ void vhost_disable_notify(struct vhost_virtqueue *vq)
 		vq_err(vq, "Failed to enable notification at %p: %d\n",
 		       &vq->used->flags, r);
 }
-
-int vhost_init(void)
-{
-	vhost_workqueue = create_singlethread_workqueue("vhost");
-	if (!vhost_workqueue)
-		return -ENOMEM;
-	return 0;
-}
-
-void vhost_cleanup(void)
-{
-	destroy_workqueue(vhost_workqueue);
-}