Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r--	drivers/vhost/vhost.c	228
1 file changed, 196 insertions(+), 32 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 248ed2db0711..e05557d52999 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -17,12 +17,13 @@
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/cgroup.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -37,8 +38,6 @@ enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
-static struct workqueue_struct *vhost_workqueue;
-
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -52,23 +51,31 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 			     void *key)
 {
-	struct vhost_poll *poll;
-	poll = container_of(wait, struct vhost_poll, wait);
+	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+
 	if (!((unsigned long)key & poll->mask))
 		return 0;
 
-	queue_work(vhost_workqueue, &poll->work);
+	vhost_poll_queue(poll);
 	return 0;
 }
 
 /* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
-		     unsigned long mask)
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev)
 {
-	INIT_WORK(&poll->work, func);
+	struct vhost_work *work = &poll->work;
+
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
+	poll->dev = dev;
+
+	INIT_LIST_HEAD(&work->node);
+	work->fn = fn;
+	init_waitqueue_head(&work->done);
+	work->flushing = 0;
+	work->queue_seq = work->done_seq = 0;
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
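The callback type changes here from the workqueue's work_func_t to vhost_work_fn_t, and each poller is tied to a device so its work runs on that device's worker thread. A minimal sketch of the resulting usage, inferred from the signatures above; the handler name and the container_of() recovery step are illustrative, not part of this diff:

/* Sketch only: vhost_worker() invokes work->fn(work), so a handler
 * receives the embedded vhost_work and recovers its context. */
static void handle_kick_sketch(struct vhost_work *work)
{
	struct vhost_poll *poll = container_of(work, struct vhost_poll, work);
	/* ... service the virtqueue owned by poll->dev ... */
}

/* Sketch of driver setup, e.g. at virtqueue init time: */
static void setup_sketch(struct vhost_dev *dev, struct vhost_poll *poll)
{
	vhost_poll_init(poll, handle_kick_sketch, POLLIN, dev);
	/* later, from any context; runs on dev->worker: */
	vhost_poll_queue(poll);
}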
@@ -92,12 +99,40 @@ void vhost_poll_stop(struct vhost_poll *poll)
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	flush_work(&poll->work);
+	struct vhost_work *work = &poll->work;
+	unsigned seq;
+	int left;
+	int flushing;
+
+	spin_lock_irq(&poll->dev->work_lock);
+	seq = work->queue_seq;
+	work->flushing++;
+	spin_unlock_irq(&poll->dev->work_lock);
+	wait_event(work->done, ({
+		   spin_lock_irq(&poll->dev->work_lock);
+		   left = seq - work->done_seq <= 0;
+		   spin_unlock_irq(&poll->dev->work_lock);
+		   left;
+	}));
+	spin_lock_irq(&poll->dev->work_lock);
+	flushing = --work->flushing;
+	spin_unlock_irq(&poll->dev->work_lock);
+	BUG_ON(flushing < 0);
 }
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	queue_work(vhost_workqueue, &poll->work);
+	struct vhost_dev *dev = poll->dev;
+	struct vhost_work *work = &poll->work;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->work_lock, flags);
+	if (list_empty(&work->node)) {
+		list_add_tail(&work->node, &dev->work_list);
+		work->queue_seq++;
+		wake_up_process(dev->worker);
+	}
+	spin_unlock_irqrestore(&dev->work_lock, flags);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
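The flush path replaces flush_work() with a sequence-number handshake: the flusher snapshots queue_seq, then sleeps until the worker's done_seq catches up to the snapshot. Below is a self-contained userspace model of the same handshake, using a pthread mutex and condition variable in place of work_lock and the done waitqueue; the names and the single-threaded demo in main() are illustrative only:

#include <pthread.h>
#include <stdio.h>

/* mirrors the vhost_work bookkeeping fields */
struct model_work {
	pthread_mutex_t lock;	/* stands in for dev->work_lock */
	pthread_cond_t done;	/* stands in for the done waitqueue */
	unsigned queue_seq;	/* bumped each time the work is queued */
	unsigned done_seq;	/* last queue_seq whose run has finished */
	int flushing;		/* number of waiters to wake */
};

/* worker side: publish completion after running the payload */
static void model_complete(struct model_work *w, unsigned seq)
{
	pthread_mutex_lock(&w->lock);
	w->done_seq = seq;
	if (w->flushing)
		pthread_cond_broadcast(&w->done);
	pthread_mutex_unlock(&w->lock);
}

/* flusher side: wait until every queueing before the snapshot has run */
static void model_flush(struct model_work *w)
{
	unsigned seq;

	pthread_mutex_lock(&w->lock);
	seq = w->queue_seq;
	w->flushing++;
	while ((int)(seq - w->done_seq) > 0)	/* wraparound-safe compare */
		pthread_cond_wait(&w->done, &w->lock);
	w->flushing--;
	pthread_mutex_unlock(&w->lock);
}

int main(void)
{
	struct model_work w = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};

	w.queue_seq = 1;	/* pretend one queueing happened ... */
	model_complete(&w, 1);	/* ... and the worker finished it */
	model_flush(&w);	/* returns at once: done_seq caught up */
	printf("flush completed at seq %u\n", w.done_seq);
	return 0;
}

vhost_poll_queue() itself stays idempotent: list_empty(&work->node) means an item already queued but not yet dequeued is not queued twice, it just gets one more queue_seq tick when requeued later.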
@@ -114,7 +149,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
-	vq->hdr_size = 0;
+	vq->vhost_hlen = 0;
+	vq->sock_hlen = 0;
 	vq->private_data = NULL;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
@@ -125,10 +161,51 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_ctx = NULL;
 }
 
+static int vhost_worker(void *data)
+{
+	struct vhost_dev *dev = data;
+	struct vhost_work *work = NULL;
+	unsigned uninitialized_var(seq);
+
+	for (;;) {
+		/* mb paired w/ kthread_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		spin_lock_irq(&dev->work_lock);
+		if (work) {
+			work->done_seq = seq;
+			if (work->flushing)
+				wake_up_all(&work->done);
+		}
+
+		if (kthread_should_stop()) {
+			spin_unlock_irq(&dev->work_lock);
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		if (!list_empty(&dev->work_list)) {
+			work = list_first_entry(&dev->work_list,
+						struct vhost_work, node);
+			list_del_init(&work->node);
+			seq = work->queue_seq;
+		} else
+			work = NULL;
+		spin_unlock_irq(&dev->work_lock);
+
+		if (work) {
+			__set_current_state(TASK_RUNNING);
+			work->fn(work);
+		} else
+			schedule();
+
+	}
+}
+
 long vhost_dev_init(struct vhost_dev *dev,
 		    struct vhost_virtqueue *vqs, int nvqs)
 {
 	int i;
+
 	dev->vqs = vqs;
 	dev->nvqs = nvqs;
 	mutex_init(&dev->mutex);
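Note the ordering inside vhost_worker(): the previous item's done_seq is published under work_lock, and any flushers are woken, before the next item is dequeued, so vhost_poll_flush() returns only once every execution queued before its snapshot has run to completion. list_del_init() in the dequeue step makes the node look empty again, which is what lets vhost_poll_queue() requeue a work item even while it is running. The set_current_state(TASK_INTERRUPTIBLE) placed before the kthread_should_stop() check is the usual pattern that avoids losing a wakeup between the check and schedule().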
@@ -136,6 +213,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
+	spin_lock_init(&dev->work_lock);
+	INIT_LIST_HEAD(&dev->work_list);
+	dev->worker = NULL;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].dev = dev;
@@ -143,9 +223,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 		vhost_vq_reset(dev, dev->vqs + i);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
-					dev->vqs[i].handle_kick,
-					POLLIN);
+					dev->vqs[i].handle_kick, POLLIN, dev);
 	}
+
 	return 0;
 }
 
@@ -159,12 +239,36 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 /* Caller should have device mutex */
 static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
+	struct task_struct *worker;
+	int err;
 	/* Is there an owner already? */
-	if (dev->mm)
-		return -EBUSY;
+	if (dev->mm) {
+		err = -EBUSY;
+		goto err_mm;
+	}
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
+	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+	if (IS_ERR(worker)) {
+		err = PTR_ERR(worker);
+		goto err_worker;
+	}
+
+	dev->worker = worker;
+	err = cgroup_attach_task_current_cg(worker);
+	if (err)
+		goto err_cgroup;
+	wake_up_process(worker);	/* avoid contributing to loadavg */
+
 	return 0;
+err_cgroup:
+	kthread_stop(worker);
+err_worker:
+	if (dev->mm)
+		mmput(dev->mm);
+	dev->mm = NULL;
+err_mm:
+	return err;
 }
 
 /* Caller should have device mutex */
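The unwind order in vhost_dev_set_owner() mirrors the setup order: if attaching the worker to the owner's cgroups fails, the kthread is stopped before the mm reference is dropped and dev->mm is cleared, so a later VHOST_SET_OWNER attempt can retry cleanly. Using kthread_create() plus an explicit wake_up_process(), rather than kthread_run(), also leaves the thread dormant until the cgroup attach has succeeded.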
@@ -217,6 +321,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
+
+	WARN_ON(!list_empty(&dev->work_list));
+	kthread_stop(dev->worker);
 }
 
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -995,9 +1102,9 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 }
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
 {
-	vq->last_avail_idx--;
+	vq->last_avail_idx -= n;
 }
 
 /* After we've used one of their buffers, we tell them about it. We'll then
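vhost_discard_vq_desc() gains a count so a caller that pulled several descriptor heads for one operation (a batched receive path is the natural user) can back them all out at once; rewinding last_avail_idx by n simply makes those entries visible to the next vhost_get_vq_desc() call again.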
@@ -1042,6 +1149,67 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 	return 0;
 }
 
+static int __vhost_add_used_n(struct vhost_virtqueue *vq,
+			      struct vring_used_elem *heads,
+			      unsigned count)
+{
+	struct vring_used_elem __user *used;
+	int start;
+
+	start = vq->last_used_idx % vq->num;
+	used = vq->used->ring + start;
+	if (copy_to_user(used, heads, count * sizeof *used)) {
+		vq_err(vq, "Failed to write used");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Make sure data is seen before log. */
+		smp_wmb();
+		/* Log used ring entry write. */
+		log_write(vq->log_base,
+			  vq->log_addr +
+			  ((void __user *)used - (void __user *)vq->used),
+			  count * sizeof *used);
+	}
+	vq->last_used_idx += count;
+	return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     unsigned count)
+{
+	int start, n, r;
+
+	start = vq->last_used_idx % vq->num;
+	n = vq->num - start;
+	if (n < count) {
+		r = __vhost_add_used_n(vq, heads, n);
+		if (r < 0)
+			return r;
+		heads += n;
+		count -= n;
+	}
+	r = __vhost_add_used_n(vq, heads, count);
+
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (put_user(vq->last_used_idx, &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Log used index update. */
+		log_write(vq->log_base,
+			  vq->log_addr + offsetof(struct vring_used, idx),
+			  sizeof vq->used->idx);
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	return r;
+}
+
 /* This actually signals the guest, using eventfd. */
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
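vhost_add_used_n() splits a batch that crosses the end of the used ring into two __vhost_add_used_n() calls and only afterwards publishes the new used->idx behind an smp_wmb(), so the guest never observes a partially written batch. A small standalone model of just the split arithmetic (illustration only, with made-up numbers):

#include <stdio.h>

/* Model of the index math in vhost_add_used_n(): a batch of `count`
 * used elements starting at last_used_idx may wrap past the end of a
 * ring of `num` entries and is then written as two chunks. */
static void add_used_n_model(unsigned last_used_idx, unsigned num,
			     unsigned count)
{
	unsigned start = last_used_idx % num;
	unsigned n = num - start;	/* slots left before the wrap */

	if (n < count) {
		printf("chunk 1: slots [%u..%u)\n", start, start + n);
		printf("chunk 2: slots [0..%u)\n", count - n);
	} else {
		printf("one chunk: slots [%u..%u)\n", start, start + count);
	}
	printf("used->idx becomes %u\n", last_used_idx + count);
}

int main(void)
{
	/* ring of 256 entries; 10-element batch starting at slot 250 */
	add_used_n_model(506, 256, 10);
	return 0;
}

Running it prints a 6-element chunk at slots [250..256) followed by a 4-element chunk at [0..4), which is exactly the n < count branch above.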
@@ -1076,6 +1244,15 @@ void vhost_add_used_and_signal(struct vhost_dev *dev,
 	vhost_signal(dev, vq);
 }
 
+/* multi-buffer version of vhost_add_used_and_signal */
+void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+				 struct vhost_virtqueue *vq,
+				 struct vring_used_elem *heads, unsigned count)
+{
+	vhost_add_used_n(vq, heads, count);
+	vhost_signal(dev, vq);
+}
+
 /* OK, now we need to know about added descriptors. */
 bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
@@ -1100,7 +1277,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 		return false;
 	}
 
-	return avail_idx != vq->last_avail_idx;
+	return avail_idx != vq->avail_idx;
 }
 
 /* We don't need to be notified again. */
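The return value of vhost_enable_notify() is now computed against vq->avail_idx, the cached copy of the guest's index from the last read, rather than last_avail_idx, the last entry actually consumed. Once operations can consume a variable number of descriptors, those two indices legitimately differ, so comparing against the cached value reports only entries that are genuinely new since notifications were disabled.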
@@ -1115,16 +1292,3 @@ void vhost_disable_notify(struct vhost_virtqueue *vq)
 		vq_err(vq, "Failed to enable notification at %p: %d\n",
 		       &vq->used->flags, r);
 }
-
-int vhost_init(void)
-{
-	vhost_workqueue = create_singlethread_workqueue("vhost");
-	if (!vhost_workqueue)
-		return -ENOMEM;
-	return 0;
-}
-
-void vhost_cleanup(void)
-{
-	destroy_workqueue(vhost_workqueue);
-}
