author		Tejun Heo <tj@kernel.org>		2010-06-02 14:40:00 -0400
committer	Michael S. Tsirkin <mst@redhat.com>	2010-07-28 08:44:53 -0400
commit		c23f3445e68e1db0e74099f264bc5ff5d55ebdeb (patch)
tree		0a8e22e9a10c2978777954a022d721eb02e622be /drivers/vhost/vhost.c
parent		4cfa580e7eebb8694b875d2caff3b989ada2efac (diff)
vhost: replace vhost_workqueue with per-vhost kthread
Replace vhost_workqueue with a per-vhost kthread. Other than the callback
argument changing from struct work_struct * to struct vhost_work *,
there is no visible change to the vhost_poll_*() interface.
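As an illustration of that signature change on the handler side (a sketch only; the handler name below is hypothetical, and the real handlers live in the device code, e.g. drivers/vhost/net.c, which is outside this vhost.c-limited view):

```c
/* Old style: handlers were workqueue callbacks,
 *	static void handle_kick(struct work_struct *work);
 * New style: handlers receive the vhost_work sub-structure and recover
 * their context via container_of(), matching how vhost.c now calls
 * work->fn(work) and embeds a vhost_work inside each vhost_poll.
 */
static void handle_kick(struct vhost_work *work)
{
	struct vhost_poll *poll = container_of(work, struct vhost_poll, work);
	struct vhost_virtqueue *vq = container_of(poll, struct vhost_virtqueue,
						  poll);

	/* ... handle the virtqueue kick ... */
}
```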
This conversion is to make each vhost use a dedicated kthread so that
resource control via cgroup can be applied.
Partially based on Sridhar Samudrala's patch.
* Updated to use sub structure vhost_work instead of directly using
vhost_poll at Michael's suggestion.
* Added flusher wake_up() optimization at Michael's suggestion.
Changes by MST:
* Converted atomics/barrier use to a spinlock.
* Create thread on SET_OWNER
* Fix flushing
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Sridhar Samudrala <samudrala.sridhar@gmail.com>
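For reference, a sketch of the vhost_work sub-structure and the per-device worker state that the vhost.c changes below rely on, reconstructed from the fields the patch touches; the actual declarations live in vhost.h, which is outside this vhost.c-limited diff:

```c
typedef void (*vhost_work_fn_t)(struct vhost_work *work);

struct vhost_work {
	struct list_head	node;		/* entry on dev->work_list */
	vhost_work_fn_t		fn;		/* handler run by vhost_worker() */
	wait_queue_head_t	done;		/* flushers wait here */
	int			flushing;	/* flushers inside vhost_poll_flush() */
	unsigned		queue_seq;	/* bumped whenever the work is queued */
	unsigned		done_seq;	/* queue_seq of the last completed run */
};

struct vhost_poll {
	/* ... pre-existing fields (poll table, wait queue entry, mask) ... */
	struct vhost_work	work;		/* replaces struct work_struct work */
	struct vhost_dev	*dev;		/* new: device whose worker runs this poll */
};

struct vhost_dev {
	/* ... pre-existing fields (memory, vqs, nvqs, mutex, mm, ...) ... */
	spinlock_t		work_lock;	/* protects work_list and the seq counters */
	struct list_head	work_list;	/* pending vhost_work items */
	struct task_struct	*worker;	/* per-device "vhost-<owner pid>" kthread */
};
```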
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r--	drivers/vhost/vhost.c	143
1 files changed, 115 insertions, 28 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 248ed2db0711..30d93c2b45b8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -17,12 +17,12 @@
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/rcupdate.h>
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -37,8 +37,6 @@ enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
-static struct workqueue_struct *vhost_workqueue;
-
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -52,23 +50,31 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 			     void *key)
 {
-	struct vhost_poll *poll;
-	poll = container_of(wait, struct vhost_poll, wait);
+	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+
 	if (!((unsigned long)key & poll->mask))
 		return 0;
 
-	queue_work(vhost_workqueue, &poll->work);
+	vhost_poll_queue(poll);
 	return 0;
 }
 
 /* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
-		     unsigned long mask)
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev)
 {
-	INIT_WORK(&poll->work, func);
+	struct vhost_work *work = &poll->work;
+
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
+	poll->dev = dev;
+
+	INIT_LIST_HEAD(&work->node);
+	work->fn = fn;
+	init_waitqueue_head(&work->done);
+	work->flushing = 0;
+	work->queue_seq = work->done_seq = 0;
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -92,12 +98,40 @@ void vhost_poll_stop(struct vhost_poll *poll)
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	flush_work(&poll->work);
+	struct vhost_work *work = &poll->work;
+	unsigned seq;
+	int left;
+	int flushing;
+
+	spin_lock_irq(&poll->dev->work_lock);
+	seq = work->queue_seq;
+	work->flushing++;
+	spin_unlock_irq(&poll->dev->work_lock);
+	wait_event(work->done, ({
+		   spin_lock_irq(&poll->dev->work_lock);
+		   left = seq - work->done_seq <= 0;
+		   spin_unlock_irq(&poll->dev->work_lock);
+		   left;
+	}));
+	spin_lock_irq(&poll->dev->work_lock);
+	flushing = --work->flushing;
+	spin_unlock_irq(&poll->dev->work_lock);
+	BUG_ON(flushing < 0);
 }
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	queue_work(vhost_workqueue, &poll->work);
+	struct vhost_dev *dev = poll->dev;
+	struct vhost_work *work = &poll->work;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->work_lock, flags);
+	if (list_empty(&work->node)) {
+		list_add_tail(&work->node, &dev->work_list);
+		work->queue_seq++;
+		wake_up_process(dev->worker);
+	}
+	spin_unlock_irqrestore(&dev->work_lock, flags);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -125,10 +159,51 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_ctx = NULL;
 }
 
+static int vhost_worker(void *data)
+{
+	struct vhost_dev *dev = data;
+	struct vhost_work *work = NULL;
+	unsigned uninitialized_var(seq);
+
+	for (;;) {
+		/* mb paired w/ kthread_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		spin_lock_irq(&dev->work_lock);
+		if (work) {
+			work->done_seq = seq;
+			if (work->flushing)
+				wake_up_all(&work->done);
+		}
+
+		if (kthread_should_stop()) {
+			spin_unlock_irq(&dev->work_lock);
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		if (!list_empty(&dev->work_list)) {
+			work = list_first_entry(&dev->work_list,
+						struct vhost_work, node);
+			list_del_init(&work->node);
+			seq = work->queue_seq;
+		} else
+			work = NULL;
+		spin_unlock_irq(&dev->work_lock);
+
+		if (work) {
+			__set_current_state(TASK_RUNNING);
+			work->fn(work);
+		} else
+			schedule();
+
+	}
+}
+
 long vhost_dev_init(struct vhost_dev *dev,
 		    struct vhost_virtqueue *vqs, int nvqs)
 {
 	int i;
+
 	dev->vqs = vqs;
 	dev->nvqs = nvqs;
 	mutex_init(&dev->mutex);
@@ -136,6 +211,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
+	spin_lock_init(&dev->work_lock);
+	INIT_LIST_HEAD(&dev->work_list);
+	dev->worker = NULL;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].dev = dev;
@@ -143,9 +221,9 @@ long vhost_dev_init(struct vhost_dev *dev,
 		vhost_vq_reset(dev, dev->vqs + i);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
-					dev->vqs[i].handle_kick,
-					POLLIN);
+					dev->vqs[i].handle_kick, POLLIN, dev);
 	}
+
 	return 0;
 }
 
@@ -159,12 +237,31 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 /* Caller should have device mutex */
 static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
+	struct task_struct *worker;
+	int err;
 	/* Is there an owner already? */
-	if (dev->mm)
-		return -EBUSY;
+	if (dev->mm) {
+		err = -EBUSY;
+		goto err_mm;
+	}
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
+	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+	if (IS_ERR(worker)) {
+		err = PTR_ERR(worker);
+		goto err_worker;
+	}
+
+	dev->worker = worker;
+	wake_up_process(worker);	/* avoid contributing to loadavg */
+
 	return 0;
+err_worker:
+	if (dev->mm)
+		mmput(dev->mm);
+	dev->mm = NULL;
+err_mm:
+	return err;
 }
 
 /* Caller should have device mutex */
@@ -217,6 +314,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
+
+	WARN_ON(!list_empty(&dev->work_list));
+	kthread_stop(dev->worker);
 }
 
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -1115,16 +1215,3 @@ void vhost_disable_notify(struct vhost_virtqueue *vq)
 		vq_err(vq, "Failed to enable notification at %p: %d\n",
 		       &vq->used->flags, r);
 }
-
-int vhost_init(void)
-{
-	vhost_workqueue = create_singlethread_workqueue("vhost");
-	if (!vhost_workqueue)
-		return -ENOMEM;
-	return 0;
-}
-
-void vhost_cleanup(void)
-{
-	destroy_workqueue(vhost_workqueue);
-}
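One consequence not visible in this vhost.c-limited view: with vhost_init() and vhost_cleanup() removed, module init/exit code that used to create and destroy the shared workqueue presumably just drops those calls. A hedged sketch, assuming the usual vhost-net naming (vhost_net_init, vhost_net_exit and vhost_net_misc are illustrative here):

```c
static int vhost_net_init(void)
{
	/* No vhost_init(): there is no shared workqueue to create any more.
	 * The per-device worker thread is created in vhost_dev_set_owner(). */
	return misc_register(&vhost_net_misc);
}

static void vhost_net_exit(void)
{
	misc_deregister(&vhost_net_misc);
	/* Likewise, no vhost_cleanup(): nothing global is left to tear down. */
}
```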