aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/xen/xenbus/xenbus.h48
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c307
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c188
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c520
4 files changed, 672 insertions, 391 deletions
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index 51995276f549..149c5e7efc89 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -32,6 +32,10 @@
32#ifndef _XENBUS_XENBUS_H 32#ifndef _XENBUS_XENBUS_H
33#define _XENBUS_XENBUS_H 33#define _XENBUS_XENBUS_H
34 34
35#include <linux/mutex.h>
36#include <linux/uio.h>
37#include <xen/xenbus.h>
38
35#define XEN_BUS_ID_SIZE 20 39#define XEN_BUS_ID_SIZE 20
36 40
37struct xen_bus_type { 41struct xen_bus_type {
@@ -52,16 +56,49 @@ enum xenstore_init {
52 XS_LOCAL, 56 XS_LOCAL,
53}; 57};
54 58
59struct xs_watch_event {
60 struct list_head list;
61 unsigned int len;
62 struct xenbus_watch *handle;
63 const char *path;
64 const char *token;
65 char body[];
66};
67
68enum xb_req_state {
69 xb_req_state_queued,
70 xb_req_state_wait_reply,
71 xb_req_state_got_reply,
72 xb_req_state_aborted
73};
74
75struct xb_req_data {
76 struct list_head list;
77 wait_queue_head_t wq;
78 struct xsd_sockmsg msg;
79 enum xsd_sockmsg_type type;
80 char *body;
81 const struct kvec *vec;
82 int num_vecs;
83 int err;
84 enum xb_req_state state;
85 void (*cb)(struct xb_req_data *);
86 void *par;
87};
88
55extern enum xenstore_init xen_store_domain_type; 89extern enum xenstore_init xen_store_domain_type;
56extern const struct attribute_group *xenbus_dev_groups[]; 90extern const struct attribute_group *xenbus_dev_groups[];
91extern struct mutex xs_response_mutex;
92extern struct list_head xs_reply_list;
93extern struct list_head xb_write_list;
94extern wait_queue_head_t xb_waitq;
95extern struct mutex xb_write_mutex;
57 96
58int xs_init(void); 97int xs_init(void);
59int xb_init_comms(void); 98int xb_init_comms(void);
60void xb_deinit_comms(void); 99void xb_deinit_comms(void);
61int xb_write(const void *data, unsigned int len); 100int xs_watch_msg(struct xs_watch_event *event);
62int xb_read(void *data, unsigned int len); 101void xs_request_exit(struct xb_req_data *req);
63int xb_data_to_read(void);
64int xb_wait_for_data_to_read(void);
65 102
66int xenbus_match(struct device *_dev, struct device_driver *_drv); 103int xenbus_match(struct device *_dev, struct device_driver *_drv);
67int xenbus_dev_probe(struct device *_dev); 104int xenbus_dev_probe(struct device *_dev);
@@ -92,6 +129,7 @@ int xenbus_read_otherend_details(struct xenbus_device *xendev,
92 129
93void xenbus_ring_ops_init(void); 130void xenbus_ring_ops_init(void);
94 131
95void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); 132int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par);
133void xenbus_dev_queue_reply(struct xb_req_data *req);
96 134
97#endif 135#endif
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index c21ec02643e1..856ada5d39c9 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/wait.h> 35#include <linux/wait.h>
36#include <linux/interrupt.h> 36#include <linux/interrupt.h>
37#include <linux/kthread.h>
37#include <linux/sched.h> 38#include <linux/sched.h>
38#include <linux/err.h> 39#include <linux/err.h>
39#include <xen/xenbus.h> 40#include <xen/xenbus.h>
@@ -42,11 +43,22 @@
42#include <xen/page.h> 43#include <xen/page.h>
43#include "xenbus.h" 44#include "xenbus.h"
44 45
46/* A list of replies. Currently only one will ever be outstanding. */
47LIST_HEAD(xs_reply_list);
48
49/* A list of write requests. */
50LIST_HEAD(xb_write_list);
51DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
52DEFINE_MUTEX(xb_write_mutex);
53
54/* Protect xenbus reader thread against save/restore. */
55DEFINE_MUTEX(xs_response_mutex);
56
45static int xenbus_irq; 57static int xenbus_irq;
58static struct task_struct *xenbus_task;
46 59
47static DECLARE_WORK(probe_work, xenbus_probe); 60static DECLARE_WORK(probe_work, xenbus_probe);
48 61
49static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
50 62
51static irqreturn_t wake_waiting(int irq, void *unused) 63static irqreturn_t wake_waiting(int irq, void *unused)
52{ 64{
@@ -84,30 +96,31 @@ static const void *get_input_chunk(XENSTORE_RING_IDX cons,
84 return buf + MASK_XENSTORE_IDX(cons); 96 return buf + MASK_XENSTORE_IDX(cons);
85} 97}
86 98
99static int xb_data_to_write(void)
100{
101 struct xenstore_domain_interface *intf = xen_store_interface;
102
103 return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
104 !list_empty(&xb_write_list);
105}
106
87/** 107/**
88 * xb_write - low level write 108 * xb_write - low level write
89 * @data: buffer to send 109 * @data: buffer to send
90 * @len: length of buffer 110 * @len: length of buffer
91 * 111 *
92 * Returns 0 on success, error otherwise. 112 * Returns number of bytes written or -err.
93 */ 113 */
94int xb_write(const void *data, unsigned len) 114static int xb_write(const void *data, unsigned int len)
95{ 115{
96 struct xenstore_domain_interface *intf = xen_store_interface; 116 struct xenstore_domain_interface *intf = xen_store_interface;
97 XENSTORE_RING_IDX cons, prod; 117 XENSTORE_RING_IDX cons, prod;
98 int rc; 118 unsigned int bytes = 0;
99 119
100 while (len != 0) { 120 while (len != 0) {
101 void *dst; 121 void *dst;
102 unsigned int avail; 122 unsigned int avail;
103 123
104 rc = wait_event_interruptible(
105 xb_waitq,
106 (intf->req_prod - intf->req_cons) !=
107 XENSTORE_RING_SIZE);
108 if (rc < 0)
109 return rc;
110
111 /* Read indexes, then verify. */ 124 /* Read indexes, then verify. */
112 cons = intf->req_cons; 125 cons = intf->req_cons;
113 prod = intf->req_prod; 126 prod = intf->req_prod;
@@ -115,6 +128,11 @@ int xb_write(const void *data, unsigned len)
115 intf->req_cons = intf->req_prod = 0; 128 intf->req_cons = intf->req_prod = 0;
116 return -EIO; 129 return -EIO;
117 } 130 }
131 if (!xb_data_to_write())
132 return bytes;
133
134 /* Must write data /after/ reading the consumer index. */
135 virt_mb();
118 136
119 dst = get_output_chunk(cons, prod, intf->req, &avail); 137 dst = get_output_chunk(cons, prod, intf->req, &avail);
120 if (avail == 0) 138 if (avail == 0)
@@ -122,52 +140,45 @@ int xb_write(const void *data, unsigned len)
122 if (avail > len) 140 if (avail > len)
123 avail = len; 141 avail = len;
124 142
125 /* Must write data /after/ reading the consumer index. */
126 virt_mb();
127
128 memcpy(dst, data, avail); 143 memcpy(dst, data, avail);
129 data += avail; 144 data += avail;
130 len -= avail; 145 len -= avail;
146 bytes += avail;
131 147
132 /* Other side must not see new producer until data is there. */ 148 /* Other side must not see new producer until data is there. */
133 virt_wmb(); 149 virt_wmb();
134 intf->req_prod += avail; 150 intf->req_prod += avail;
135 151
136 /* Implies mb(): other side will see the updated producer. */ 152 /* Implies mb(): other side will see the updated producer. */
137 notify_remote_via_evtchn(xen_store_evtchn); 153 if (prod <= intf->req_cons)
154 notify_remote_via_evtchn(xen_store_evtchn);
138 } 155 }
139 156
140 return 0; 157 return bytes;
141} 158}
142 159
143int xb_data_to_read(void) 160static int xb_data_to_read(void)
144{ 161{
145 struct xenstore_domain_interface *intf = xen_store_interface; 162 struct xenstore_domain_interface *intf = xen_store_interface;
146 return (intf->rsp_cons != intf->rsp_prod); 163 return (intf->rsp_cons != intf->rsp_prod);
147} 164}
148 165
149int xb_wait_for_data_to_read(void) 166static int xb_read(void *data, unsigned int len)
150{
151 return wait_event_interruptible(xb_waitq, xb_data_to_read());
152}
153
154int xb_read(void *data, unsigned len)
155{ 167{
156 struct xenstore_domain_interface *intf = xen_store_interface; 168 struct xenstore_domain_interface *intf = xen_store_interface;
157 XENSTORE_RING_IDX cons, prod; 169 XENSTORE_RING_IDX cons, prod;
158 int rc; 170 unsigned int bytes = 0;
159 171
160 while (len != 0) { 172 while (len != 0) {
161 unsigned int avail; 173 unsigned int avail;
162 const char *src; 174 const char *src;
163 175
164 rc = xb_wait_for_data_to_read();
165 if (rc < 0)
166 return rc;
167
168 /* Read indexes, then verify. */ 176 /* Read indexes, then verify. */
169 cons = intf->rsp_cons; 177 cons = intf->rsp_cons;
170 prod = intf->rsp_prod; 178 prod = intf->rsp_prod;
179 if (cons == prod)
180 return bytes;
181
171 if (!check_indexes(cons, prod)) { 182 if (!check_indexes(cons, prod)) {
172 intf->rsp_cons = intf->rsp_prod = 0; 183 intf->rsp_cons = intf->rsp_prod = 0;
173 return -EIO; 184 return -EIO;
@@ -185,17 +196,243 @@ int xb_read(void *data, unsigned len)
185 memcpy(data, src, avail); 196 memcpy(data, src, avail);
186 data += avail; 197 data += avail;
187 len -= avail; 198 len -= avail;
199 bytes += avail;
188 200
189 /* Other side must not see free space until we've copied out */ 201 /* Other side must not see free space until we've copied out */
190 virt_mb(); 202 virt_mb();
191 intf->rsp_cons += avail; 203 intf->rsp_cons += avail;
192 204
193 pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
194
195 /* Implies mb(): other side will see the updated consumer. */ 205 /* Implies mb(): other side will see the updated consumer. */
196 notify_remote_via_evtchn(xen_store_evtchn); 206 if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
207 notify_remote_via_evtchn(xen_store_evtchn);
208 }
209
210 return bytes;
211}
212
213static int process_msg(void)
214{
215 static struct {
216 struct xsd_sockmsg msg;
217 char *body;
218 union {
219 void *alloc;
220 struct xs_watch_event *watch;
221 };
222 bool in_msg;
223 bool in_hdr;
224 unsigned int read;
225 } state;
226 struct xb_req_data *req;
227 int err;
228 unsigned int len;
229
230 if (!state.in_msg) {
231 state.in_msg = true;
232 state.in_hdr = true;
233 state.read = 0;
234
235 /*
236 * We must disallow save/restore while reading a message.
237 * A partial read across s/r leaves us out of sync with
238 * xenstored.
239 * xs_response_mutex is locked as long as we are processing one
240 * message. state.in_msg will be true as long as we are holding
241 * the lock here.
242 */
243 mutex_lock(&xs_response_mutex);
244
245 if (!xb_data_to_read()) {
246 /* We raced with save/restore: pending data 'gone'. */
247 mutex_unlock(&xs_response_mutex);
248 state.in_msg = false;
249 return 0;
250 }
251 }
252
253 if (state.in_hdr) {
254 if (state.read != sizeof(state.msg)) {
255 err = xb_read((void *)&state.msg + state.read,
256 sizeof(state.msg) - state.read);
257 if (err < 0)
258 goto out;
259 state.read += err;
260 if (state.read != sizeof(state.msg))
261 return 0;
262 if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
263 err = -EINVAL;
264 goto out;
265 }
266 }
267
268 len = state.msg.len + 1;
269 if (state.msg.type == XS_WATCH_EVENT)
270 len += sizeof(*state.watch);
271
272 state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
273 if (!state.alloc)
274 return -ENOMEM;
275
276 if (state.msg.type == XS_WATCH_EVENT)
277 state.body = state.watch->body;
278 else
279 state.body = state.alloc;
280 state.in_hdr = false;
281 state.read = 0;
282 }
283
284 err = xb_read(state.body + state.read, state.msg.len - state.read);
285 if (err < 0)
286 goto out;
287
288 state.read += err;
289 if (state.read != state.msg.len)
290 return 0;
291
292 state.body[state.msg.len] = '\0';
293
294 if (state.msg.type == XS_WATCH_EVENT) {
295 state.watch->len = state.msg.len;
296 err = xs_watch_msg(state.watch);
297 } else {
298 err = -ENOENT;
299 mutex_lock(&xb_write_mutex);
300 list_for_each_entry(req, &xs_reply_list, list) {
301 if (req->msg.req_id == state.msg.req_id) {
302 if (req->state == xb_req_state_wait_reply) {
303 req->msg.type = state.msg.type;
304 req->msg.len = state.msg.len;
305 req->body = state.body;
306 req->state = xb_req_state_got_reply;
307 list_del(&req->list);
308 req->cb(req);
309 } else {
310 list_del(&req->list);
311 kfree(req);
312 }
313 err = 0;
314 break;
315 }
316 }
317 mutex_unlock(&xb_write_mutex);
318 if (err)
319 goto out;
197 } 320 }
198 321
322 mutex_unlock(&xs_response_mutex);
323
324 state.in_msg = false;
325 state.alloc = NULL;
326 return err;
327
328 out:
329 mutex_unlock(&xs_response_mutex);
330 state.in_msg = false;
331 kfree(state.alloc);
332 state.alloc = NULL;
333 return err;
334}
335
336static int process_writes(void)
337{
338 static struct {
339 struct xb_req_data *req;
340 int idx;
341 unsigned int written;
342 } state;
343 void *base;
344 unsigned int len;
345 int err = 0;
346
347 if (!xb_data_to_write())
348 return 0;
349
350 mutex_lock(&xb_write_mutex);
351
352 if (!state.req) {
353 state.req = list_first_entry(&xb_write_list,
354 struct xb_req_data, list);
355 state.idx = -1;
356 state.written = 0;
357 }
358
359 if (state.req->state == xb_req_state_aborted)
360 goto out_err;
361
362 while (state.idx < state.req->num_vecs) {
363 if (state.idx < 0) {
364 base = &state.req->msg;
365 len = sizeof(state.req->msg);
366 } else {
367 base = state.req->vec[state.idx].iov_base;
368 len = state.req->vec[state.idx].iov_len;
369 }
370 err = xb_write(base + state.written, len - state.written);
371 if (err < 0)
372 goto out_err;
373 state.written += err;
374 if (state.written != len)
375 goto out;
376
377 state.idx++;
378 state.written = 0;
379 }
380
381 list_del(&state.req->list);
382 state.req->state = xb_req_state_wait_reply;
383 list_add_tail(&state.req->list, &xs_reply_list);
384 state.req = NULL;
385
386 out:
387 mutex_unlock(&xb_write_mutex);
388
389 return 0;
390
391 out_err:
392 state.req->msg.type = XS_ERROR;
393 state.req->err = err;
394 list_del(&state.req->list);
395 if (state.req->state == xb_req_state_aborted)
396 kfree(state.req);
397 else {
398 state.req->state = xb_req_state_got_reply;
399 wake_up(&state.req->wq);
400 }
401
402 mutex_unlock(&xb_write_mutex);
403
404 state.req = NULL;
405
406 return err;
407}
408
409static int xb_thread_work(void)
410{
411 return xb_data_to_read() || xb_data_to_write();
412}
413
414static int xenbus_thread(void *unused)
415{
416 int err;
417
418 while (!kthread_should_stop()) {
419 if (wait_event_interruptible(xb_waitq, xb_thread_work()))
420 continue;
421
422 err = process_msg();
423 if (err == -ENOMEM)
424 schedule();
425 else if (err)
426 pr_warn_ratelimited("error %d while reading message\n",
427 err);
428
429 err = process_writes();
430 if (err)
431 pr_warn_ratelimited("error %d while writing message\n",
432 err);
433 }
434
435 xenbus_task = NULL;
199 return 0; 436 return 0;
200} 437}
201 438
@@ -223,6 +460,7 @@ int xb_init_comms(void)
223 rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); 460 rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
224 } else { 461 } else {
225 int err; 462 int err;
463
226 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 464 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
227 0, "xenbus", &xb_waitq); 465 0, "xenbus", &xb_waitq);
228 if (err < 0) { 466 if (err < 0) {
@@ -231,6 +469,13 @@ int xb_init_comms(void)
231 } 469 }
232 470
233 xenbus_irq = err; 471 xenbus_irq = err;
472
473 if (!xenbus_task) {
474 xenbus_task = kthread_run(xenbus_thread, NULL,
475 "xenbus");
476 if (IS_ERR(xenbus_task))
477 return PTR_ERR(xenbus_task);
478 }
234 } 479 }
235 480
236 return 0; 481 return 0;
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index e4b984777507..4d343eed08f5 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -113,6 +113,7 @@ struct xenbus_file_priv {
113 struct list_head read_buffers; 113 struct list_head read_buffers;
114 wait_queue_head_t read_waitq; 114 wait_queue_head_t read_waitq;
115 115
116 struct kref kref;
116}; 117};
117 118
118/* Read out any raw xenbus messages queued up. */ 119/* Read out any raw xenbus messages queued up. */
@@ -297,6 +298,107 @@ static void watch_fired(struct xenbus_watch *watch,
297 mutex_unlock(&adap->dev_data->reply_mutex); 298 mutex_unlock(&adap->dev_data->reply_mutex);
298} 299}
299 300
301static void xenbus_file_free(struct kref *kref)
302{
303 struct xenbus_file_priv *u;
304 struct xenbus_transaction_holder *trans, *tmp;
305 struct watch_adapter *watch, *tmp_watch;
306 struct read_buffer *rb, *tmp_rb;
307
308 u = container_of(kref, struct xenbus_file_priv, kref);
309
310 /*
311 * No need for locking here because there are no other users,
312 * by definition.
313 */
314
315 list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
316 xenbus_transaction_end(trans->handle, 1);
317 list_del(&trans->list);
318 kfree(trans);
319 }
320
321 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
322 unregister_xenbus_watch(&watch->watch);
323 list_del(&watch->list);
324 free_watch_adapter(watch);
325 }
326
327 list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
328 list_del(&rb->list);
329 kfree(rb);
330 }
331 kfree(u);
332}
333
334static struct xenbus_transaction_holder *xenbus_get_transaction(
335 struct xenbus_file_priv *u, uint32_t tx_id)
336{
337 struct xenbus_transaction_holder *trans;
338
339 list_for_each_entry(trans, &u->transactions, list)
340 if (trans->handle.id == tx_id)
341 return trans;
342
343 return NULL;
344}
345
346void xenbus_dev_queue_reply(struct xb_req_data *req)
347{
348 struct xenbus_file_priv *u = req->par;
349 struct xenbus_transaction_holder *trans = NULL;
350 int rc;
351 LIST_HEAD(staging_q);
352
353 xs_request_exit(req);
354
355 mutex_lock(&u->msgbuffer_mutex);
356
357 if (req->type == XS_TRANSACTION_START) {
358 trans = xenbus_get_transaction(u, 0);
359 if (WARN_ON(!trans))
360 goto out;
361 if (req->msg.type == XS_ERROR) {
362 list_del(&trans->list);
363 kfree(trans);
364 } else {
365 rc = kstrtou32(req->body, 10, &trans->handle.id);
366 if (WARN_ON(rc))
367 goto out;
368 }
369 } else if (req->msg.type == XS_TRANSACTION_END) {
370 trans = xenbus_get_transaction(u, req->msg.tx_id);
371 if (WARN_ON(!trans))
372 goto out;
373 list_del(&trans->list);
374 kfree(trans);
375 }
376
377 mutex_unlock(&u->msgbuffer_mutex);
378
379 mutex_lock(&u->reply_mutex);
380 rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg));
381 if (!rc)
382 rc = queue_reply(&staging_q, req->body, req->msg.len);
383 if (!rc) {
384 list_splice_tail(&staging_q, &u->read_buffers);
385 wake_up(&u->read_waitq);
386 } else {
387 queue_cleanup(&staging_q);
388 }
389 mutex_unlock(&u->reply_mutex);
390
391 kfree(req->body);
392 kfree(req);
393
394 kref_put(&u->kref, xenbus_file_free);
395
396 return;
397
398 out:
399 mutex_unlock(&u->msgbuffer_mutex);
400}
401
300static int xenbus_command_reply(struct xenbus_file_priv *u, 402static int xenbus_command_reply(struct xenbus_file_priv *u,
301 unsigned int msg_type, const char *reply) 403 unsigned int msg_type, const char *reply)
302{ 404{
@@ -317,6 +419,9 @@ static int xenbus_command_reply(struct xenbus_file_priv *u,
317 wake_up(&u->read_waitq); 419 wake_up(&u->read_waitq);
318 mutex_unlock(&u->reply_mutex); 420 mutex_unlock(&u->reply_mutex);
319 421
422 if (!rc)
423 kref_put(&u->kref, xenbus_file_free);
424
320 return rc; 425 return rc;
321} 426}
322 427
@@ -324,57 +429,22 @@ static int xenbus_write_transaction(unsigned msg_type,
324 struct xenbus_file_priv *u) 429 struct xenbus_file_priv *u)
325{ 430{
326 int rc; 431 int rc;
327 void *reply;
328 struct xenbus_transaction_holder *trans = NULL; 432 struct xenbus_transaction_holder *trans = NULL;
329 LIST_HEAD(staging_q);
330 433
331 if (msg_type == XS_TRANSACTION_START) { 434 if (msg_type == XS_TRANSACTION_START) {
332 trans = kmalloc(sizeof(*trans), GFP_KERNEL); 435 trans = kzalloc(sizeof(*trans), GFP_KERNEL);
333 if (!trans) { 436 if (!trans) {
334 rc = -ENOMEM; 437 rc = -ENOMEM;
335 goto out; 438 goto out;
336 } 439 }
337 } else if (u->u.msg.tx_id != 0) { 440 list_add(&trans->list, &u->transactions);
338 list_for_each_entry(trans, &u->transactions, list) 441 } else if (u->u.msg.tx_id != 0 &&
339 if (trans->handle.id == u->u.msg.tx_id) 442 !xenbus_get_transaction(u, u->u.msg.tx_id))
340 break; 443 return xenbus_command_reply(u, XS_ERROR, "ENOENT");
341 if (&trans->list == &u->transactions)
342 return xenbus_command_reply(u, XS_ERROR, "ENOENT");
343 }
344
345 reply = xenbus_dev_request_and_reply(&u->u.msg);
346 if (IS_ERR(reply)) {
347 if (msg_type == XS_TRANSACTION_START)
348 kfree(trans);
349 rc = PTR_ERR(reply);
350 goto out;
351 }
352 444
353 if (msg_type == XS_TRANSACTION_START) { 445 rc = xenbus_dev_request_and_reply(&u->u.msg, u);
354 if (u->u.msg.type == XS_ERROR) 446 if (rc)
355 kfree(trans);
356 else {
357 trans->handle.id = simple_strtoul(reply, NULL, 0);
358 list_add(&trans->list, &u->transactions);
359 }
360 } else if (u->u.msg.type == XS_TRANSACTION_END) {
361 list_del(&trans->list);
362 kfree(trans); 447 kfree(trans);
363 }
364
365 mutex_lock(&u->reply_mutex);
366 rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
367 if (!rc)
368 rc = queue_reply(&staging_q, reply, u->u.msg.len);
369 if (!rc) {
370 list_splice_tail(&staging_q, &u->read_buffers);
371 wake_up(&u->read_waitq);
372 } else {
373 queue_cleanup(&staging_q);
374 }
375 mutex_unlock(&u->reply_mutex);
376
377 kfree(reply);
378 448
379out: 449out:
380 return rc; 450 return rc;
@@ -506,6 +576,8 @@ static ssize_t xenbus_file_write(struct file *filp,
506 * OK, now we have a complete message. Do something with it. 576 * OK, now we have a complete message. Do something with it.
507 */ 577 */
508 578
579 kref_get(&u->kref);
580
509 msg_type = u->u.msg.type; 581 msg_type = u->u.msg.type;
510 582
511 switch (msg_type) { 583 switch (msg_type) {
@@ -520,8 +592,10 @@ static ssize_t xenbus_file_write(struct file *filp,
520 ret = xenbus_write_transaction(msg_type, u); 592 ret = xenbus_write_transaction(msg_type, u);
521 break; 593 break;
522 } 594 }
523 if (ret != 0) 595 if (ret != 0) {
524 rc = ret; 596 rc = ret;
597 kref_put(&u->kref, xenbus_file_free);
598 }
525 599
526 /* Buffered message consumed */ 600 /* Buffered message consumed */
527 u->len = 0; 601 u->len = 0;
@@ -546,6 +620,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
546 if (u == NULL) 620 if (u == NULL)
547 return -ENOMEM; 621 return -ENOMEM;
548 622
623 kref_init(&u->kref);
624
549 INIT_LIST_HEAD(&u->transactions); 625 INIT_LIST_HEAD(&u->transactions);
550 INIT_LIST_HEAD(&u->watches); 626 INIT_LIST_HEAD(&u->watches);
551 INIT_LIST_HEAD(&u->read_buffers); 627 INIT_LIST_HEAD(&u->read_buffers);
@@ -562,32 +638,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
562static int xenbus_file_release(struct inode *inode, struct file *filp) 638static int xenbus_file_release(struct inode *inode, struct file *filp)
563{ 639{
564 struct xenbus_file_priv *u = filp->private_data; 640 struct xenbus_file_priv *u = filp->private_data;
565 struct xenbus_transaction_holder *trans, *tmp;
566 struct watch_adapter *watch, *tmp_watch;
567 struct read_buffer *rb, *tmp_rb;
568
569 /*
570 * No need for locking here because there are no other users,
571 * by definition.
572 */
573 641
574 list_for_each_entry_safe(trans, tmp, &u->transactions, list) { 642 kref_put(&u->kref, xenbus_file_free);
575 xenbus_transaction_end(trans->handle, 1);
576 list_del(&trans->list);
577 kfree(trans);
578 }
579
580 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
581 unregister_xenbus_watch(&watch->watch);
582 list_del(&watch->list);
583 free_watch_adapter(watch);
584 }
585
586 list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
587 list_del(&rb->list);
588 kfree(rb);
589 }
590 kfree(u);
591 643
592 return 0; 644 return 0;
593} 645}
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index ebc768f44abe..e46080214955 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fcntl.h> 44#include <linux/fcntl.h>
45#include <linux/kthread.h> 45#include <linux/kthread.h>
46#include <linux/reboot.h>
46#include <linux/rwsem.h> 47#include <linux/rwsem.h>
47#include <linux/mutex.h> 48#include <linux/mutex.h>
48#include <asm/xen/hypervisor.h> 49#include <asm/xen/hypervisor.h>
@@ -50,61 +51,28 @@
50#include <xen/xen.h> 51#include <xen/xen.h>
51#include "xenbus.h" 52#include "xenbus.h"
52 53
53struct xs_stored_msg { 54/*
54 struct list_head list; 55 * Framework to protect suspend/resume handling against normal Xenstore
55 56 * message handling:
56 struct xsd_sockmsg hdr; 57 * During suspend/resume there must be no open transaction and no pending
57 58 * Xenstore request.
58 union { 59 * New watch events happening in this time can be ignored by firing all watches
59 /* Queued replies. */ 60 * after resume.
60 struct { 61 */
61 char *body;
62 } reply;
63
64 /* Queued watch events. */
65 struct {
66 struct xenbus_watch *handle;
67 const char *path;
68 const char *token;
69 } watch;
70 } u;
71};
72 62
73struct xs_handle { 63/* Lock protecting enter/exit critical region. */
74 /* A list of replies. Currently only one will ever be outstanding. */ 64static DEFINE_SPINLOCK(xs_state_lock);
75 struct list_head reply_list; 65/* Number of users in critical region (protected by xs_state_lock). */
76 spinlock_t reply_lock; 66static unsigned int xs_state_users;
77 wait_queue_head_t reply_waitq; 67/* Suspend handler waiting or already active (protected by xs_state_lock)? */
78 68static int xs_suspend_active;
79 /* 69/* Unique Xenstore request id (protected by xs_state_lock). */
80 * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. 70static uint32_t xs_request_id;
81 * response_mutex is never taken simultaneously with the other three.
82 *
83 * transaction_mutex must be held before incrementing
84 * transaction_count. The mutex is held when a suspend is in
85 * progress to prevent new transactions starting.
86 *
87 * When decrementing transaction_count to zero the wait queue
88 * should be woken up, the suspend code waits for count to
89 * reach zero.
90 */
91
92 /* One request at a time. */
93 struct mutex request_mutex;
94
95 /* Protect xenbus reader thread against save/restore. */
96 struct mutex response_mutex;
97
98 /* Protect transactions against save/restore. */
99 struct mutex transaction_mutex;
100 atomic_t transaction_count;
101 wait_queue_head_t transaction_wq;
102
103 /* Protect watch (de)register against save/restore. */
104 struct rw_semaphore watch_mutex;
105};
106 71
107static struct xs_handle xs_state; 72/* Wait queue for all callers waiting for critical region to become usable. */
73static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq);
74/* Wait queue for suspend handling waiting for critical region being empty. */
75static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq);
108 76
109/* List of registered watches, and a lock to protect it. */ 77/* List of registered watches, and a lock to protect it. */
110static LIST_HEAD(watches); 78static LIST_HEAD(watches);
@@ -114,6 +82,9 @@ static DEFINE_SPINLOCK(watches_lock);
114static LIST_HEAD(watch_events); 82static LIST_HEAD(watch_events);
115static DEFINE_SPINLOCK(watch_events_lock); 83static DEFINE_SPINLOCK(watch_events_lock);
116 84
85/* Protect watch (de)register against save/restore. */
86static DECLARE_RWSEM(xs_watch_rwsem);
87
117/* 88/*
118 * Details of the xenwatch callback kernel thread. The thread waits on the 89 * Details of the xenwatch callback kernel thread. The thread waits on the
119 * watch_events_waitq for work to do (queued on watch_events list). When it 90 * watch_events_waitq for work to do (queued on watch_events list). When it
@@ -124,6 +95,59 @@ static pid_t xenwatch_pid;
124static DEFINE_MUTEX(xenwatch_mutex); 95static DEFINE_MUTEX(xenwatch_mutex);
125static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); 96static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
126 97
98static void xs_suspend_enter(void)
99{
100 spin_lock(&xs_state_lock);
101 xs_suspend_active++;
102 spin_unlock(&xs_state_lock);
103 wait_event(xs_state_exit_wq, xs_state_users == 0);
104}
105
106static void xs_suspend_exit(void)
107{
108 spin_lock(&xs_state_lock);
109 xs_suspend_active--;
110 spin_unlock(&xs_state_lock);
111 wake_up_all(&xs_state_enter_wq);
112}
113
114static uint32_t xs_request_enter(struct xb_req_data *req)
115{
116 uint32_t rq_id;
117
118 req->type = req->msg.type;
119
120 spin_lock(&xs_state_lock);
121
122 while (!xs_state_users && xs_suspend_active) {
123 spin_unlock(&xs_state_lock);
124 wait_event(xs_state_enter_wq, xs_suspend_active == 0);
125 spin_lock(&xs_state_lock);
126 }
127
128 if (req->type == XS_TRANSACTION_START)
129 xs_state_users++;
130 xs_state_users++;
131 rq_id = xs_request_id++;
132
133 spin_unlock(&xs_state_lock);
134
135 return rq_id;
136}
137
138void xs_request_exit(struct xb_req_data *req)
139{
140 spin_lock(&xs_state_lock);
141 xs_state_users--;
142 if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
143 req->type == XS_TRANSACTION_END)
144 xs_state_users--;
145 spin_unlock(&xs_state_lock);
146
147 if (xs_suspend_active && !xs_state_users)
148 wake_up(&xs_state_exit_wq);
149}
150
127static int get_error(const char *errorstring) 151static int get_error(const char *errorstring)
128{ 152{
129 unsigned int i; 153 unsigned int i;
@@ -161,21 +185,24 @@ static bool xenbus_ok(void)
161 } 185 }
162 return false; 186 return false;
163} 187}
164static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) 188
189static bool test_reply(struct xb_req_data *req)
165{ 190{
166 struct xs_stored_msg *msg; 191 if (req->state == xb_req_state_got_reply || !xenbus_ok())
167 char *body; 192 return true;
168 193
169 spin_lock(&xs_state.reply_lock); 194 /* Make sure to reread req->state each time. */
195 barrier();
170 196
171 while (list_empty(&xs_state.reply_list)) { 197 return false;
172 spin_unlock(&xs_state.reply_lock); 198}
173 if (xenbus_ok()) 199
174 /* XXX FIXME: Avoid synchronous wait for response here. */ 200static void *read_reply(struct xb_req_data *req)
175 wait_event_timeout(xs_state.reply_waitq, 201{
176 !list_empty(&xs_state.reply_list), 202 while (req->state != xb_req_state_got_reply) {
177 msecs_to_jiffies(500)); 203 wait_event(req->wq, test_reply(req));
178 else { 204
205 if (!xenbus_ok())
179 /* 206 /*
180 * If we are in the process of being shut-down there is 207 * If we are in the process of being shut-down there is
181 * no point of trying to contact XenBus - it is either 208 * no point of trying to contact XenBus - it is either
@@ -183,76 +210,82 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
183 * has been killed or is unreachable. 210 * has been killed or is unreachable.
184 */ 211 */
185 return ERR_PTR(-EIO); 212 return ERR_PTR(-EIO);
186 } 213 if (req->err)
187 spin_lock(&xs_state.reply_lock); 214 return ERR_PTR(req->err);
215
188 } 216 }
189 217
190 msg = list_entry(xs_state.reply_list.next, 218 return req->body;
191 struct xs_stored_msg, list); 219}
192 list_del(&msg->list);
193 220
194 spin_unlock(&xs_state.reply_lock); 221static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
222{
223 bool notify;
195 224
196 *type = msg->hdr.type; 225 req->msg = *msg;
197 if (len) 226 req->err = 0;
198 *len = msg->hdr.len; 227 req->state = xb_req_state_queued;
199 body = msg->u.reply.body; 228 init_waitqueue_head(&req->wq);
200 229
201 kfree(msg); 230 req->msg.req_id = xs_request_enter(req);
202 231
203 return body; 232 mutex_lock(&xb_write_mutex);
204} 233 list_add_tail(&req->list, &xb_write_list);
234 notify = list_is_singular(&xb_write_list);
235 mutex_unlock(&xb_write_mutex);
205 236
206static void transaction_start(void) 237 if (notify)
207{ 238 wake_up(&xb_waitq);
208 mutex_lock(&xs_state.transaction_mutex);
209 atomic_inc(&xs_state.transaction_count);
210 mutex_unlock(&xs_state.transaction_mutex);
211} 239}
212 240
213static void transaction_end(void) 241static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg)
214{ 242{
215 if (atomic_dec_and_test(&xs_state.transaction_count)) 243 void *ret;
216 wake_up(&xs_state.transaction_wq);
217}
218 244
219static void transaction_suspend(void) 245 ret = read_reply(req);
220{ 246
221 mutex_lock(&xs_state.transaction_mutex); 247 xs_request_exit(req);
222 wait_event(xs_state.transaction_wq, 248
223 atomic_read(&xs_state.transaction_count) == 0); 249 msg->type = req->msg.type;
250 msg->len = req->msg.len;
251
252 mutex_lock(&xb_write_mutex);
253 if (req->state == xb_req_state_queued ||
254 req->state == xb_req_state_wait_reply)
255 req->state = xb_req_state_aborted;
256 else
257 kfree(req);
258 mutex_unlock(&xb_write_mutex);
259
260 return ret;
224} 261}
225 262
226static void transaction_resume(void) 263static void xs_wake_up(struct xb_req_data *req)
227{ 264{
228 mutex_unlock(&xs_state.transaction_mutex); 265 wake_up(&req->wq);
229} 266}
230 267
231void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) 268int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par)
232{ 269{
233 void *ret; 270 struct xb_req_data *req;
234 enum xsd_sockmsg_type type = msg->type; 271 struct kvec *vec;
235 int err;
236 272
237 if (type == XS_TRANSACTION_START) 273 req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL);
238 transaction_start(); 274 if (!req)
239 275 return -ENOMEM;
240 mutex_lock(&xs_state.request_mutex);
241 276
242 err = xb_write(msg, sizeof(*msg) + msg->len); 277 vec = (struct kvec *)(req + 1);
243 if (err) { 278 vec->iov_len = msg->len;
244 msg->type = XS_ERROR; 279 vec->iov_base = msg + 1;
245 ret = ERR_PTR(err);
246 } else
247 ret = read_reply(&msg->type, &msg->len);
248 280
249 mutex_unlock(&xs_state.request_mutex); 281 req->vec = vec;
282 req->num_vecs = 1;
283 req->cb = xenbus_dev_queue_reply;
284 req->par = par;
250 285
251 if ((msg->type == XS_TRANSACTION_END) || 286 xs_send(req, msg);
252 ((type == XS_TRANSACTION_START) && (msg->type == XS_ERROR)))
253 transaction_end();
254 287
255 return ret; 288 return 0;
256} 289}
257EXPORT_SYMBOL(xenbus_dev_request_and_reply); 290EXPORT_SYMBOL(xenbus_dev_request_and_reply);
258 291
@@ -263,37 +296,31 @@ static void *xs_talkv(struct xenbus_transaction t,
263 unsigned int num_vecs, 296 unsigned int num_vecs,
264 unsigned int *len) 297 unsigned int *len)
265{ 298{
299 struct xb_req_data *req;
266 struct xsd_sockmsg msg; 300 struct xsd_sockmsg msg;
267 void *ret = NULL; 301 void *ret = NULL;
268 unsigned int i; 302 unsigned int i;
269 int err; 303 int err;
270 304
305 req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH);
306 if (!req)
307 return ERR_PTR(-ENOMEM);
308
309 req->vec = iovec;
310 req->num_vecs = num_vecs;
311 req->cb = xs_wake_up;
312
271 msg.tx_id = t.id; 313 msg.tx_id = t.id;
272 msg.req_id = 0;
273 msg.type = type; 314 msg.type = type;
274 msg.len = 0; 315 msg.len = 0;
275 for (i = 0; i < num_vecs; i++) 316 for (i = 0; i < num_vecs; i++)
276 msg.len += iovec[i].iov_len; 317 msg.len += iovec[i].iov_len;
277 318
278 mutex_lock(&xs_state.request_mutex); 319 xs_send(req, &msg);
279
280 err = xb_write(&msg, sizeof(msg));
281 if (err) {
282 mutex_unlock(&xs_state.request_mutex);
283 return ERR_PTR(err);
284 }
285
286 for (i = 0; i < num_vecs; i++) {
287 err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
288 if (err) {
289 mutex_unlock(&xs_state.request_mutex);
290 return ERR_PTR(err);
291 }
292 }
293
294 ret = read_reply(&msg.type, len);
295 320
296 mutex_unlock(&xs_state.request_mutex); 321 ret = xs_wait_for_reply(req, &msg);
322 if (len)
323 *len = msg.len;
297 324
298 if (IS_ERR(ret)) 325 if (IS_ERR(ret))
299 return ret; 326 return ret;
@@ -500,13 +527,9 @@ int xenbus_transaction_start(struct xenbus_transaction *t)
500{ 527{
501 char *id_str; 528 char *id_str;
502 529
503 transaction_start();
504
505 id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); 530 id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
506 if (IS_ERR(id_str)) { 531 if (IS_ERR(id_str))
507 transaction_end();
508 return PTR_ERR(id_str); 532 return PTR_ERR(id_str);
509 }
510 533
511 t->id = simple_strtoul(id_str, NULL, 0); 534 t->id = simple_strtoul(id_str, NULL, 0);
512 kfree(id_str); 535 kfree(id_str);
@@ -520,18 +543,13 @@ EXPORT_SYMBOL_GPL(xenbus_transaction_start);
520int xenbus_transaction_end(struct xenbus_transaction t, int abort) 543int xenbus_transaction_end(struct xenbus_transaction t, int abort)
521{ 544{
522 char abortstr[2]; 545 char abortstr[2];
523 int err;
524 546
525 if (abort) 547 if (abort)
526 strcpy(abortstr, "F"); 548 strcpy(abortstr, "F");
527 else 549 else
528 strcpy(abortstr, "T"); 550 strcpy(abortstr, "T");
529 551
530 err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); 552 return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
531
532 transaction_end();
533
534 return err;
535} 553}
536EXPORT_SYMBOL_GPL(xenbus_transaction_end); 554EXPORT_SYMBOL_GPL(xenbus_transaction_end);
537 555
@@ -664,6 +682,30 @@ static struct xenbus_watch *find_watch(const char *token)
664 682
665 return NULL; 683 return NULL;
666} 684}
685
686int xs_watch_msg(struct xs_watch_event *event)
687{
688 if (count_strings(event->body, event->len) != 2) {
689 kfree(event);
690 return -EINVAL;
691 }
692 event->path = (const char *)event->body;
693 event->token = (const char *)strchr(event->body, '\0') + 1;
694
695 spin_lock(&watches_lock);
696 event->handle = find_watch(event->token);
697 if (event->handle != NULL) {
698 spin_lock(&watch_events_lock);
699 list_add_tail(&event->list, &watch_events);
700 wake_up(&watch_events_waitq);
701 spin_unlock(&watch_events_lock);
702 } else
703 kfree(event);
704 spin_unlock(&watches_lock);
705
706 return 0;
707}
708
667/* 709/*
668 * Certain older XenBus toolstack cannot handle reading values that are 710 * Certain older XenBus toolstack cannot handle reading values that are
669 * not populated. Some Xen 3.4 installation are incapable of doing this 711 * not populated. Some Xen 3.4 installation are incapable of doing this
@@ -712,7 +754,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
712 754
713 sprintf(token, "%lX", (long)watch); 755 sprintf(token, "%lX", (long)watch);
714 756
715 down_read(&xs_state.watch_mutex); 757 down_read(&xs_watch_rwsem);
716 758
717 spin_lock(&watches_lock); 759 spin_lock(&watches_lock);
718 BUG_ON(find_watch(token)); 760 BUG_ON(find_watch(token));
@@ -727,7 +769,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
727 spin_unlock(&watches_lock); 769 spin_unlock(&watches_lock);
728 } 770 }
729 771
730 up_read(&xs_state.watch_mutex); 772 up_read(&xs_watch_rwsem);
731 773
732 return err; 774 return err;
733} 775}
@@ -735,13 +777,13 @@ EXPORT_SYMBOL_GPL(register_xenbus_watch);
735 777
736void unregister_xenbus_watch(struct xenbus_watch *watch) 778void unregister_xenbus_watch(struct xenbus_watch *watch)
737{ 779{
738 struct xs_stored_msg *msg, *tmp; 780 struct xs_watch_event *event, *tmp;
739 char token[sizeof(watch) * 2 + 1]; 781 char token[sizeof(watch) * 2 + 1];
740 int err; 782 int err;
741 783
742 sprintf(token, "%lX", (long)watch); 784 sprintf(token, "%lX", (long)watch);
743 785
744 down_read(&xs_state.watch_mutex); 786 down_read(&xs_watch_rwsem);
745 787
746 spin_lock(&watches_lock); 788 spin_lock(&watches_lock);
747 BUG_ON(!find_watch(token)); 789 BUG_ON(!find_watch(token));
@@ -752,7 +794,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch)
752 if (err) 794 if (err)
753 pr_warn("Failed to release watch %s: %i\n", watch->node, err); 795 pr_warn("Failed to release watch %s: %i\n", watch->node, err);
754 796
755 up_read(&xs_state.watch_mutex); 797 up_read(&xs_watch_rwsem);
756 798
757 /* Make sure there are no callbacks running currently (unless 799 /* Make sure there are no callbacks running currently (unless
758 its us) */ 800 its us) */
@@ -761,12 +803,11 @@ void unregister_xenbus_watch(struct xenbus_watch *watch)
761 803
762 /* Cancel pending watch events. */ 804 /* Cancel pending watch events. */
763 spin_lock(&watch_events_lock); 805 spin_lock(&watch_events_lock);
764 list_for_each_entry_safe(msg, tmp, &watch_events, list) { 806 list_for_each_entry_safe(event, tmp, &watch_events, list) {
765 if (msg->u.watch.handle != watch) 807 if (event->handle != watch)
766 continue; 808 continue;
767 list_del(&msg->list); 809 list_del(&event->list);
768 kfree(msg->u.watch.path); 810 kfree(event);
769 kfree(msg);
770 } 811 }
771 spin_unlock(&watch_events_lock); 812 spin_unlock(&watch_events_lock);
772 813
@@ -777,10 +818,10 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
777 818
778void xs_suspend(void) 819void xs_suspend(void)
779{ 820{
780 transaction_suspend(); 821 xs_suspend_enter();
781 down_write(&xs_state.watch_mutex); 822
782 mutex_lock(&xs_state.request_mutex); 823 down_write(&xs_watch_rwsem);
783 mutex_lock(&xs_state.response_mutex); 824 mutex_lock(&xs_response_mutex);
784} 825}
785 826
786void xs_resume(void) 827void xs_resume(void)
@@ -790,31 +831,31 @@ void xs_resume(void)
790 831
791 xb_init_comms(); 832 xb_init_comms();
792 833
793 mutex_unlock(&xs_state.response_mutex); 834 mutex_unlock(&xs_response_mutex);
794 mutex_unlock(&xs_state.request_mutex); 835
795 transaction_resume(); 836 xs_suspend_exit();
796 837
797 /* No need for watches_lock: the watch_mutex is sufficient. */ 838 /* No need for watches_lock: the xs_watch_rwsem is sufficient. */
798 list_for_each_entry(watch, &watches, list) { 839 list_for_each_entry(watch, &watches, list) {
799 sprintf(token, "%lX", (long)watch); 840 sprintf(token, "%lX", (long)watch);
800 xs_watch(watch->node, token); 841 xs_watch(watch->node, token);
801 } 842 }
802 843
803 up_write(&xs_state.watch_mutex); 844 up_write(&xs_watch_rwsem);
804} 845}
805 846
806void xs_suspend_cancel(void) 847void xs_suspend_cancel(void)
807{ 848{
808 mutex_unlock(&xs_state.response_mutex); 849 mutex_unlock(&xs_response_mutex);
809 mutex_unlock(&xs_state.request_mutex); 850 up_write(&xs_watch_rwsem);
810 up_write(&xs_state.watch_mutex); 851
811 mutex_unlock(&xs_state.transaction_mutex); 852 xs_suspend_exit();
812} 853}
813 854
814static int xenwatch_thread(void *unused) 855static int xenwatch_thread(void *unused)
815{ 856{
816 struct list_head *ent; 857 struct list_head *ent;
817 struct xs_stored_msg *msg; 858 struct xs_watch_event *event;
818 859
819 for (;;) { 860 for (;;) {
820 wait_event_interruptible(watch_events_waitq, 861 wait_event_interruptible(watch_events_waitq,
@@ -832,12 +873,10 @@ static int xenwatch_thread(void *unused)
832 spin_unlock(&watch_events_lock); 873 spin_unlock(&watch_events_lock);
833 874
834 if (ent != &watch_events) { 875 if (ent != &watch_events) {
835 msg = list_entry(ent, struct xs_stored_msg, list); 876 event = list_entry(ent, struct xs_watch_event, list);
836 msg->u.watch.handle->callback(msg->u.watch.handle, 877 event->handle->callback(event->handle, event->path,
837 msg->u.watch.path, 878 event->token);
838 msg->u.watch.token); 879 kfree(event);
839 kfree(msg->u.watch.path);
840 kfree(msg);
841 } 880 }
842 881
843 mutex_unlock(&xenwatch_mutex); 882 mutex_unlock(&xenwatch_mutex);
@@ -846,126 +885,37 @@ static int xenwatch_thread(void *unused)
846 return 0; 885 return 0;
847} 886}
848 887
849static int process_msg(void) 888/*
889 * Wake up all threads waiting for a xenstore reply. In case of shutdown all
890 * pending replies will be marked as "aborted" in order to let the waiters
891 * return in spite of xenstore possibly no longer being able to reply. This
892 * will avoid blocking shutdown by a thread waiting for xenstore but being
893 * necessary for shutdown processing to proceed.
894 */
895static int xs_reboot_notify(struct notifier_block *nb,
896 unsigned long code, void *unused)
850{ 897{
851 struct xs_stored_msg *msg; 898 struct xb_req_data *req;
852 char *body;
853 int err;
854
855 /*
856 * We must disallow save/restore while reading a xenstore message.
857 * A partial read across s/r leaves us out of sync with xenstored.
858 */
859 for (;;) {
860 err = xb_wait_for_data_to_read();
861 if (err)
862 return err;
863 mutex_lock(&xs_state.response_mutex);
864 if (xb_data_to_read())
865 break;
866 /* We raced with save/restore: pending data 'disappeared'. */
867 mutex_unlock(&xs_state.response_mutex);
868 }
869 899
870 900 mutex_lock(&xb_write_mutex);
871 msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH); 901 list_for_each_entry(req, &xs_reply_list, list)
872 if (msg == NULL) { 902 wake_up(&req->wq);
873 err = -ENOMEM; 903 list_for_each_entry(req, &xb_write_list, list)
874 goto out; 904 wake_up(&req->wq);
875 } 905 mutex_unlock(&xb_write_mutex);
876 906 return NOTIFY_DONE;
877 err = xb_read(&msg->hdr, sizeof(msg->hdr));
878 if (err) {
879 kfree(msg);
880 goto out;
881 }
882
883 if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
884 kfree(msg);
885 err = -EINVAL;
886 goto out;
887 }
888
889 body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
890 if (body == NULL) {
891 kfree(msg);
892 err = -ENOMEM;
893 goto out;
894 }
895
896 err = xb_read(body, msg->hdr.len);
897 if (err) {
898 kfree(body);
899 kfree(msg);
900 goto out;
901 }
902 body[msg->hdr.len] = '\0';
903
904 if (msg->hdr.type == XS_WATCH_EVENT) {
905 if (count_strings(body, msg->hdr.len) != 2) {
906 err = -EINVAL;
907 kfree(msg);
908 kfree(body);
909 goto out;
910 }
911 msg->u.watch.path = (const char *)body;
912 msg->u.watch.token = (const char *)strchr(body, '\0') + 1;
913
914 spin_lock(&watches_lock);
915 msg->u.watch.handle = find_watch(msg->u.watch.token);
916 if (msg->u.watch.handle != NULL) {
917 spin_lock(&watch_events_lock);
918 list_add_tail(&msg->list, &watch_events);
919 wake_up(&watch_events_waitq);
920 spin_unlock(&watch_events_lock);
921 } else {
922 kfree(body);
923 kfree(msg);
924 }
925 spin_unlock(&watches_lock);
926 } else {
927 msg->u.reply.body = body;
928 spin_lock(&xs_state.reply_lock);
929 list_add_tail(&msg->list, &xs_state.reply_list);
930 spin_unlock(&xs_state.reply_lock);
931 wake_up(&xs_state.reply_waitq);
932 }
933
934 out:
935 mutex_unlock(&xs_state.response_mutex);
936 return err;
937} 907}
938 908
939static int xenbus_thread(void *unused) 909static struct notifier_block xs_reboot_nb = {
940{ 910 .notifier_call = xs_reboot_notify,
941 int err; 911};
942
943 for (;;) {
944 err = process_msg();
945 if (err)
946 pr_warn("error %d while reading message\n", err);
947 if (kthread_should_stop())
948 break;
949 }
950
951 return 0;
952}
953 912
954int xs_init(void) 913int xs_init(void)
955{ 914{
956 int err; 915 int err;
957 struct task_struct *task; 916 struct task_struct *task;
958 917
959 INIT_LIST_HEAD(&xs_state.reply_list); 918 register_reboot_notifier(&xs_reboot_nb);
960 spin_lock_init(&xs_state.reply_lock);
961 init_waitqueue_head(&xs_state.reply_waitq);
962
963 mutex_init(&xs_state.request_mutex);
964 mutex_init(&xs_state.response_mutex);
965 mutex_init(&xs_state.transaction_mutex);
966 init_rwsem(&xs_state.watch_mutex);
967 atomic_set(&xs_state.transaction_count, 0);
968 init_waitqueue_head(&xs_state.transaction_wq);
969 919
970 /* Initialize the shared memory rings to talk to xenstored */ 920 /* Initialize the shared memory rings to talk to xenstored */
971 err = xb_init_comms(); 921 err = xb_init_comms();
@@ -977,10 +927,6 @@ int xs_init(void)
977 return PTR_ERR(task); 927 return PTR_ERR(task);
978 xenwatch_pid = task->pid; 928 xenwatch_pid = task->pid;
979 929
980 task = kthread_run(xenbus_thread, NULL, "xenbus");
981 if (IS_ERR(task))
982 return PTR_ERR(task);
983
984 /* shutdown watches for kexec boot */ 930 /* shutdown watches for kexec boot */
985 xs_reset_watches(); 931 xs_reset_watches();
986 932