diff options
Diffstat (limited to 'net/9p/trans_virtio.c')
-rw-r--r-- | net/9p/trans_virtio.c | 192 |
1 files changed, 169 insertions, 23 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20e..244e70742183 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c | |||
@@ -43,13 +43,17 @@ | |||
43 | #include <net/9p/client.h> | 43 | #include <net/9p/client.h> |
44 | #include <net/9p/transport.h> | 44 | #include <net/9p/transport.h> |
45 | #include <linux/scatterlist.h> | 45 | #include <linux/scatterlist.h> |
46 | #include <linux/swap.h> | ||
46 | #include <linux/virtio.h> | 47 | #include <linux/virtio.h> |
47 | #include <linux/virtio_9p.h> | 48 | #include <linux/virtio_9p.h> |
49 | #include "trans_common.h" | ||
48 | 50 | ||
49 | #define VIRTQUEUE_NUM 128 | 51 | #define VIRTQUEUE_NUM 128 |
50 | 52 | ||
51 | /* a single mutex to manage channel initialization and attachment */ | 53 | /* a single mutex to manage channel initialization and attachment */ |
52 | static DEFINE_MUTEX(virtio_9p_lock); | 54 | static DEFINE_MUTEX(virtio_9p_lock); |
55 | static DECLARE_WAIT_QUEUE_HEAD(vp_wq); | ||
56 | static atomic_t vp_pinned = ATOMIC_INIT(0); | ||
53 | 57 | ||
54 | /** | 58 | /** |
55 | * struct virtio_chan - per-instance transport information | 59 | * struct virtio_chan - per-instance transport information |
@@ -77,7 +81,10 @@ struct virtio_chan { | |||
77 | struct virtqueue *vq; | 81 | struct virtqueue *vq; |
78 | int ring_bufs_avail; | 82 | int ring_bufs_avail; |
79 | wait_queue_head_t *vc_wq; | 83 | wait_queue_head_t *vc_wq; |
80 | 84 | /* This is a global limit. Since we don't have a global structure, | |
85 | * we keep it in each channel. | ||
86 | */ | ||
87 | int p9_max_pages; | ||
81 | /* Scatterlist: can be too big for stack. */ | 88 | /* Scatterlist: can be too big for stack. */ |
82 | struct scatterlist sg[VIRTQUEUE_NUM]; | 89 | struct scatterlist sg[VIRTQUEUE_NUM]; |
83 | 90 | ||
@@ -140,26 +147,36 @@ static void req_done(struct virtqueue *vq) | |||
140 | 147 | ||
141 | P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); | 148 | P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); |
142 | 149 | ||
143 | do { | 150 | while (1) { |
144 | spin_lock_irqsave(&chan->lock, flags); | 151 | spin_lock_irqsave(&chan->lock, flags); |
145 | rc = virtqueue_get_buf(chan->vq, &len); | 152 | rc = virtqueue_get_buf(chan->vq, &len); |
146 | 153 | ||
147 | if (rc != NULL) { | 154 | if (rc == NULL) { |
148 | if (!chan->ring_bufs_avail) { | ||
149 | chan->ring_bufs_avail = 1; | ||
150 | wake_up(chan->vc_wq); | ||
151 | } | ||
152 | spin_unlock_irqrestore(&chan->lock, flags); | ||
153 | P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); | ||
154 | P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", | ||
155 | rc->tag); | ||
156 | req = p9_tag_lookup(chan->client, rc->tag); | ||
157 | req->status = REQ_STATUS_RCVD; | ||
158 | p9_client_cb(chan->client, req); | ||
159 | } else { | ||
160 | spin_unlock_irqrestore(&chan->lock, flags); | 155 | spin_unlock_irqrestore(&chan->lock, flags); |
156 | break; | ||
157 | } | ||
158 | |||
159 | chan->ring_bufs_avail = 1; | ||
160 | spin_unlock_irqrestore(&chan->lock, flags); | ||
161 | /* Wakeup if anyone waiting for VirtIO ring space. */ | ||
162 | wake_up(chan->vc_wq); | ||
163 | P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); | ||
164 | P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); | ||
165 | req = p9_tag_lookup(chan->client, rc->tag); | ||
166 | if (req->tc->private) { | ||
167 | struct trans_rpage_info *rp = req->tc->private; | ||
168 | int p = rp->rp_nr_pages; | ||
169 | /*Release pages */ | ||
170 | p9_release_req_pages(rp); | ||
171 | atomic_sub(p, &vp_pinned); | ||
172 | wake_up(&vp_wq); | ||
173 | if (rp->rp_alloc) | ||
174 | kfree(rp); | ||
175 | req->tc->private = NULL; | ||
161 | } | 176 | } |
162 | } while (rc != NULL); | 177 | req->status = REQ_STATUS_RCVD; |
178 | p9_client_cb(chan->client, req); | ||
179 | } | ||
163 | } | 180 | } |
164 | 181 | ||
165 | /** | 182 | /** |
@@ -203,6 +220,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) | |||
203 | } | 220 | } |
204 | 221 | ||
205 | /** | 222 | /** |
223 | * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, | ||
224 | * this takes a list of pages. | ||
225 | * @sg: scatter/gather list to pack into | ||
226 | * @start: which segment of the sg_list to start at | ||
227 | * @pdata_off: Offset into the first page | ||
228 | * @**pdata: a list of pages to add into sg. | ||
229 | * @count: amount of data to pack into the scatter/gather list | ||
230 | */ | ||
231 | static int | ||
232 | pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, | ||
233 | struct page **pdata, int count) | ||
234 | { | ||
235 | int s; | ||
236 | int i = 0; | ||
237 | int index = start; | ||
238 | |||
239 | if (pdata_off) { | ||
240 | s = min((int)(PAGE_SIZE - pdata_off), count); | ||
241 | sg_set_page(&sg[index++], pdata[i++], s, pdata_off); | ||
242 | count -= s; | ||
243 | } | ||
244 | |||
245 | while (count) { | ||
246 | BUG_ON(index > limit); | ||
247 | s = min((int)PAGE_SIZE, count); | ||
248 | sg_set_page(&sg[index++], pdata[i++], s, 0); | ||
249 | count -= s; | ||
250 | } | ||
251 | return index-start; | ||
252 | } | ||
253 | |||
254 | /** | ||
206 | * p9_virtio_request - issue a request | 255 | * p9_virtio_request - issue a request |
207 | * @client: client instance issuing the request | 256 | * @client: client instance issuing the request |
208 | * @req: request to be issued | 257 | * @req: request to be issued |
@@ -212,22 +261,114 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) | |||
212 | static int | 261 | static int |
213 | p9_virtio_request(struct p9_client *client, struct p9_req_t *req) | 262 | p9_virtio_request(struct p9_client *client, struct p9_req_t *req) |
214 | { | 263 | { |
215 | int in, out; | 264 | int in, out, inp, outp; |
216 | struct virtio_chan *chan = client->trans; | 265 | struct virtio_chan *chan = client->trans; |
217 | char *rdata = (char *)req->rc+sizeof(struct p9_fcall); | 266 | char *rdata = (char *)req->rc+sizeof(struct p9_fcall); |
218 | unsigned long flags; | 267 | unsigned long flags; |
219 | int err; | 268 | size_t pdata_off = 0; |
269 | struct trans_rpage_info *rpinfo = NULL; | ||
270 | int err, pdata_len = 0; | ||
220 | 271 | ||
221 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); | 272 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); |
222 | 273 | ||
223 | req_retry: | ||
224 | req->status = REQ_STATUS_SENT; | 274 | req->status = REQ_STATUS_SENT; |
225 | 275 | ||
276 | if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { | ||
277 | int nr_pages = p9_nr_pages(req); | ||
278 | int rpinfo_size = sizeof(struct trans_rpage_info) + | ||
279 | sizeof(struct page *) * nr_pages; | ||
280 | |||
281 | if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { | ||
282 | err = wait_event_interruptible(vp_wq, | ||
283 | atomic_read(&vp_pinned) < chan->p9_max_pages); | ||
284 | if (err == -ERESTARTSYS) | ||
285 | return err; | ||
286 | P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); | ||
287 | } | ||
288 | |||
289 | if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { | ||
290 | /* We can use sdata */ | ||
291 | req->tc->private = req->tc->sdata + req->tc->size; | ||
292 | rpinfo = (struct trans_rpage_info *)req->tc->private; | ||
293 | rpinfo->rp_alloc = 0; | ||
294 | } else { | ||
295 | req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); | ||
296 | if (!req->tc->private) { | ||
297 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " | ||
298 | "private kmalloc returned NULL"); | ||
299 | return -ENOMEM; | ||
300 | } | ||
301 | rpinfo = (struct trans_rpage_info *)req->tc->private; | ||
302 | rpinfo->rp_alloc = 1; | ||
303 | } | ||
304 | |||
305 | err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, | ||
306 | req->tc->id == P9_TREAD ? 1 : 0); | ||
307 | if (err < 0) { | ||
308 | if (rpinfo->rp_alloc) | ||
309 | kfree(rpinfo); | ||
310 | return err; | ||
311 | } else { | ||
312 | atomic_add(rpinfo->rp_nr_pages, &vp_pinned); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | req_retry_pinned: | ||
226 | spin_lock_irqsave(&chan->lock, flags); | 317 | spin_lock_irqsave(&chan->lock, flags); |
318 | |||
319 | /* Handle out VirtIO ring buffers */ | ||
227 | out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, | 320 | out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, |
228 | req->tc->size); | 321 | req->tc->size); |
229 | in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, | 322 | |
230 | client->msize); | 323 | if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { |
324 | /* We have additional write payload buffer to take care */ | ||
325 | if (req->tc->pubuf && P9_IS_USER_CONTEXT) { | ||
326 | outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, | ||
327 | pdata_off, rpinfo->rp_data, pdata_len); | ||
328 | } else { | ||
329 | char *pbuf; | ||
330 | if (req->tc->pubuf) | ||
331 | pbuf = (__force char *) req->tc->pubuf; | ||
332 | else | ||
333 | pbuf = req->tc->pkbuf; | ||
334 | outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, | ||
335 | req->tc->pbuf_size); | ||
336 | } | ||
337 | out += outp; | ||
338 | } | ||
339 | |||
340 | /* Handle in VirtIO ring buffers */ | ||
341 | if (req->tc->pbuf_size && | ||
342 | ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { | ||
343 | /* | ||
344 | * Take care of additional Read payload. | ||
345 | * 11 is the read/write header = PDU Header(7) + IO Size (4). | ||
346 | * Arrange in such a way that server places header in the | ||
347 | * alloced memory and payload onto the user buffer. | ||
348 | */ | ||
349 | inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); | ||
350 | /* | ||
351 | * Running executables in the filesystem may result in | ||
352 | * a read request with kernel buffer as opposed to user buffer. | ||
353 | */ | ||
354 | if (req->tc->pubuf && P9_IS_USER_CONTEXT) { | ||
355 | in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, | ||
356 | pdata_off, rpinfo->rp_data, pdata_len); | ||
357 | } else { | ||
358 | char *pbuf; | ||
359 | if (req->tc->pubuf) | ||
360 | pbuf = (__force char *) req->tc->pubuf; | ||
361 | else | ||
362 | pbuf = req->tc->pkbuf; | ||
363 | |||
364 | in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, | ||
365 | pbuf, req->tc->pbuf_size); | ||
366 | } | ||
367 | in += inp; | ||
368 | } else { | ||
369 | in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, | ||
370 | client->msize); | ||
371 | } | ||
231 | 372 | ||
232 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); | 373 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); |
233 | if (err < 0) { | 374 | if (err < 0) { |
@@ -240,12 +381,14 @@ req_retry: | |||
240 | return err; | 381 | return err; |
241 | 382 | ||
242 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); | 383 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); |
243 | goto req_retry; | 384 | goto req_retry_pinned; |
244 | } else { | 385 | } else { |
245 | spin_unlock_irqrestore(&chan->lock, flags); | 386 | spin_unlock_irqrestore(&chan->lock, flags); |
246 | P9_DPRINTK(P9_DEBUG_TRANS, | 387 | P9_DPRINTK(P9_DEBUG_TRANS, |
247 | "9p debug: " | 388 | "9p debug: " |
248 | "virtio rpc add_buf returned failure"); | 389 | "virtio rpc add_buf returned failure"); |
390 | if (rpinfo && rpinfo->rp_alloc) | ||
391 | kfree(rpinfo); | ||
249 | return -EIO; | 392 | return -EIO; |
250 | } | 393 | } |
251 | } | 394 | } |
@@ -335,6 +478,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) | |||
335 | } | 478 | } |
336 | init_waitqueue_head(chan->vc_wq); | 479 | init_waitqueue_head(chan->vc_wq); |
337 | chan->ring_bufs_avail = 1; | 480 | chan->ring_bufs_avail = 1; |
481 | /* Ceiling limit to avoid denial of service attacks */ | ||
482 | chan->p9_max_pages = nr_free_buffer_pages()/4; | ||
338 | 483 | ||
339 | mutex_lock(&virtio_9p_lock); | 484 | mutex_lock(&virtio_9p_lock); |
340 | list_add_tail(&chan->chan_list, &virtio_chan_list); | 485 | list_add_tail(&chan->chan_list, &virtio_chan_list); |
@@ -448,6 +593,7 @@ static struct p9_trans_module p9_virtio_trans = { | |||
448 | .request = p9_virtio_request, | 593 | .request = p9_virtio_request, |
449 | .cancel = p9_virtio_cancel, | 594 | .cancel = p9_virtio_cancel, |
450 | .maxsize = PAGE_SIZE*16, | 595 | .maxsize = PAGE_SIZE*16, |
596 | .pref = P9_TRANS_PREF_PAYLOAD_SEP, | ||
451 | .def = 0, | 597 | .def = 0, |
452 | .owner = THIS_MODULE, | 598 | .owner = THIS_MODULE, |
453 | }; | 599 | }; |