diff options
Diffstat (limited to 'net/9p/trans_virtio.c')
-rw-r--r-- | net/9p/trans_virtio.c | 319 |
1 files changed, 189 insertions, 130 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e317583fcc73..32aa9834229c 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c | |||
@@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq) | |||
150 | while (1) { | 150 | while (1) { |
151 | spin_lock_irqsave(&chan->lock, flags); | 151 | spin_lock_irqsave(&chan->lock, flags); |
152 | rc = virtqueue_get_buf(chan->vq, &len); | 152 | rc = virtqueue_get_buf(chan->vq, &len); |
153 | |||
154 | if (rc == NULL) { | 153 | if (rc == NULL) { |
155 | spin_unlock_irqrestore(&chan->lock, flags); | 154 | spin_unlock_irqrestore(&chan->lock, flags); |
156 | break; | 155 | break; |
157 | } | 156 | } |
158 | |||
159 | chan->ring_bufs_avail = 1; | 157 | chan->ring_bufs_avail = 1; |
160 | spin_unlock_irqrestore(&chan->lock, flags); | 158 | spin_unlock_irqrestore(&chan->lock, flags); |
161 | /* Wakeup if anyone waiting for VirtIO ring space. */ | 159 | /* Wakeup if anyone waiting for VirtIO ring space. */ |
@@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq) | |||
163 | P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); | 161 | P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); |
164 | P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); | 162 | P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); |
165 | req = p9_tag_lookup(chan->client, rc->tag); | 163 | req = p9_tag_lookup(chan->client, rc->tag); |
166 | if (req->tc->private) { | ||
167 | struct trans_rpage_info *rp = req->tc->private; | ||
168 | int p = rp->rp_nr_pages; | ||
169 | /*Release pages */ | ||
170 | p9_release_req_pages(rp); | ||
171 | atomic_sub(p, &vp_pinned); | ||
172 | wake_up(&vp_wq); | ||
173 | if (rp->rp_alloc) | ||
174 | kfree(rp); | ||
175 | req->tc->private = NULL; | ||
176 | } | ||
177 | req->status = REQ_STATUS_RCVD; | 164 | req->status = REQ_STATUS_RCVD; |
178 | p9_client_cb(chan->client, req); | 165 | p9_client_cb(chan->client, req); |
179 | } | 166 | } |
@@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq) | |||
193 | * | 180 | * |
194 | */ | 181 | */ |
195 | 182 | ||
196 | static int | 183 | static int pack_sg_list(struct scatterlist *sg, int start, |
197 | pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, | 184 | int limit, char *data, int count) |
198 | int count) | ||
199 | { | 185 | { |
200 | int s; | 186 | int s; |
201 | int index = start; | 187 | int index = start; |
@@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) | |||
224 | * this takes a list of pages. | 210 | * this takes a list of pages. |
225 | * @sg: scatter/gather list to pack into | 211 | * @sg: scatter/gather list to pack into |
226 | * @start: which segment of the sg_list to start at | 212 | * @start: which segment of the sg_list to start at |
227 | * @pdata_off: Offset into the first page | ||
228 | * @**pdata: a list of pages to add into sg. | 213 | * @**pdata: a list of pages to add into sg. |
214 | * @nr_pages: number of pages to pack into the scatter/gather list | ||
215 | * @data: data to pack into scatter/gather list | ||
229 | * @count: amount of data to pack into the scatter/gather list | 216 | * @count: amount of data to pack into the scatter/gather list |
230 | */ | 217 | */ |
231 | static int | 218 | static int |
232 | pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, | 219 | pack_sg_list_p(struct scatterlist *sg, int start, int limit, |
233 | struct page **pdata, int count) | 220 | struct page **pdata, int nr_pages, char *data, int count) |
234 | { | 221 | { |
235 | int s; | 222 | int i = 0, s; |
236 | int i = 0; | 223 | int data_off; |
237 | int index = start; | 224 | int index = start; |
238 | 225 | ||
239 | if (pdata_off) { | 226 | BUG_ON(nr_pages > (limit - start)); |
240 | s = min((int)(PAGE_SIZE - pdata_off), count); | 227 | /* |
241 | sg_set_page(&sg[index++], pdata[i++], s, pdata_off); | 228 | * if the first page doesn't start at |
242 | count -= s; | 229 | * page boundary find the offset |
243 | } | 230 | */ |
244 | 231 | data_off = offset_in_page(data); | |
245 | while (count) { | 232 | while (nr_pages) { |
246 | BUG_ON(index > limit); | 233 | s = rest_of_page(data); |
247 | s = min((int)PAGE_SIZE, count); | 234 | if (s > count) |
248 | sg_set_page(&sg[index++], pdata[i++], s, 0); | 235 | s = count; |
236 | sg_set_page(&sg[index++], pdata[i++], s, data_off); | ||
237 | data_off = 0; | ||
238 | data += s; | ||
249 | count -= s; | 239 | count -= s; |
240 | nr_pages--; | ||
250 | } | 241 | } |
251 | return index-start; | 242 | return index - start; |
252 | } | 243 | } |
253 | 244 | ||
254 | /** | 245 | /** |
@@ -261,114 +252,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, | |||
261 | static int | 252 | static int |
262 | p9_virtio_request(struct p9_client *client, struct p9_req_t *req) | 253 | p9_virtio_request(struct p9_client *client, struct p9_req_t *req) |
263 | { | 254 | { |
264 | int in, out, inp, outp; | 255 | int err; |
265 | struct virtio_chan *chan = client->trans; | 256 | int in, out; |
266 | unsigned long flags; | 257 | unsigned long flags; |
267 | size_t pdata_off = 0; | 258 | struct virtio_chan *chan = client->trans; |
268 | struct trans_rpage_info *rpinfo = NULL; | ||
269 | int err, pdata_len = 0; | ||
270 | 259 | ||
271 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); | 260 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); |
272 | 261 | ||
273 | req->status = REQ_STATUS_SENT; | 262 | req->status = REQ_STATUS_SENT; |
263 | req_retry: | ||
264 | spin_lock_irqsave(&chan->lock, flags); | ||
265 | |||
266 | /* Handle out VirtIO ring buffers */ | ||
267 | out = pack_sg_list(chan->sg, 0, | ||
268 | VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); | ||
274 | 269 | ||
275 | if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { | 270 | in = pack_sg_list(chan->sg, out, |
276 | int nr_pages = p9_nr_pages(req); | 271 | VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); |
277 | int rpinfo_size = sizeof(struct trans_rpage_info) + | ||
278 | sizeof(struct page *) * nr_pages; | ||
279 | 272 | ||
280 | if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { | 273 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); |
281 | err = wait_event_interruptible(vp_wq, | 274 | if (err < 0) { |
282 | atomic_read(&vp_pinned) < chan->p9_max_pages); | 275 | if (err == -ENOSPC) { |
276 | chan->ring_bufs_avail = 0; | ||
277 | spin_unlock_irqrestore(&chan->lock, flags); | ||
278 | err = wait_event_interruptible(*chan->vc_wq, | ||
279 | chan->ring_bufs_avail); | ||
283 | if (err == -ERESTARTSYS) | 280 | if (err == -ERESTARTSYS) |
284 | return err; | 281 | return err; |
285 | P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); | ||
286 | } | ||
287 | 282 | ||
288 | if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { | 283 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); |
289 | /* We can use sdata */ | 284 | goto req_retry; |
290 | req->tc->private = req->tc->sdata + req->tc->size; | ||
291 | rpinfo = (struct trans_rpage_info *)req->tc->private; | ||
292 | rpinfo->rp_alloc = 0; | ||
293 | } else { | 285 | } else { |
294 | req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); | 286 | spin_unlock_irqrestore(&chan->lock, flags); |
295 | if (!req->tc->private) { | 287 | P9_DPRINTK(P9_DEBUG_TRANS, |
296 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " | 288 | "9p debug: " |
297 | "private kmalloc returned NULL"); | 289 | "virtio rpc add_buf returned failure"); |
298 | return -ENOMEM; | 290 | return -EIO; |
299 | } | ||
300 | rpinfo = (struct trans_rpage_info *)req->tc->private; | ||
301 | rpinfo->rp_alloc = 1; | ||
302 | } | 291 | } |
292 | } | ||
293 | virtqueue_kick(chan->vq); | ||
294 | spin_unlock_irqrestore(&chan->lock, flags); | ||
303 | 295 | ||
304 | err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, | 296 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); |
305 | req->tc->id == P9_TREAD ? 1 : 0); | 297 | return 0; |
306 | if (err < 0) { | 298 | } |
307 | if (rpinfo->rp_alloc) | 299 | |
308 | kfree(rpinfo); | 300 | static int p9_get_mapped_pages(struct virtio_chan *chan, |
301 | struct page **pages, char *data, | ||
302 | int nr_pages, int write, int kern_buf) | ||
303 | { | ||
304 | int err; | ||
305 | if (!kern_buf) { | ||
306 | /* | ||
307 | * We allow only p9_max_pages pinned. We wait for the | ||
308 | * other zc requests to finish here | |
309 | */ | ||
310 | if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { | ||
311 | err = wait_event_interruptible(vp_wq, | ||
312 | (atomic_read(&vp_pinned) < chan->p9_max_pages)); | ||
313 | if (err == -ERESTARTSYS) | ||
314 | return err; | ||
315 | } | ||
316 | err = p9_payload_gup(data, &nr_pages, pages, write); | ||
317 | if (err < 0) | ||
309 | return err; | 318 | return err; |
310 | } else { | 319 | atomic_add(nr_pages, &vp_pinned); |
311 | atomic_add(rpinfo->rp_nr_pages, &vp_pinned); | 320 | } else { |
321 | /* kernel buffer, no need to pin pages */ | ||
322 | int s, index = 0; | ||
323 | int count = nr_pages; | ||
324 | while (nr_pages) { | ||
325 | s = rest_of_page(data); | ||
326 | pages[index++] = virt_to_page(data); | ||
327 | data += s; | ||
328 | nr_pages--; | ||
312 | } | 329 | } |
330 | nr_pages = count; | ||
313 | } | 331 | } |
332 | return nr_pages; | ||
333 | } | ||
314 | 334 | ||
315 | req_retry_pinned: | 335 | /** |
316 | spin_lock_irqsave(&chan->lock, flags); | 336 | * p9_virtio_zc_request - issue a zero copy request |
337 | * @client: client instance issuing the request | ||
338 | * @req: request to be issued | ||
339 | * @uidata: user buffer that should be used for zero copy read | |
340 | * @uodata: user buffer that should be used for zero copy write | |
341 | * @inlen: read buffer size | ||
342 | * @outlen: write buffer size | |
343 | * @in_hdr_len: read header size; this is the size of the response protocol data | |
344 | * | ||
345 | */ | ||
346 | static int | ||
347 | p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, | ||
348 | char *uidata, char *uodata, int inlen, | ||
349 | int outlen, int in_hdr_len, int kern_buf) | ||
350 | { | ||
351 | int in, out, err; | ||
352 | unsigned long flags; | ||
353 | int in_nr_pages = 0, out_nr_pages = 0; | ||
354 | struct page **in_pages = NULL, **out_pages = NULL; | ||
355 | struct virtio_chan *chan = client->trans; | ||
317 | 356 | ||
318 | /* Handle out VirtIO ring buffers */ | 357 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); |
319 | out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, | 358 | |
320 | req->tc->size); | 359 | if (uodata) { |
321 | 360 | out_nr_pages = p9_nr_pages(uodata, outlen); | |
322 | if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { | 361 | out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, |
323 | /* We have additional write payload buffer to take care */ | 362 | GFP_NOFS); |
324 | if (req->tc->pubuf && P9_IS_USER_CONTEXT) { | 363 | if (!out_pages) { |
325 | outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, | 364 | err = -ENOMEM; |
326 | pdata_off, rpinfo->rp_data, pdata_len); | 365 | goto err_out; |
327 | } else { | 366 | } |
328 | char *pbuf; | 367 | out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, |
329 | if (req->tc->pubuf) | 368 | out_nr_pages, 0, kern_buf); |
330 | pbuf = (__force char *) req->tc->pubuf; | 369 | if (out_nr_pages < 0) { |
331 | else | 370 | err = out_nr_pages; |
332 | pbuf = req->tc->pkbuf; | 371 | kfree(out_pages); |
333 | outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, | 372 | out_pages = NULL; |
334 | req->tc->pbuf_size); | 373 | goto err_out; |
335 | } | 374 | } |
336 | out += outp; | ||
337 | } | 375 | } |
338 | 376 | if (uidata) { | |
339 | /* Handle in VirtIO ring buffers */ | 377 | in_nr_pages = p9_nr_pages(uidata, inlen); |
340 | if (req->tc->pbuf_size && | 378 | in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, |
341 | ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { | 379 | GFP_NOFS); |
342 | /* | 380 | if (!in_pages) { |
343 | * Take care of additional Read payload. | 381 | err = -ENOMEM; |
344 | * 11 is the read/write header = PDU Header(7) + IO Size (4). | 382 | goto err_out; |
345 | * Arrange in such a way that server places header in the | 383 | } |
346 | * alloced memory and payload onto the user buffer. | 384 | in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, |
347 | */ | 385 | in_nr_pages, 1, kern_buf); |
348 | inp = pack_sg_list(chan->sg, out, | 386 | if (in_nr_pages < 0) { |
349 | VIRTQUEUE_NUM, req->rc->sdata, 11); | 387 | err = in_nr_pages; |
350 | /* | 388 | kfree(in_pages); |
351 | * Running executables in the filesystem may result in | 389 | in_pages = NULL; |
352 | * a read request with kernel buffer as opposed to user buffer. | 390 | goto err_out; |
353 | */ | ||
354 | if (req->tc->pubuf && P9_IS_USER_CONTEXT) { | ||
355 | in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, | ||
356 | pdata_off, rpinfo->rp_data, pdata_len); | ||
357 | } else { | ||
358 | char *pbuf; | ||
359 | if (req->tc->pubuf) | ||
360 | pbuf = (__force char *) req->tc->pubuf; | ||
361 | else | ||
362 | pbuf = req->tc->pkbuf; | ||
363 | |||
364 | in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, | ||
365 | pbuf, req->tc->pbuf_size); | ||
366 | } | 391 | } |
367 | in += inp; | ||
368 | } else { | ||
369 | in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, | ||
370 | req->rc->sdata, req->rc->capacity); | ||
371 | } | 392 | } |
393 | req->status = REQ_STATUS_SENT; | ||
394 | req_retry_pinned: | ||
395 | spin_lock_irqsave(&chan->lock, flags); | ||
396 | /* out data */ | ||
397 | out = pack_sg_list(chan->sg, 0, | ||
398 | VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); | ||
399 | |||
400 | if (out_pages) | ||
401 | out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, | ||
402 | out_pages, out_nr_pages, uodata, outlen); | ||
403 | /* | ||
404 | * Take care of in data | ||
405 | * For example, TREAD has an 11-byte header. | |
406 | * 11 is the read/write header = PDU Header(7) + IO Size (4). | ||
407 | * Arrange in such a way that server places header in the | ||
408 | * alloced memory and payload onto the user buffer. | ||
409 | */ | ||
410 | in = pack_sg_list(chan->sg, out, | ||
411 | VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); | ||
412 | if (in_pages) | ||
413 | in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, | ||
414 | in_pages, in_nr_pages, uidata, inlen); | ||
372 | 415 | ||
373 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); | 416 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); |
374 | if (err < 0) { | 417 | if (err < 0) { |
@@ -376,28 +419,45 @@ req_retry_pinned: | |||
376 | chan->ring_bufs_avail = 0; | 419 | chan->ring_bufs_avail = 0; |
377 | spin_unlock_irqrestore(&chan->lock, flags); | 420 | spin_unlock_irqrestore(&chan->lock, flags); |
378 | err = wait_event_interruptible(*chan->vc_wq, | 421 | err = wait_event_interruptible(*chan->vc_wq, |
379 | chan->ring_bufs_avail); | 422 | chan->ring_bufs_avail); |
380 | if (err == -ERESTARTSYS) | 423 | if (err == -ERESTARTSYS) |
381 | return err; | 424 | goto err_out; |
382 | 425 | ||
383 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); | 426 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); |
384 | goto req_retry_pinned; | 427 | goto req_retry_pinned; |
385 | } else { | 428 | } else { |
386 | spin_unlock_irqrestore(&chan->lock, flags); | 429 | spin_unlock_irqrestore(&chan->lock, flags); |
387 | P9_DPRINTK(P9_DEBUG_TRANS, | 430 | P9_DPRINTK(P9_DEBUG_TRANS, |
388 | "9p debug: " | 431 | "9p debug: " |
389 | "virtio rpc add_buf returned failure"); | 432 | "virtio rpc add_buf returned failure"); |
390 | if (rpinfo && rpinfo->rp_alloc) | 433 | err = -EIO; |
391 | kfree(rpinfo); | 434 | goto err_out; |
392 | return -EIO; | ||
393 | } | 435 | } |
394 | } | 436 | } |
395 | |||
396 | virtqueue_kick(chan->vq); | 437 | virtqueue_kick(chan->vq); |
397 | spin_unlock_irqrestore(&chan->lock, flags); | 438 | spin_unlock_irqrestore(&chan->lock, flags); |
398 | |||
399 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); | 439 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); |
400 | return 0; | 440 | err = wait_event_interruptible(*req->wq, |
441 | req->status >= REQ_STATUS_RCVD); | ||
442 | /* | ||
443 | * Non kernel buffers are pinned, unpin them | ||
444 | */ | ||
445 | err_out: | ||
446 | if (!kern_buf) { | ||
447 | if (in_pages) { | ||
448 | p9_release_pages(in_pages, in_nr_pages); | ||
449 | atomic_sub(in_nr_pages, &vp_pinned); | ||
450 | } | ||
451 | if (out_pages) { | ||
452 | p9_release_pages(out_pages, out_nr_pages); | ||
453 | atomic_sub(out_nr_pages, &vp_pinned); | ||
454 | } | ||
455 | /* wakeup anybody waiting for slots to pin pages */ | ||
456 | wake_up(&vp_wq); | ||
457 | } | ||
458 | kfree(in_pages); | ||
459 | kfree(out_pages); | ||
460 | return err; | ||
401 | } | 461 | } |
402 | 462 | ||
403 | static ssize_t p9_mount_tag_show(struct device *dev, | 463 | static ssize_t p9_mount_tag_show(struct device *dev, |
@@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = { | |||
591 | .create = p9_virtio_create, | 651 | .create = p9_virtio_create, |
592 | .close = p9_virtio_close, | 652 | .close = p9_virtio_close, |
593 | .request = p9_virtio_request, | 653 | .request = p9_virtio_request, |
654 | .zc_request = p9_virtio_zc_request, | ||
594 | .cancel = p9_virtio_cancel, | 655 | .cancel = p9_virtio_cancel, |
595 | |||
596 | /* | 656 | /* |
597 | * We leave one entry for input and one entry for response | 657 | * We leave one entry for input and one entry for response |
598 | * headers. We also skip one more entry to accommodate, address | 658 | * headers. We also skip one more entry to accommodate, address |
@@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = { | |||
600 | * page in zero copy. | 660 | * page in zero copy. |
601 | */ | 661 | */ |
602 | .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), | 662 | .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), |
603 | .pref = P9_TRANS_PREF_PAYLOAD_SEP, | ||
604 | .def = 0, | 663 | .def = 0, |
605 | .owner = THIS_MODULE, | 664 | .owner = THIS_MODULE, |
606 | }; | 665 | }; |