author		Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 13:21:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 13:21:23 -0400
commit		19d2f8e0fb7bba99cc585d2467e9fa54a84c8557 (patch)
tree		10f2abe6c32e83f5a6017a2c77335a67af0f0ac4 /net/9p
parent		746919d2668037f297595da9281a22cd558f3d18 (diff)
parent		f2692ea8d5b535277bc06b315eabd32ef4e7a11c (diff)
Merge tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs
Pull second round of 9p patches from Eric Van Hensbergen:
 "Several of these patches were rebased in order to correct style
  issues.  Only stylistic changes were made versus the patches which
  were in linux-next for two weeks.  The rebases have been in
  linux-next for 3 days and have passed my regressions.

  The bulk of these are RDMA fixes and improvements.  There's also
  some additions on the extended attributes front to support some
  additional namespaces and a new option for TCP to force allocation
  of mount requests from a privileged port"

* tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs:
  fs/9p: Remove the unused variable "err" in v9fs_vfs_getattr()
  9P: Add cancelled() to the transport functions.
  9P/RDMA: count posted buffers without a pending request
  9P/RDMA: Improve error handling in rdma_request
  9P/RDMA: Do not free req->rc in error handling in rdma_request()
  9P/RDMA: Use a semaphore to protect the RQ
  9P/RDMA: Protect against duplicate replies
  9P/RDMA: increase P9_RDMA_MAXSIZE to 1MB
  9pnet: refactor struct p9_fcall alloc code
  9P/RDMA: rdma_request() needs not allocate req->rc
  9P: Fix fcall allocation for rdma
  fs/9p: xattr: add trusted and security namespaces
  net/9p: add privport option to 9p tcp transport
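The privport option mentioned above is implemented in trans_fd.c further down: it is requested through the 9p mount option string (the parse_opts() hunk adds a "privport" token), and before connecting the client walks the reserved port range downwards, binding to the first free port so the server sees a privileged (below 1024) source port. A rough user-space analogue of that bind loop, with bind(2) standing in for kernel_bind() and an illustrative 666..1023 range in place of P9_DEF_MIN_RESVPORT/P9_DEF_MAX_RESVPORT:

```c
/* User-space analogue of p9_bind_privport() from trans_fd.c below.
 * The 666..1023 range is illustrative only. */
#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int bind_privport(int sockfd)
{
	struct sockaddr_in cl;
	int port, err = -1;

	memset(&cl, 0, sizeof(cl));
	cl.sin_family = AF_INET;
	cl.sin_addr.s_addr = htonl(INADDR_ANY);
	for (port = 1023; port >= 666; port--) {
		cl.sin_port = htons((unsigned short)port);
		err = bind(sockfd, (struct sockaddr *)&cl, sizeof(cl));
		if (err == 0 || errno != EADDRINUSE)
			break;	/* bound, or failed for a reason a retry won't fix */
	}
	return err;	/* 0 on success, -1 with errno set otherwise */
}
```

As with any bind to a port below 1024, this needs CAP_NET_BIND_SERVICE (or root) on the client side.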
Diffstat (limited to 'net/9p')
-rw-r--r--	net/9p/client.c		 70
-rw-r--r--	net/9p/trans_fd.c	 40
-rw-r--r--	net/9p/trans_rdma.c	133
3 files changed, 167 insertions(+), 76 deletions(-)
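Several of the client.c changes below revolve around one new helper, p9_fcall_alloc(), which replaces the duplicated tc/rc buffer setup and puts the data buffer in the same allocation as the struct. A minimal user-space model of that layout, with malloc standing in for kmalloc(..., GFP_NOFS) and unrelated p9_fcall fields elided:

```c
/* User-space model of the one-allocation layout p9_fcall_alloc() produces:
 * the header and its data buffer come from a single malloc, with sdata
 * pointing just past the header. */
#include <stdlib.h>

struct fcall {
	size_t capacity;
	char *sdata;
};

static struct fcall *fcall_alloc(size_t alloc_msize)
{
	struct fcall *fc = malloc(sizeof(*fc) + alloc_msize);

	if (!fc)
		return NULL;
	fc->capacity = alloc_msize;
	fc->sdata = (char *)fc + sizeof(*fc);	/* buffer starts right after the header */
	return fc;
}
```

Because the header and its buffer share one allocation, a single free (kfree() in the kernel) releases both, which is what keeps the grow_failed cleanup path in p9_tag_alloc() short.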
diff --git a/net/9p/client.c b/net/9p/client.c
index 01f1779eba80..8b93cae2d11d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,6 +204,17 @@ free_and_return:
 	return ret;
 }
 
+struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+{
+	struct p9_fcall *fc;
+	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
+	if (!fc)
+		return NULL;
+	fc->capacity = alloc_msize;
+	fc->sdata = (char *) fc + sizeof(struct p9_fcall);
+	return fc;
+}
+
 /**
  * p9_tag_alloc - lookup/allocate a request by tag
  * @c: client session to lookup tag within
@@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	col = tag % P9_ROW_MAXTAG;
 
 	req = &c->reqs[row][col];
-	if (!req->tc) {
+	if (!req->wq) {
 		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
-		if (!req->wq) {
-			pr_err("Couldn't grow tag array\n");
-			return ERR_PTR(-ENOMEM);
-		}
+		if (!req->wq)
+			goto grow_failed;
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		if ((!req->tc) || (!req->rc)) {
-			pr_err("Couldn't grow tag array\n");
-			kfree(req->tc);
-			kfree(req->rc);
-			kfree(req->wq);
-			req->tc = req->rc = NULL;
-			req->wq = NULL;
-			return ERR_PTR(-ENOMEM);
-		}
-		req->tc->capacity = alloc_msize;
-		req->rc->capacity = alloc_msize;
-		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
 	}
 
+	if (!req->tc)
+		req->tc = p9_fcall_alloc(alloc_msize);
+	if (!req->rc)
+		req->rc = p9_fcall_alloc(alloc_msize);
+	if (!req->tc || !req->rc)
+		goto grow_failed;
+
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
-	return &c->reqs[row][col];
+	return req;
+
+grow_failed:
+	pr_err("Couldn't grow tag array\n");
+	kfree(req->tc);
+	kfree(req->rc);
+	kfree(req->wq);
+	req->tc = req->rc = NULL;
+	req->wq = NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
@@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 		return PTR_ERR(req);
 
 
-	/* if we haven't received a response for oldreq,
-	   remove it from the list. */
+	/*
+	 * if we haven't received a response for oldreq,
+	 * remove it from the list, and notify the transport
+	 * layer that the reply will never arrive.
+	 */
 	spin_lock(&c->lock);
-	if (oldreq->status == REQ_STATUS_FLSH)
+	if (oldreq->status == REQ_STATUS_FLSH) {
 		list_del(&oldreq->req_list);
 		spin_unlock(&c->lock);
+		if (c->trans_mod->cancelled)
+			c->trans_mod->cancelled(c, req);
+	} else {
+		spin_unlock(&c->lock);
+	}
 
 	p9_free_req(c, req);
 	return 0;
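The p9_client_flush() change above depends on a new, optional transport hook: once the client knows that a flushed request's reply can never arrive, it calls trans_mod->cancelled() so the transport can reclaim whatever it had set aside for that reply. A sketch of how a transport might wire this up (not compilable on its own; apart from .cancelled and the pre-existing p9_trans_module fields, every name here is hypothetical):

```c
/*
 * Illustrative sketch only: a transport opts in by filling .cancelled in its
 * struct p9_trans_module.  Everything named demo_* is hypothetical.
 */
struct demo_trans {
	atomic_t orphaned_rc;	/* reply buffers that will never be consumed */
};

static int demo_cancelled(struct p9_client *client, struct p9_req_t *req)
{
	struct demo_trans *t = client->trans;	/* transport-private state */

	/* One previously posted reply buffer is now surplus; remember it so
	 * a later request can reuse or absorb it instead of posting anew. */
	atomic_inc(&t->orphaned_rc);
	return 0;
}

static struct p9_trans_module p9_demo_trans = {
	.name      = "demo",
	.request   = demo_request,	/* existing hooks, definitions omitted */
	.cancel    = demo_cancel,
	.cancelled = demo_cancelled,	/* new hook introduced by this series */
};
```

The RDMA transport's real implementation, rdma_cancelled(), appears near the end of the trans_rdma.c diff below.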
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 02efb25c2957..3ffda1b3799b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -63,6 +63,7 @@ struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
+	int privport;
 };
 
 /**
@@ -87,12 +88,15 @@ struct p9_trans_fd {
 enum {
 	/* Options that take integer arguments */
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
+	/* Options that take no arguments */
+	Opt_privport,
 };
 
 static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
+	{Opt_privport, "privport"},
 	{Opt_err, NULL},
 };
 
@@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
+static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
+static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
+
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
 	unsigned long flags;
@@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token != Opt_err) {
+		if ((token != Opt_err) && (token != Opt_privport)) {
 			r = match_int(&args[0], &option);
 			if (r < 0) {
 				p9_debug(P9_DEBUG_ERROR,
@@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		case Opt_wfdno:
 			opts->wfd = option;
 			break;
+		case Opt_privport:
+			opts->privport = 1;
+			break;
 		default:
 			continue;
 		}
@@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
+static int p9_bind_privport(struct socket *sock)
+{
+	struct sockaddr_in cl;
+	int port, err = -EINVAL;
+
+	memset(&cl, 0, sizeof(cl));
+	cl.sin_family = AF_INET;
+	cl.sin_addr.s_addr = INADDR_ANY;
+	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
+		cl.sin_port = htons((ushort)port);
+		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
+		if (err != -EADDRINUSE)
+			break;
+	}
+	return err;
+}
+
+
 static int
 p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
@@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 		return err;
 	}
 
+	if (opts.privport) {
+		err = p9_bind_privport(csocket);
+		if (err < 0) {
+			pr_err("%s (%d): problem binding to privport\n",
+			       __func__, task_pid_nr(current));
+			sock_release(csocket);
+			return err;
+		}
+	}
+
 	err = csocket->ops->connect(csocket,
 				    (struct sockaddr *)&sin_server,
 				    sizeof(struct sockaddr_in), 0);
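The trans_rdma.c diff that follows replaces the old rq_count bookkeeping with two pieces of state: rq_sem, a semaphore that bounds how many receive buffers may be posted at once, and excess_rc, a count of receive buffers that were posted for requests which then failed before their send. A stand-alone user-space model of that accounting, using C11 atomics and POSIX semaphores (the names mirror the kernel fields; everything else is illustrative):

```c
/* Stand-alone model of the receive-queue accounting added in trans_rdma.c. */
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>

#define RQ_DEPTH 32			/* illustrative; the kernel uses rq_depth */

static sem_t rq_sem;
static atomic_int excess_rc;

static void rq_init(void)
{
	sem_init(&rq_sem, 0, RQ_DEPTH);	/* one token per receive-queue slot */
}

/* Before sending a request: returns true if a fresh receive buffer must be
 * posted, false if a previously orphaned one can be absorbed instead. */
static bool need_post_recv(void)
{
	if (atomic_load(&excess_rc) > 0) {
		if (atomic_fetch_sub(&excess_rc, 1) - 1 >= 0)
			return false;			/* absorbed an excess buffer */
		atomic_fetch_add(&excess_rc, 1);	/* raced and lost: undo */
	}
	sem_wait(&rq_sem);			/* reserve a receive-queue slot */
	return true;
}

/* An error after the receive was posted but before the send went out. */
static void send_failed(void)
{
	atomic_fetch_add(&excess_rc, 1);	/* that buffer is now surplus */
}

/* A receive completed (the kernel's IB_WC_RECV case): release the slot. */
static void recv_completed(void)
{
	sem_post(&rq_sem);
}
```

In the kernel, the completion handler does up(&rdma->rq_sem) for every IB_WC_RECV, and rdma_cancelled() bumps excess_rc when a flushed request's reply will never arrive.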
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2c69ddd691a1..928f2bb9bf8d 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -57,9 +57,7 @@
 #define P9_RDMA_IRD		0
 #define P9_RDMA_ORD		0
 #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
-#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
-						 * safely advertise a maxsize
-						 * of 64k */
+#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
@@ -75,7 +73,9 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ *		See rdma_request()
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this request.
+	 */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
 	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
@@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 	switch (c->wc_op) {
 	case IB_WC_RECV:
-		atomic_dec(&rdma->rq_count);
 		handle_recv(client, rdma, c, wc.status, wc.byte_len);
+		up(&rdma->rq_sem);
 		break;
 
 	case IB_WC_SEND:
@@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here,
+	 * see if we are this `next request' and need to absorb an excess rc.
+	 * If yes, then drop and free our own, and do not recv_post().
+	 **/
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+			/* Got one ! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
+		}
+	}
+
 	/* Allocate an fcall for the reply */
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
-		goto err_close;
-	}
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one. Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_NOFS);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
-		}
+		goto recv_error;
 	}
 	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		err = -ENOMEM;
-		goto err_free2;
-	}
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
@@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err)
-			goto err_free1;
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
 
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto recv_error;
+	}
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free1;
+		goto send_error;
 	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
+	/* Success */
+	return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
 	kfree(c);
-	kfree(rpl_context->rc);
-	kfree(rpl_context);
-	p9_debug(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
- err_free1:
-	kfree(rpl_context->rc);
- err_free2:
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+	/* Ach.
+	 *  We did recv_post(), but not send. We have one recv_post in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
+	return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+ recv_error:
 	kfree(rpl_context);
- err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
@@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
@@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
 	return 1;
 }
 
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+
+	atomic_inc(&rdma->excess_rc);
+	return 0;
+}
+
 /**
  * trans_create_rdma - Transport method for creating atransport instance
  * @client: client instance