author		Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 13:21:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 13:21:23 -0400
commit		19d2f8e0fb7bba99cc585d2467e9fa54a84c8557 (patch)
tree		10f2abe6c32e83f5a6017a2c77335a67af0f0ac4 /net/9p
parent		746919d2668037f297595da9281a22cd558f3d18 (diff)
parent		f2692ea8d5b535277bc06b315eabd32ef4e7a11c (diff)
Merge tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs
Pull second round of 9p patches from Eric Van Hensbergen:
 "Several of these patches were rebased in order to correct style
  issues.  Only stylistic changes were made versus the patches which
  were in linux-next for two weeks.  The rebases have been in
  linux-next for 3 days and have passed my regressions.

  The bulk of these are RDMA fixes and improvements.  There's also
  some additions on the extended attributes front to support some
  additional namespaces and a new option for TCP to force allocation
  of mount requests from a privileged port"

* tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs:
  fs/9p: Remove the unused variable "err" in v9fs_vfs_getattr()
  9P: Add cancelled() to the transport functions.
  9P/RDMA: count posted buffers without a pending request
  9P/RDMA: Improve error handling in rdma_request
  9P/RDMA: Do not free req->rc in error handling in rdma_request()
  9P/RDMA: Use a semaphore to protect the RQ
  9P/RDMA: Protect against duplicate replies
  9P/RDMA: increase P9_RDMA_MAXSIZE to 1MB
  9pnet: refactor struct p9_fcall alloc code
  9P/RDMA: rdma_request() needs not allocate req->rc
  9P: Fix fcall allocation for rdma
  fs/9p: xattr: add trusted and security namespaces
  net/9p: add privport option to 9p tcp transport
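The privport option mentioned above is implemented in trans_fd.c further down: it is requested through the 9p mount option string (the parse_opts() hunk adds a "privport" token), and before connecting the client walks the reserved port range downwards, binding to the first free port so the server sees a privileged (below 1024) source port. A rough user-space analogue of that bind loop, with bind(2) standing in for kernel_bind() and an illustrative 666..1023 range in place of P9_DEF_MIN_RESVPORT/P9_DEF_MAX_RESVPORT:

```c
/* User-space analogue of p9_bind_privport() from trans_fd.c below.
 * The 666..1023 range is illustrative only. */
#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int bind_privport(int sockfd)
{
	struct sockaddr_in cl;
	int port, err = -1;

	memset(&cl, 0, sizeof(cl));
	cl.sin_family = AF_INET;
	cl.sin_addr.s_addr = htonl(INADDR_ANY);
	for (port = 1023; port >= 666; port--) {
		cl.sin_port = htons((unsigned short)port);
		err = bind(sockfd, (struct sockaddr *)&cl, sizeof(cl));
		if (err == 0 || errno != EADDRINUSE)
			break;	/* bound, or failed for a reason a retry won't fix */
	}
	return err;	/* 0 on success, -1 with errno set otherwise */
}
```

As with any bind to a port below 1024, this needs CAP_NET_BIND_SERVICE (or root) on the client side.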
Diffstat (limited to 'net/9p')
-rw-r--r--	net/9p/client.c		 70
-rw-r--r--	net/9p/trans_fd.c	 40
-rw-r--r--	net/9p/trans_rdma.c	133
3 files changed, 167 insertions(+), 76 deletions(-)
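Several of the client.c changes below revolve around one new helper, p9_fcall_alloc(), which replaces the duplicated tc/rc buffer setup and puts the data buffer in the same allocation as the struct. A minimal user-space model of that layout, with malloc standing in for kmalloc(..., GFP_NOFS) and unrelated p9_fcall fields elided:

```c
/* User-space model of the one-allocation layout p9_fcall_alloc() produces:
 * the header and its data buffer come from a single malloc, with sdata
 * pointing just past the header. */
#include <stdlib.h>

struct fcall {
	size_t capacity;
	char *sdata;
};

static struct fcall *fcall_alloc(size_t alloc_msize)
{
	struct fcall *fc = malloc(sizeof(*fc) + alloc_msize);

	if (!fc)
		return NULL;
	fc->capacity = alloc_msize;
	fc->sdata = (char *)fc + sizeof(*fc);	/* buffer starts right after the header */
	return fc;
}
```

Because the header and its buffer share one allocation, a single free (kfree() in the kernel) releases both, which is what keeps the grow_failed cleanup path in p9_tag_alloc() short.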
diff --git a/net/9p/client.c b/net/9p/client.c
index 01f1779eba80..8b93cae2d11d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,6 +204,17 @@ free_and_return:
 	return ret;
 }
 
+struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+{
+	struct p9_fcall *fc;
+	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
+	if (!fc)
+		return NULL;
+	fc->capacity = alloc_msize;
+	fc->sdata = (char *) fc + sizeof(struct p9_fcall);
+	return fc;
+}
+
 /**
  * p9_tag_alloc - lookup/allocate a request by tag
  * @c: client session to lookup tag within
@@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	col = tag % P9_ROW_MAXTAG;
 
 	req = &c->reqs[row][col];
-	if (!req->tc) {
+	if (!req->wq) {
 		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
-		if (!req->wq) {
-			pr_err("Couldn't grow tag array\n");
-			return ERR_PTR(-ENOMEM);
-		}
+		if (!req->wq)
+			goto grow_failed;
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
-				  GFP_NOFS);
-		if ((!req->tc) || (!req->rc)) {
-			pr_err("Couldn't grow tag array\n");
-			kfree(req->tc);
-			kfree(req->rc);
-			kfree(req->wq);
-			req->tc = req->rc = NULL;
-			req->wq = NULL;
-			return ERR_PTR(-ENOMEM);
-		}
-		req->tc->capacity = alloc_msize;
-		req->rc->capacity = alloc_msize;
-		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
 	}
 
+	if (!req->tc)
+		req->tc = p9_fcall_alloc(alloc_msize);
+	if (!req->rc)
+		req->rc = p9_fcall_alloc(alloc_msize);
+	if (!req->tc || !req->rc)
+		goto grow_failed;
+
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
-	return &c->reqs[row][col];
+	return req;
+
+grow_failed:
+	pr_err("Couldn't grow tag array\n");
+	kfree(req->tc);
+	kfree(req->rc);
+	kfree(req->wq);
+	req->tc = req->rc = NULL;
+	req->wq = NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
@@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 		return PTR_ERR(req);
 
 
-	/* if we haven't received a response for oldreq,
-	   remove it from the list. */
+	/*
+	 * if we haven't received a response for oldreq,
+	 * remove it from the list, and notify the transport
+	 * layer that the reply will never arrive.
+	 */
 	spin_lock(&c->lock);
-	if (oldreq->status == REQ_STATUS_FLSH)
+	if (oldreq->status == REQ_STATUS_FLSH) {
 		list_del(&oldreq->req_list);
 		spin_unlock(&c->lock);
+		if (c->trans_mod->cancelled)
+			c->trans_mod->cancelled(c, req);
+	} else {
+		spin_unlock(&c->lock);
+	}
 
 	p9_free_req(c, req);
 	return 0;
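The p9_client_flush() change above depends on a new, optional transport hook: once the client knows that a flushed request's reply can never arrive, it calls trans_mod->cancelled() so the transport can reclaim whatever it had set aside for that reply. A sketch of how a transport might wire this up (not compilable on its own; apart from .cancelled and the pre-existing p9_trans_module fields, every name here is hypothetical):

```c
/*
 * Illustrative sketch only: a transport opts in by filling .cancelled in its
 * struct p9_trans_module.  Everything named demo_* is hypothetical.
 */
struct demo_trans {
	atomic_t orphaned_rc;	/* reply buffers that will never be consumed */
};

static int demo_cancelled(struct p9_client *client, struct p9_req_t *req)
{
	struct demo_trans *t = client->trans;	/* transport-private state */

	/* One previously posted reply buffer is now surplus; remember it so
	 * a later request can reuse or absorb it instead of posting anew. */
	atomic_inc(&t->orphaned_rc);
	return 0;
}

static struct p9_trans_module p9_demo_trans = {
	.name      = "demo",
	.request   = demo_request,	/* existing hooks, definitions omitted */
	.cancel    = demo_cancel,
	.cancelled = demo_cancelled,	/* new hook introduced by this series */
};
```

The RDMA transport's real implementation, rdma_cancelled(), appears near the end of the trans_rdma.c diff below.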
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 02efb25c2957..3ffda1b3799b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -63,6 +63,7 @@ struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
+	int privport;
 };
 
 /**
@@ -87,12 +88,15 @@ struct p9_trans_fd {
 enum {
 	/* Options that take integer arguments */
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
+	/* Options that take no arguments */
+	Opt_privport,
 };
 
 static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
+	{Opt_privport, "privport"},
 	{Opt_err, NULL},
 };
 
@@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
 static LIST_HEAD(p9_poll_pending_list);
 static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
 
+static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
+static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
+
 static void p9_mux_poll_stop(struct p9_conn *m)
 {
 	unsigned long flags;
@@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
-		if (token != Opt_err) {
+		if ((token != Opt_err) && (token != Opt_privport)) {
 			r = match_int(&args[0], &option);
 			if (r < 0) {
 				p9_debug(P9_DEBUG_ERROR,
@@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 		case Opt_wfdno:
 			opts->wfd = option;
 			break;
+		case Opt_privport:
+			opts->privport = 1;
+			break;
 		default:
 			continue;
 		}
@@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
 	return 0;
 }
 
+static int p9_bind_privport(struct socket *sock)
+{
+	struct sockaddr_in cl;
+	int port, err = -EINVAL;
+
+	memset(&cl, 0, sizeof(cl));
+	cl.sin_family = AF_INET;
+	cl.sin_addr.s_addr = INADDR_ANY;
+	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
+		cl.sin_port = htons((ushort)port);
+		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
+		if (err != -EADDRINUSE)
+			break;
+	}
+	return err;
+}
+
+
 static int
 p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 {
@@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 		return err;
 	}
 
+	if (opts.privport) {
+		err = p9_bind_privport(csocket);
+		if (err < 0) {
+			pr_err("%s (%d): problem binding to privport\n",
+			       __func__, task_pid_nr(current));
+			sock_release(csocket);
+			return err;
+		}
+	}
+
 	err = csocket->ops->connect(csocket,
 				    (struct sockaddr *)&sin_server,
 				    sizeof(struct sockaddr_in), 0);
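The trans_rdma.c diff that follows replaces the old rq_count bookkeeping with two pieces of state: rq_sem, a semaphore that bounds how many receive buffers may be posted at once, and excess_rc, a count of receive buffers that were posted for requests which then failed before their send. A stand-alone user-space model of that accounting, using C11 atomics and POSIX semaphores (the names mirror the kernel fields; everything else is illustrative):

```c
/* Stand-alone model of the receive-queue accounting added in trans_rdma.c. */
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>

#define RQ_DEPTH 32			/* illustrative; the kernel uses rq_depth */

static sem_t rq_sem;
static atomic_int excess_rc;

static void rq_init(void)
{
	sem_init(&rq_sem, 0, RQ_DEPTH);	/* one token per receive-queue slot */
}

/* Before sending a request: returns true if a fresh receive buffer must be
 * posted, false if a previously orphaned one can be absorbed instead. */
static bool need_post_recv(void)
{
	if (atomic_load(&excess_rc) > 0) {
		if (atomic_fetch_sub(&excess_rc, 1) - 1 >= 0)
			return false;			/* absorbed an excess buffer */
		atomic_fetch_add(&excess_rc, 1);	/* raced and lost: undo */
	}
	sem_wait(&rq_sem);			/* reserve a receive-queue slot */
	return true;
}

/* An error after the receive was posted but before the send went out. */
static void send_failed(void)
{
	atomic_fetch_add(&excess_rc, 1);	/* that buffer is now surplus */
}

/* A receive completed (the kernel's IB_WC_RECV case): release the slot. */
static void recv_completed(void)
{
	sem_post(&rq_sem);
}
```

In the kernel, the completion handler does up(&rdma->rq_sem) for every IB_WC_RECV, and rdma_cancelled() bumps excess_rc when a flushed request's reply will never arrive.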
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2c69ddd691a1..928f2bb9bf8d 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -57,9 +57,7 @@
 #define P9_RDMA_IRD		0
 #define P9_RDMA_ORD		0
 #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
-#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
-						 * safely advertise a maxsize
-						 * of 64k */
+#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
@@ -75,7 +73,9 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ *		See rdma_request()
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this request.
+	 */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
 	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
@@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 	switch (c->wc_op) {
 	case IB_WC_RECV:
-		atomic_dec(&rdma->rq_count);
 		handle_recv(client, rdma, c, wc.status, wc.byte_len);
+		up(&rdma->rq_sem);
 		break;
 
 	case IB_WC_SEND:
@@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here,
+	 * see if we are this `next request' and need to absorb an excess rc.
+	 * If yes, then drop and free our own, and do not recv_post().
+	 **/
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+			/* Got one ! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
+		}
+	}
+
 	/* Allocate an fcall for the reply */
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
-		goto err_close;
-	}
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one. Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_NOFS);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
-		}
+		goto recv_error;
 	}
 	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		err = -ENOMEM;
-		goto err_free2;
-	}
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
@@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err)
-			goto err_free1;
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
 
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto recv_error;
+	}
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free1;
+		goto send_error;
 	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
+	/* Success */
+	return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
 	kfree(c);
-	kfree(rpl_context->rc);
-	kfree(rpl_context);
-	p9_debug(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
- err_free1:
-	kfree(rpl_context->rc);
- err_free2:
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+	/* Ach.
+	 *  We did recv_post(), but not send. We have one recv_post in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
+	return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+ recv_error:
 	kfree(rpl_context);
- err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
@@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
@@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
 	return 1;
 }
 
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+
+	atomic_inc(&rdma->excess_rc);
+	return 0;
+}
+
 /**
  * trans_create_rdma - Transport method for creating atransport instance
  * @client: client instance