aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/9p/Kconfig13
-rw-r--r--fs/9p/Makefile4
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/9p/xattr.c4
-rw-r--r--fs/9p/xattr.h2
-rw-r--r--fs/9p/xattr_security.c80
-rw-r--r--fs/9p/xattr_trusted.c80
-rw-r--r--include/net/9p/transport.h6
-rw-r--r--net/9p/client.c70
-rw-r--r--net/9p/trans_fd.c40
-rw-r--r--net/9p/trans_rdma.c133
11 files changed, 355 insertions, 79 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 55abfd62654a..6489e1fc1afd 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL
31 If you don't know what Access Control Lists are, say N 31 If you don't know what Access Control Lists are, say N
32 32
33endif 33endif
34
35
36config 9P_FS_SECURITY
37 bool "9P Security Labels"
38 depends on 9P_FS
39 help
40 Security labels support alternative access control models
41 implemented by security modules like SELinux. This option
42 enables an extended attribute handler for file security
43 labels in the 9P filesystem.
44
45 If you are not using a security module that requires using
46 extended attributes for file security labels, say N.
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index ab8c12780634..ff7be98f84f2 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o
11 v9fs.o \ 11 v9fs.o \
12 fid.o \ 12 fid.o \
13 xattr.o \ 13 xattr.o \
14 xattr_user.o 14 xattr_user.o \
15 xattr_trusted.o
15 16
169p-$(CONFIG_9P_FSCACHE) += cache.o 179p-$(CONFIG_9P_FSCACHE) += cache.o
179p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o 189p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
199p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index d86edc8d3fd0..25b018efb8ab 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1054,13 +1054,11 @@ static int
1054v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1054v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1055 struct kstat *stat) 1055 struct kstat *stat)
1056{ 1056{
1057 int err;
1058 struct v9fs_session_info *v9ses; 1057 struct v9fs_session_info *v9ses;
1059 struct p9_fid *fid; 1058 struct p9_fid *fid;
1060 struct p9_wstat *st; 1059 struct p9_wstat *st;
1061 1060
1062 p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); 1061 p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
1063 err = -EPERM;
1064 v9ses = v9fs_dentry2v9ses(dentry); 1062 v9ses = v9fs_dentry2v9ses(dentry);
1065 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 1063 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
1066 generic_fillattr(dentry->d_inode, stat); 1064 generic_fillattr(dentry->d_inode, stat);
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index c45e016b190f..3c28cdfb8c47 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
167 167
168const struct xattr_handler *v9fs_xattr_handlers[] = { 168const struct xattr_handler *v9fs_xattr_handlers[] = {
169 &v9fs_xattr_user_handler, 169 &v9fs_xattr_user_handler,
170 &v9fs_xattr_trusted_handler,
170#ifdef CONFIG_9P_FS_POSIX_ACL 171#ifdef CONFIG_9P_FS_POSIX_ACL
171 &v9fs_xattr_acl_access_handler, 172 &v9fs_xattr_acl_access_handler,
172 &v9fs_xattr_acl_default_handler, 173 &v9fs_xattr_acl_default_handler,
173#endif 174#endif
175#ifdef CONFIG_9P_FS_SECURITY
176 &v9fs_xattr_security_handler,
177#endif
174 NULL 178 NULL
175}; 179};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index eec348a3df71..d3e2ea3840be 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -20,6 +20,8 @@
20 20
21extern const struct xattr_handler *v9fs_xattr_handlers[]; 21extern const struct xattr_handler *v9fs_xattr_handlers[];
22extern struct xattr_handler v9fs_xattr_user_handler; 22extern struct xattr_handler v9fs_xattr_user_handler;
23extern struct xattr_handler v9fs_xattr_trusted_handler;
24extern struct xattr_handler v9fs_xattr_security_handler;
23extern const struct xattr_handler v9fs_xattr_acl_access_handler; 25extern const struct xattr_handler v9fs_xattr_acl_access_handler;
24extern const struct xattr_handler v9fs_xattr_acl_default_handler; 26extern const struct xattr_handler v9fs_xattr_acl_default_handler;
25 27
diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c
new file mode 100644
index 000000000000..cb247a142a6e
--- /dev/null
+++ b/fs/9p/xattr_security.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15
16#include <linux/module.h>
17#include <linux/string.h>
18#include <linux/fs.h>
19#include <linux/slab.h>
20#include "xattr.h"
21
22static int v9fs_xattr_security_get(struct dentry *dentry, const char *name,
23 void *buffer, size_t size, int type)
24{
25 int retval;
26 char *full_name;
27 size_t name_len;
28 size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
29
30 if (name == NULL)
31 return -EINVAL;
32
33 if (strcmp(name, "") == 0)
34 return -EINVAL;
35
36 name_len = strlen(name);
37 full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
38 if (!full_name)
39 return -ENOMEM;
40 memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
41 memcpy(full_name+prefix_len, name, name_len);
42 full_name[prefix_len + name_len] = '\0';
43
44 retval = v9fs_xattr_get(dentry, full_name, buffer, size);
45 kfree(full_name);
46 return retval;
47}
48
49static int v9fs_xattr_security_set(struct dentry *dentry, const char *name,
50 const void *value, size_t size, int flags, int type)
51{
52 int retval;
53 char *full_name;
54 size_t name_len;
55 size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
56
57 if (name == NULL)
58 return -EINVAL;
59
60 if (strcmp(name, "") == 0)
61 return -EINVAL;
62
63 name_len = strlen(name);
64 full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
65 if (!full_name)
66 return -ENOMEM;
67 memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
68 memcpy(full_name + prefix_len, name, name_len);
69 full_name[prefix_len + name_len] = '\0';
70
71 retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
72 kfree(full_name);
73 return retval;
74}
75
76struct xattr_handler v9fs_xattr_security_handler = {
77 .prefix = XATTR_SECURITY_PREFIX,
78 .get = v9fs_xattr_security_get,
79 .set = v9fs_xattr_security_set,
80};
diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c
new file mode 100644
index 000000000000..e30d33b8a3fb
--- /dev/null
+++ b/fs/9p/xattr_trusted.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright IBM Corporation, 2010
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15
16#include <linux/module.h>
17#include <linux/string.h>
18#include <linux/fs.h>
19#include <linux/slab.h>
20#include "xattr.h"
21
22static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name,
23 void *buffer, size_t size, int type)
24{
25 int retval;
26 char *full_name;
27 size_t name_len;
28 size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
29
30 if (name == NULL)
31 return -EINVAL;
32
33 if (strcmp(name, "") == 0)
34 return -EINVAL;
35
36 name_len = strlen(name);
37 full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
38 if (!full_name)
39 return -ENOMEM;
40 memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
41 memcpy(full_name+prefix_len, name, name_len);
42 full_name[prefix_len + name_len] = '\0';
43
44 retval = v9fs_xattr_get(dentry, full_name, buffer, size);
45 kfree(full_name);
46 return retval;
47}
48
49static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name,
50 const void *value, size_t size, int flags, int type)
51{
52 int retval;
53 char *full_name;
54 size_t name_len;
55 size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
56
57 if (name == NULL)
58 return -EINVAL;
59
60 if (strcmp(name, "") == 0)
61 return -EINVAL;
62
63 name_len = strlen(name);
64 full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
65 if (!full_name)
66 return -ENOMEM;
67 memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
68 memcpy(full_name + prefix_len, name, name_len);
69 full_name[prefix_len + name_len] = '\0';
70
71 retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
72 kfree(full_name);
73 return retval;
74}
75
76struct xattr_handler v9fs_xattr_trusted_handler = {
77 .prefix = XATTR_TRUSTED_PREFIX,
78 .get = v9fs_xattr_trusted_get,
79 .set = v9fs_xattr_trusted_set,
80};
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index adcbb20f6511..d9fa68f26c41 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,6 +26,9 @@
26#ifndef NET_9P_TRANSPORT_H 26#ifndef NET_9P_TRANSPORT_H
27#define NET_9P_TRANSPORT_H 27#define NET_9P_TRANSPORT_H
28 28
29#define P9_DEF_MIN_RESVPORT (665U)
30#define P9_DEF_MAX_RESVPORT (1023U)
31
29/** 32/**
30 * struct p9_trans_module - transport module interface 33 * struct p9_trans_module - transport module interface
31 * @list: used to maintain a list of currently available transports 34 * @list: used to maintain a list of currently available transports
@@ -37,6 +40,8 @@
37 * @close: member function to discard a connection on this transport 40 * @close: member function to discard a connection on this transport
38 * @request: member function to issue a request to the transport 41 * @request: member function to issue a request to the transport
39 * @cancel: member function to cancel a request (if it hasn't been sent) 42 * @cancel: member function to cancel a request (if it hasn't been sent)
43 * @cancelled: member function to notify that a cancelled request will not
44 * receive a reply
40 * 45 *
41 * This is the basic API for a transport module which is registered by the 46 * This is the basic API for a transport module which is registered by the
42 * transport module with the 9P core network module and used by the client 47 * transport module with the 9P core network module and used by the client
@@ -55,6 +60,7 @@ struct p9_trans_module {
55 void (*close) (struct p9_client *); 60 void (*close) (struct p9_client *);
56 int (*request) (struct p9_client *, struct p9_req_t *req); 61 int (*request) (struct p9_client *, struct p9_req_t *req);
57 int (*cancel) (struct p9_client *, struct p9_req_t *req); 62 int (*cancel) (struct p9_client *, struct p9_req_t *req);
63 int (*cancelled)(struct p9_client *, struct p9_req_t *req);
58 int (*zc_request)(struct p9_client *, struct p9_req_t *, 64 int (*zc_request)(struct p9_client *, struct p9_req_t *,
59 char *, char *, int , int, int, int); 65 char *, char *, int , int, int, int);
60}; 66};
diff --git a/net/9p/client.c b/net/9p/client.c
index 01f1779eba80..8b93cae2d11d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,6 +204,17 @@ free_and_return:
204 return ret; 204 return ret;
205} 205}
206 206
207struct p9_fcall *p9_fcall_alloc(int alloc_msize)
208{
209 struct p9_fcall *fc;
210 fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
211 if (!fc)
212 return NULL;
213 fc->capacity = alloc_msize;
214 fc->sdata = (char *) fc + sizeof(struct p9_fcall);
215 return fc;
216}
217
207/** 218/**
208 * p9_tag_alloc - lookup/allocate a request by tag 219 * p9_tag_alloc - lookup/allocate a request by tag
209 * @c: client session to lookup tag within 220 * @c: client session to lookup tag within
@@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
256 col = tag % P9_ROW_MAXTAG; 267 col = tag % P9_ROW_MAXTAG;
257 268
258 req = &c->reqs[row][col]; 269 req = &c->reqs[row][col];
259 if (!req->tc) { 270 if (!req->wq) {
260 req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); 271 req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
261 if (!req->wq) { 272 if (!req->wq)
262 pr_err("Couldn't grow tag array\n"); 273 goto grow_failed;
263 return ERR_PTR(-ENOMEM);
264 }
265 init_waitqueue_head(req->wq); 274 init_waitqueue_head(req->wq);
266 req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
267 GFP_NOFS);
268 req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
269 GFP_NOFS);
270 if ((!req->tc) || (!req->rc)) {
271 pr_err("Couldn't grow tag array\n");
272 kfree(req->tc);
273 kfree(req->rc);
274 kfree(req->wq);
275 req->tc = req->rc = NULL;
276 req->wq = NULL;
277 return ERR_PTR(-ENOMEM);
278 }
279 req->tc->capacity = alloc_msize;
280 req->rc->capacity = alloc_msize;
281 req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
282 req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
283 } 275 }
284 276
277 if (!req->tc)
278 req->tc = p9_fcall_alloc(alloc_msize);
279 if (!req->rc)
280 req->rc = p9_fcall_alloc(alloc_msize);
281 if (!req->tc || !req->rc)
282 goto grow_failed;
283
285 p9pdu_reset(req->tc); 284 p9pdu_reset(req->tc);
286 p9pdu_reset(req->rc); 285 p9pdu_reset(req->rc);
287 286
288 req->tc->tag = tag-1; 287 req->tc->tag = tag-1;
289 req->status = REQ_STATUS_ALLOC; 288 req->status = REQ_STATUS_ALLOC;
290 289
291 return &c->reqs[row][col]; 290 return req;
291
292grow_failed:
293 pr_err("Couldn't grow tag array\n");
294 kfree(req->tc);
295 kfree(req->rc);
296 kfree(req->wq);
297 req->tc = req->rc = NULL;
298 req->wq = NULL;
299 return ERR_PTR(-ENOMEM);
292} 300}
293 301
294/** 302/**
@@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
648 return PTR_ERR(req); 656 return PTR_ERR(req);
649 657
650 658
651 /* if we haven't received a response for oldreq, 659 /*
652 remove it from the list. */ 660 * if we haven't received a response for oldreq,
661 * remove it from the list, and notify the transport
662 * layer that the reply will never arrive.
663 */
653 spin_lock(&c->lock); 664 spin_lock(&c->lock);
654 if (oldreq->status == REQ_STATUS_FLSH) 665 if (oldreq->status == REQ_STATUS_FLSH) {
655 list_del(&oldreq->req_list); 666 list_del(&oldreq->req_list);
656 spin_unlock(&c->lock); 667 spin_unlock(&c->lock);
668 if (c->trans_mod->cancelled)
669 c->trans_mod->cancelled(c, req);
670 } else {
671 spin_unlock(&c->lock);
672 }
657 673
658 p9_free_req(c, req); 674 p9_free_req(c, req);
659 return 0; 675 return 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 02efb25c2957..3ffda1b3799b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -63,6 +63,7 @@ struct p9_fd_opts {
63 int rfd; 63 int rfd;
64 int wfd; 64 int wfd;
65 u16 port; 65 u16 port;
66 int privport;
66}; 67};
67 68
68/** 69/**
@@ -87,12 +88,15 @@ struct p9_trans_fd {
87enum { 88enum {
88 /* Options that take integer arguments */ 89 /* Options that take integer arguments */
89 Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, 90 Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
91 /* Options that take no arguments */
92 Opt_privport,
90}; 93};
91 94
92static const match_table_t tokens = { 95static const match_table_t tokens = {
93 {Opt_port, "port=%u"}, 96 {Opt_port, "port=%u"},
94 {Opt_rfdno, "rfdno=%u"}, 97 {Opt_rfdno, "rfdno=%u"},
95 {Opt_wfdno, "wfdno=%u"}, 98 {Opt_wfdno, "wfdno=%u"},
99 {Opt_privport, "privport"},
96 {Opt_err, NULL}, 100 {Opt_err, NULL},
97}; 101};
98 102
@@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
161static LIST_HEAD(p9_poll_pending_list); 165static LIST_HEAD(p9_poll_pending_list);
162static DECLARE_WORK(p9_poll_work, p9_poll_workfn); 166static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
163 167
168static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
169static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
170
164static void p9_mux_poll_stop(struct p9_conn *m) 171static void p9_mux_poll_stop(struct p9_conn *m)
165{ 172{
166 unsigned long flags; 173 unsigned long flags;
@@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
741 if (!*p) 748 if (!*p)
742 continue; 749 continue;
743 token = match_token(p, tokens, args); 750 token = match_token(p, tokens, args);
744 if (token != Opt_err) { 751 if ((token != Opt_err) && (token != Opt_privport)) {
745 r = match_int(&args[0], &option); 752 r = match_int(&args[0], &option);
746 if (r < 0) { 753 if (r < 0) {
747 p9_debug(P9_DEBUG_ERROR, 754 p9_debug(P9_DEBUG_ERROR,
@@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
759 case Opt_wfdno: 766 case Opt_wfdno:
760 opts->wfd = option; 767 opts->wfd = option;
761 break; 768 break;
769 case Opt_privport:
770 opts->privport = 1;
771 break;
762 default: 772 default:
763 continue; 773 continue;
764 } 774 }
@@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
898 return 0; 908 return 0;
899} 909}
900 910
911static int p9_bind_privport(struct socket *sock)
912{
913 struct sockaddr_in cl;
914 int port, err = -EINVAL;
915
916 memset(&cl, 0, sizeof(cl));
917 cl.sin_family = AF_INET;
918 cl.sin_addr.s_addr = INADDR_ANY;
919 for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
920 cl.sin_port = htons((ushort)port);
921 err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
922 if (err != -EADDRINUSE)
923 break;
924 }
925 return err;
926}
927
928
901static int 929static int
902p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) 930p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
903{ 931{
@@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
926 return err; 954 return err;
927 } 955 }
928 956
957 if (opts.privport) {
958 err = p9_bind_privport(csocket);
959 if (err < 0) {
960 pr_err("%s (%d): problem binding to privport\n",
961 __func__, task_pid_nr(current));
962 sock_release(csocket);
963 return err;
964 }
965 }
966
929 err = csocket->ops->connect(csocket, 967 err = csocket->ops->connect(csocket,
930 (struct sockaddr *)&sin_server, 968 (struct sockaddr *)&sin_server,
931 sizeof(struct sockaddr_in), 0); 969 sizeof(struct sockaddr_in), 0);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2c69ddd691a1..928f2bb9bf8d 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -57,9 +57,7 @@
57#define P9_RDMA_IRD 0 57#define P9_RDMA_IRD 0
58#define P9_RDMA_ORD 0 58#define P9_RDMA_ORD 0
59#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ 59#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
60#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can 60#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */
61 * safely advertise a maxsize
62 * of 64k */
63 61
64/** 62/**
65 * struct p9_trans_rdma - RDMA transport instance 63 * struct p9_trans_rdma - RDMA transport instance
@@ -75,7 +73,9 @@
75 * @sq_depth: The depth of the Send Queue 73 * @sq_depth: The depth of the Send Queue
76 * @sq_sem: Semaphore for the SQ 74 * @sq_sem: Semaphore for the SQ
77 * @rq_depth: The depth of the Receive Queue. 75 * @rq_depth: The depth of the Receive Queue.
78 * @rq_count: Count of requests in the Receive Queue. 76 * @rq_sem: Semaphore for the RQ
77 * @excess_rc : Amount of posted Receive Contexts without a pending request.
78 * See rdma_request()
79 * @addr: The remote peer's address 79 * @addr: The remote peer's address
80 * @req_lock: Protects the active request list 80 * @req_lock: Protects the active request list
81 * @cm_done: Completion event for connection management tracking 81 * @cm_done: Completion event for connection management tracking
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
100 int sq_depth; 100 int sq_depth;
101 struct semaphore sq_sem; 101 struct semaphore sq_sem;
102 int rq_depth; 102 int rq_depth;
103 atomic_t rq_count; 103 struct semaphore rq_sem;
104 atomic_t excess_rc;
104 struct sockaddr_in addr; 105 struct sockaddr_in addr;
105 spinlock_t req_lock; 106 spinlock_t req_lock;
106 107
@@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
296 if (!req) 297 if (!req)
297 goto err_out; 298 goto err_out;
298 299
300 /* Check that we have not yet received a reply for this request.
301 */
302 if (unlikely(req->rc)) {
303 pr_err("Duplicate reply for request %d", tag);
304 goto err_out;
305 }
306
299 req->rc = c->rc; 307 req->rc = c->rc;
300 req->status = REQ_STATUS_RCVD; 308 req->status = REQ_STATUS_RCVD;
301 p9_client_cb(client, req); 309 p9_client_cb(client, req);
@@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
336 344
337 switch (c->wc_op) { 345 switch (c->wc_op) {
338 case IB_WC_RECV: 346 case IB_WC_RECV:
339 atomic_dec(&rdma->rq_count);
340 handle_recv(client, rdma, c, wc.status, wc.byte_len); 347 handle_recv(client, rdma, c, wc.status, wc.byte_len);
348 up(&rdma->rq_sem);
341 break; 349 break;
342 350
343 case IB_WC_SEND: 351 case IB_WC_SEND:
@@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
421 struct p9_rdma_context *c = NULL; 429 struct p9_rdma_context *c = NULL;
422 struct p9_rdma_context *rpl_context = NULL; 430 struct p9_rdma_context *rpl_context = NULL;
423 431
432 /* When an error occurs between posting the recv and the send,
433 * there will be a receive context posted without a pending request.
434 * Since there is no way to "un-post" it, we remember it and skip
435 * post_recv() for the next request.
436 * So here,
437 * see if we are this `next request' and need to absorb an excess rc.
438 * If yes, then drop and free our own, and do not recv_post().
439 **/
440 if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
441 if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
442 /* Got one ! */
443 kfree(req->rc);
444 req->rc = NULL;
445 goto dont_need_post_recv;
446 } else {
447 /* We raced and lost. */
448 atomic_inc(&rdma->excess_rc);
449 }
450 }
451
424 /* Allocate an fcall for the reply */ 452 /* Allocate an fcall for the reply */
425 rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); 453 rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
426 if (!rpl_context) { 454 if (!rpl_context) {
427 err = -ENOMEM; 455 err = -ENOMEM;
428 goto err_close; 456 goto recv_error;
429 }
430
431 /*
432 * If the request has a buffer, steal it, otherwise
433 * allocate a new one. Typically, requests should already
434 * have receive buffers allocated and just swap them around
435 */
436 if (!req->rc) {
437 req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
438 GFP_NOFS);
439 if (req->rc) {
440 req->rc->sdata = (char *) req->rc +
441 sizeof(struct p9_fcall);
442 req->rc->capacity = client->msize;
443 }
444 } 457 }
445 rpl_context->rc = req->rc; 458 rpl_context->rc = req->rc;
446 if (!rpl_context->rc) {
447 err = -ENOMEM;
448 goto err_free2;
449 }
450 459
451 /* 460 /*
452 * Post a receive buffer for this request. We need to ensure 461 * Post a receive buffer for this request. We need to ensure
@@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
455 * outstanding request, so we must keep a count to avoid 464 * outstanding request, so we must keep a count to avoid
456 * overflowing the RQ. 465 * overflowing the RQ.
457 */ 466 */
458 if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { 467 if (down_interruptible(&rdma->rq_sem)) {
459 err = post_recv(client, rpl_context); 468 err = -EINTR;
460 if (err) 469 goto recv_error;
461 goto err_free1; 470 }
462 } else
463 atomic_dec(&rdma->rq_count);
464 471
472 err = post_recv(client, rpl_context);
473 if (err) {
474 p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
475 goto recv_error;
476 }
465 /* remove posted receive buffer from request structure */ 477 /* remove posted receive buffer from request structure */
466 req->rc = NULL; 478 req->rc = NULL;
467 479
480dont_need_post_recv:
468 /* Post the request */ 481 /* Post the request */
469 c = kmalloc(sizeof *c, GFP_NOFS); 482 c = kmalloc(sizeof *c, GFP_NOFS);
470 if (!c) { 483 if (!c) {
471 err = -ENOMEM; 484 err = -ENOMEM;
472 goto err_free1; 485 goto send_error;
473 } 486 }
474 c->req = req; 487 c->req = req;
475 488
476 c->busa = ib_dma_map_single(rdma->cm_id->device, 489 c->busa = ib_dma_map_single(rdma->cm_id->device,
477 c->req->tc->sdata, c->req->tc->size, 490 c->req->tc->sdata, c->req->tc->size,
478 DMA_TO_DEVICE); 491 DMA_TO_DEVICE);
479 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 492 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
480 goto error; 493 err = -EIO;
494 goto send_error;
495 }
481 496
482 sge.addr = c->busa; 497 sge.addr = c->busa;
483 sge.length = c->req->tc->size; 498 sge.length = c->req->tc->size;
@@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
491 wr.sg_list = &sge; 506 wr.sg_list = &sge;
492 wr.num_sge = 1; 507 wr.num_sge = 1;
493 508
494 if (down_interruptible(&rdma->sq_sem)) 509 if (down_interruptible(&rdma->sq_sem)) {
495 goto error; 510 err = -EINTR;
511 goto send_error;
512 }
496 513
497 return ib_post_send(rdma->qp, &wr, &bad_wr); 514 err = ib_post_send(rdma->qp, &wr, &bad_wr);
515 if (err)
516 goto send_error;
498 517
499 error: 518 /* Success */
519 return 0;
520
521 /* Handle errors that happened during or while preparing the send: */
522 send_error:
500 kfree(c); 523 kfree(c);
501 kfree(rpl_context->rc); 524 p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
502 kfree(rpl_context); 525
503 p9_debug(P9_DEBUG_ERROR, "EIO\n"); 526 /* Ach.
504 return -EIO; 527 * We did recv_post(), but not send. We have one recv_post in excess.
505 err_free1: 528 */
506 kfree(rpl_context->rc); 529 atomic_inc(&rdma->excess_rc);
507 err_free2: 530 return err;
531
532 /* Handle errors that happened during or while preparing post_recv(): */
533 recv_error:
508 kfree(rpl_context); 534 kfree(rpl_context);
509 err_close:
510 spin_lock_irqsave(&rdma->req_lock, flags); 535 spin_lock_irqsave(&rdma->req_lock, flags);
511 if (rdma->state < P9_RDMA_CLOSING) { 536 if (rdma->state < P9_RDMA_CLOSING) {
512 rdma->state = P9_RDMA_CLOSING; 537 rdma->state = P9_RDMA_CLOSING;
@@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
551 spin_lock_init(&rdma->req_lock); 576 spin_lock_init(&rdma->req_lock);
552 init_completion(&rdma->cm_done); 577 init_completion(&rdma->cm_done);
553 sema_init(&rdma->sq_sem, rdma->sq_depth); 578 sema_init(&rdma->sq_sem, rdma->sq_depth);
554 atomic_set(&rdma->rq_count, 0); 579 sema_init(&rdma->rq_sem, rdma->rq_depth);
580 atomic_set(&rdma->excess_rc, 0);
555 581
556 return rdma; 582 return rdma;
557} 583}
@@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
562 return 1; 588 return 1;
563} 589}
564 590
591/* A request has been fully flushed without a reply.
592 * That means we have posted one buffer in excess.
593 */
594static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
595{
596 struct p9_trans_rdma *rdma = client->trans;
597
598 atomic_inc(&rdma->excess_rc);
599 return 0;
600}
601
565/** 602/**
566 * trans_create_rdma - Transport method for creating a transport instance 603 * trans_create_rdma - Transport method for creating a transport instance
567 * @client: client instance 604 * @client: client instance