aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-06-27 13:14:39 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-06-27 13:14:39 -0400
commit d2c3ac7e7e39ec6d37e4114ae7444948561e59af (patch)
tree c0e6a9eed0139de4e757fbf64498fe8c0b291e44
parent 546fac60739ef8d7cbf8ce0b8251a519f68b2804 (diff)
parent 901f1379f6c9dc2d73b51971d129a6f7d5b9b20a (diff)
Merge branch 'for-4.2' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
 "A relatively quiet cycle, with a mix of cleanup and smaller bugfixes"

* 'for-4.2' of git://linux-nfs.org/~bfields/linux: (24 commits)
  sunrpc: use sg_init_one() in krb5_rc4_setup_enc/seq_key()
  nfsd: wrap too long lines in nfsd4_encode_read
  nfsd: fput rd_file from XDR encode context
  nfsd: take struct file setup fully into nfs4_preprocess_stateid_op
  nfsd: refactor nfs4_preprocess_stateid_op
  nfsd: clean up raparams handling
  nfsd: use swap() in sort_pacl_range()
  rpcrdma: Merge svcrdma and xprtrdma modules into one
  svcrdma: Add a separate "max data segs macro for svcrdma
  svcrdma: Replace GFP_KERNEL in a loop with GFP_NOFAIL
  svcrdma: Keep rpcrdma_msg fields in network byte-order
  svcrdma: Fix byte-swapping in svc_rdma_sendto.c
  nfsd: Update callback sequnce id only CB_SEQUENCE success
  nfsd: Reset cb_status in nfsd4_cb_prepare() at retrying
  svcrdma: Remove svc_rdma_xdr_decode_deferred_req()
  SUNRPC: Move EXPORT_SYMBOL for svc_process
  uapi/nfs: Add NFSv4.1 ACL definitions
  nfsd: Remove dead declarations
  nfsd: work around a gcc-5.1 warning
  nfsd: Checking for acl support does not require fetching any acls
  ...
-rw-r--r-- Documentation/filesystems/nfs/knfsd-stats.txt 44
-rw-r--r-- fs/nfsd/nfs3xdr.c 12
-rw-r--r-- fs/nfsd/nfs4acl.c 18
-rw-r--r-- fs/nfsd/nfs4callback.c 15
-rw-r--r-- fs/nfsd/nfs4proc.c 43
-rw-r--r-- fs/nfsd/nfs4state.c 148
-rw-r--r-- fs/nfsd/nfs4xdr.c 75
-rw-r--r-- fs/nfsd/nfsproc.c 52
-rw-r--r-- fs/nfsd/state.h 7
-rw-r--r-- fs/nfsd/vfs.c 128
-rw-r--r-- fs/nfsd/vfs.h 11
-rw-r--r-- fs/nfsd/xdr4.h 1
-rw-r--r-- include/linux/sunrpc/svc_rdma.h 11
-rw-r--r-- include/uapi/linux/nfs4.h 7
-rw-r--r-- net/sunrpc/Kconfig 28
-rw-r--r-- net/sunrpc/Makefile 3
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_crypto.c 8
-rw-r--r-- net/sunrpc/svc.c 2
-rw-r--r-- net/sunrpc/xprtrdma/Makefile 14
-rw-r--r-- net/sunrpc/xprtrdma/module.c 46
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c 8
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_marshal.c 140
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 2
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c 16
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c 36
-rw-r--r-- net/sunrpc/xprtrdma/transport.c 13
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 11
27 files changed, 407 insertions, 492 deletions
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt
index 64ced5149d37..1a5d82180b84 100644
--- a/Documentation/filesystems/nfs/knfsd-stats.txt
+++ b/Documentation/filesystems/nfs/knfsd-stats.txt
@@ -68,16 +68,10 @@ sockets-enqueued
68 rate of change for this counter is zero; significantly non-zero 68 rate of change for this counter is zero; significantly non-zero
69 values may indicate a performance limitation. 69 values may indicate a performance limitation.
70 70
71 This can happen either because there are too few nfsd threads in the 71 This can happen because there are too few nfsd threads in the thread
72 thread pool for the NFS workload (the workload is thread-limited), 72 pool for the NFS workload (the workload is thread-limited), in which
73 or because the NFS workload needs more CPU time than is available in 73 case configuring more nfsd threads will probably improve the
74 the thread pool (the workload is CPU-limited). In the former case, 74 performance of the NFS workload.
75 configuring more nfsd threads will probably improve the performance
76 of the NFS workload. In the latter case, the sunrpc server layer is
77 already choosing not to wake idle nfsd threads because there are too
78 many nfsd threads which want to run but cannot, so configuring more
79 nfsd threads will make no difference whatsoever. The overloads-avoided
80 statistic (see below) can be used to distinguish these cases.
81 75
82threads-woken 76threads-woken
83 Counts how many times an idle nfsd thread is woken to try to 77 Counts how many times an idle nfsd thread is woken to try to
@@ -88,36 +82,6 @@ threads-woken
88 thing. The ideal rate of change for this counter will be close 82 thing. The ideal rate of change for this counter will be close
89 to but less than the rate of change of the packets-arrived counter. 83 to but less than the rate of change of the packets-arrived counter.
90 84
91overloads-avoided
92 Counts how many times the sunrpc server layer chose not to wake an
93 nfsd thread, despite the presence of idle nfsd threads, because
94 too many nfsd threads had been recently woken but could not get
95 enough CPU time to actually run.
96
97 This statistic counts a circumstance where the sunrpc layer
98 heuristically avoids overloading the CPU scheduler with too many
99 runnable nfsd threads. The ideal rate of change for this counter
100 is zero. Significant non-zero values indicate that the workload
101 is CPU limited. Usually this is associated with heavy CPU usage
102 on all the CPUs in the nfsd thread pool.
103
104 If a sustained large overloads-avoided rate is detected on a pool,
105 the top(1) utility should be used to check for the following
106 pattern of CPU usage on all the CPUs associated with the given
107 nfsd thread pool.
108
109 - %us ~= 0 (as you're *NOT* running applications on your NFS server)
110
111 - %wa ~= 0
112
113 - %id ~= 0
114
115 - %sy + %hi + %si ~= 100
116
117 If this pattern is seen, configuring more nfsd threads will *not*
118 improve the performance of the workload. If this patten is not
119 seen, then something more subtle is wrong.
120
121threads-timedout 85threads-timedout
122 Counts how many times an nfsd thread triggered an idle timeout, 86 Counts how many times an nfsd thread triggered an idle timeout,
123 i.e. was not woken to handle any incoming network packets for 87 i.e. was not woken to handle any incoming network packets for
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e4b2b4322553..f6e7cbabac5a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -805,7 +805,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
805 805
806static __be32 806static __be32
807compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 807compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
808 const char *name, int namlen) 808 const char *name, int namlen, u64 ino)
809{ 809{
810 struct svc_export *exp; 810 struct svc_export *exp;
811 struct dentry *dparent, *dchild; 811 struct dentry *dparent, *dchild;
@@ -830,19 +830,21 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
830 goto out; 830 goto out;
831 if (d_really_is_negative(dchild)) 831 if (d_really_is_negative(dchild))
832 goto out; 832 goto out;
833 if (dchild->d_inode->i_ino != ino)
834 goto out;
833 rv = fh_compose(fhp, exp, dchild, &cd->fh); 835 rv = fh_compose(fhp, exp, dchild, &cd->fh);
834out: 836out:
835 dput(dchild); 837 dput(dchild);
836 return rv; 838 return rv;
837} 839}
838 840
839static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 841static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
840{ 842{
841 struct svc_fh *fh = &cd->scratch; 843 struct svc_fh *fh = &cd->scratch;
842 __be32 err; 844 __be32 err;
843 845
844 fh_init(fh, NFS3_FHSIZE); 846 fh_init(fh, NFS3_FHSIZE);
845 err = compose_entry_fh(cd, fh, name, namlen); 847 err = compose_entry_fh(cd, fh, name, namlen, ino);
846 if (err) { 848 if (err) {
847 *p++ = 0; 849 *p++ = 0;
848 *p++ = 0; 850 *p++ = 0;
@@ -927,7 +929,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
927 p = encode_entry_baggage(cd, p, name, namlen, ino); 929 p = encode_entry_baggage(cd, p, name, namlen, ino);
928 930
929 if (plus) 931 if (plus)
930 p = encode_entryplus_baggage(cd, p, name, namlen); 932 p = encode_entryplus_baggage(cd, p, name, namlen, ino);
931 num_entry_words = p - cd->buffer; 933 num_entry_words = p - cd->buffer;
932 } else if (*(page+1) != NULL) { 934 } else if (*(page+1) != NULL) {
933 /* temporarily encode entry into next page, then move back to 935 /* temporarily encode entry into next page, then move back to
@@ -941,7 +943,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
941 p1 = encode_entry_baggage(cd, p1, name, namlen, ino); 943 p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
942 944
943 if (plus) 945 if (plus)
944 p1 = encode_entryplus_baggage(cd, p1, name, namlen); 946 p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
945 947
946 /* determine entry word length and lengths to go in pages */ 948 /* determine entry word length and lengths to go in pages */
947 num_entry_words = p1 - tmp; 949 num_entry_words = p1 - tmp;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 67242bf7c6cc..eb5accf1b37f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -52,10 +52,6 @@
52#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE) 52#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
53#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) 53#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
54 54
55/* We don't support these bits; insist they be neither allowed nor denied */
56#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
57 | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
58
59/* flags used to simulate posix default ACLs */ 55/* flags used to simulate posix default ACLs */
60#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ 56#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
61 | NFS4_ACE_DIRECTORY_INHERIT_ACE) 57 | NFS4_ACE_DIRECTORY_INHERIT_ACE)
@@ -64,9 +60,6 @@
64 | NFS4_ACE_INHERIT_ONLY_ACE \ 60 | NFS4_ACE_INHERIT_ONLY_ACE \
65 | NFS4_ACE_IDENTIFIER_GROUP) 61 | NFS4_ACE_IDENTIFIER_GROUP)
66 62
67#define MASK_EQUAL(mask1, mask2) \
68 ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
69
70static u32 63static u32
71mask_from_posix(unsigned short perm, unsigned int flags) 64mask_from_posix(unsigned short perm, unsigned int flags)
72{ 65{
@@ -126,11 +119,6 @@ low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
126 *mode |= ACL_EXECUTE; 119 *mode |= ACL_EXECUTE;
127} 120}
128 121
129struct ace_container {
130 struct nfs4_ace *ace;
131 struct list_head ace_l;
132};
133
134static short ace2type(struct nfs4_ace *); 122static short ace2type(struct nfs4_ace *);
135static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, 123static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
136 unsigned int); 124 unsigned int);
@@ -384,7 +372,6 @@ pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
384static void 372static void
385sort_pacl_range(struct posix_acl *pacl, int start, int end) { 373sort_pacl_range(struct posix_acl *pacl, int start, int end) {
386 int sorted = 0, i; 374 int sorted = 0, i;
387 struct posix_acl_entry tmp;
388 375
389 /* We just do a bubble sort; easy to do in place, and we're not 376 /* We just do a bubble sort; easy to do in place, and we're not
390 * expecting acl's to be long enough to justify anything more. */ 377 * expecting acl's to be long enough to justify anything more. */
@@ -394,9 +381,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
394 if (pace_gt(&pacl->a_entries[i], 381 if (pace_gt(&pacl->a_entries[i],
395 &pacl->a_entries[i+1])) { 382 &pacl->a_entries[i+1])) {
396 sorted = 0; 383 sorted = 0;
397 tmp = pacl->a_entries[i]; 384 swap(pacl->a_entries[i],
398 pacl->a_entries[i] = pacl->a_entries[i+1]; 385 pacl->a_entries[i + 1]);
399 pacl->a_entries[i+1] = tmp;
400 } 386 }
401 } 387 }
402 } 388 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 5694cfb7a47b..a49201835a97 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -455,6 +455,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
455 if (unlikely(status || cb->cb_status)) 455 if (unlikely(status || cb->cb_status))
456 return status; 456 return status;
457 457
458 cb->cb_update_seq_nr = true;
458 return decode_cb_sequence4resok(xdr, cb); 459 return decode_cb_sequence4resok(xdr, cb);
459} 460}
460 461
@@ -875,6 +876,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
875 u32 minorversion = clp->cl_minorversion; 876 u32 minorversion = clp->cl_minorversion;
876 877
877 cb->cb_minorversion = minorversion; 878 cb->cb_minorversion = minorversion;
879 cb->cb_update_seq_nr = false;
880 cb->cb_status = 0;
878 if (minorversion) { 881 if (minorversion) {
879 if (!nfsd41_cb_get_slot(clp, task)) 882 if (!nfsd41_cb_get_slot(clp, task))
880 return; 883 return;
@@ -891,9 +894,16 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
891 clp->cl_minorversion); 894 clp->cl_minorversion);
892 895
893 if (clp->cl_minorversion) { 896 if (clp->cl_minorversion) {
894 /* No need for lock, access serialized in nfsd4_cb_prepare */ 897 /*
895 if (!task->tk_status) 898 * No need for lock, access serialized in nfsd4_cb_prepare
899 *
900 * RFC5661 20.9.3
901 * If CB_SEQUENCE returns an error, then the state of the slot
902 * (sequence ID, cached reply) MUST NOT change.
903 */
904 if (cb->cb_update_seq_nr)
896 ++clp->cl_cb_session->se_cb_seq_nr; 905 ++clp->cl_cb_session->se_cb_seq_nr;
906
897 clear_bit(0, &clp->cl_cb_slot_busy); 907 clear_bit(0, &clp->cl_cb_slot_busy);
898 rpc_wake_up_next(&clp->cl_cb_waitq); 908 rpc_wake_up_next(&clp->cl_cb_waitq);
899 dprintk("%s: freed slot, new seqid=%d\n", __func__, 909 dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1090,6 +1100,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
1090 cb->cb_ops = ops; 1100 cb->cb_ops = ops;
1091 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); 1101 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
1092 cb->cb_status = 0; 1102 cb->cb_status = 0;
1103 cb->cb_update_seq_nr = false;
1093 cb->cb_need_restart = false; 1104 cb->cb_need_restart = false;
1094} 1105}
1095 1106
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 864e2003e8de..90cfda75313c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -760,8 +760,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
760{ 760{
761 __be32 status; 761 __be32 status;
762 762
763 /* no need to check permission - this will be done in nfsd_read() */
764
765 read->rd_filp = NULL; 763 read->rd_filp = NULL;
766 if (read->rd_offset >= OFFSET_MAX) 764 if (read->rd_offset >= OFFSET_MAX)
767 return nfserr_inval; 765 return nfserr_inval;
@@ -778,9 +776,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
778 clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); 776 clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
779 777
780 /* check stateid */ 778 /* check stateid */
781 if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), 779 status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
782 cstate, &read->rd_stateid, 780 RD_STATE, &read->rd_filp, &read->rd_tmp_file);
783 RD_STATE, &read->rd_filp))) { 781 if (status) {
784 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 782 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
785 goto out; 783 goto out;
786 } 784 }
@@ -924,8 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
924 int err; 922 int err;
925 923
926 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 924 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
927 status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, 925 status = nfs4_preprocess_stateid_op(rqstp, cstate,
928 &setattr->sa_stateid, WR_STATE, NULL); 926 &setattr->sa_stateid, WR_STATE, NULL, NULL);
929 if (status) { 927 if (status) {
930 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); 928 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
931 return status; 929 return status;
@@ -986,13 +984,11 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
986 unsigned long cnt; 984 unsigned long cnt;
987 int nvecs; 985 int nvecs;
988 986
989 /* no need to check permission - this will be done in nfsd_write() */
990
991 if (write->wr_offset >= OFFSET_MAX) 987 if (write->wr_offset >= OFFSET_MAX)
992 return nfserr_inval; 988 return nfserr_inval;
993 989
994 status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), 990 status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
995 cstate, stateid, WR_STATE, &filp); 991 &filp, NULL);
996 if (status) { 992 if (status) {
997 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); 993 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
998 return status; 994 return status;
@@ -1005,11 +1001,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1005 nvecs = fill_in_write_vector(rqstp->rq_vec, write); 1001 nvecs = fill_in_write_vector(rqstp->rq_vec, write);
1006 WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); 1002 WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
1007 1003
1008 status = nfsd_write(rqstp, &cstate->current_fh, filp, 1004 status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
1009 write->wr_offset, rqstp->rq_vec, nvecs, 1005 write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
1010 &cnt, &write->wr_how_written); 1006 &write->wr_how_written);
1011 if (filp) 1007 fput(filp);
1012 fput(filp);
1013 1008
1014 write->wr_bytes_written = cnt; 1009 write->wr_bytes_written = cnt;
1015 1010
@@ -1023,15 +1018,13 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1023 __be32 status = nfserr_notsupp; 1018 __be32 status = nfserr_notsupp;
1024 struct file *file; 1019 struct file *file;
1025 1020
1026 status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, 1021 status = nfs4_preprocess_stateid_op(rqstp, cstate,
1027 &fallocate->falloc_stateid, 1022 &fallocate->falloc_stateid,
1028 WR_STATE, &file); 1023 WR_STATE, &file, NULL);
1029 if (status != nfs_ok) { 1024 if (status != nfs_ok) {
1030 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); 1025 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
1031 return status; 1026 return status;
1032 } 1027 }
1033 if (!file)
1034 return nfserr_bad_stateid;
1035 1028
1036 status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, 1029 status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
1037 fallocate->falloc_offset, 1030 fallocate->falloc_offset,
@@ -1064,15 +1057,13 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1064 __be32 status; 1057 __be32 status;
1065 struct file *file; 1058 struct file *file;
1066 1059
1067 status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, 1060 status = nfs4_preprocess_stateid_op(rqstp, cstate,
1068 &seek->seek_stateid, 1061 &seek->seek_stateid,
1069 RD_STATE, &file); 1062 RD_STATE, &file, NULL);
1070 if (status) { 1063 if (status) {
1071 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); 1064 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
1072 return status; 1065 return status;
1073 } 1066 }
1074 if (!file)
1075 return nfserr_bad_stateid;
1076 1067
1077 switch (seek->seek_whence) { 1068 switch (seek->seek_whence) {
1078 case NFS4_CONTENT_DATA: 1069 case NFS4_CONTENT_DATA:
@@ -1732,10 +1723,6 @@ encode_op:
1732 be32_to_cpu(status)); 1723 be32_to_cpu(status));
1733 1724
1734 nfsd4_cstate_clear_replay(cstate); 1725 nfsd4_cstate_clear_replay(cstate);
1735 /* XXX Ugh, we need to get rid of this kind of special case: */
1736 if (op->opnum == OP_READ && op->u.read.rd_filp)
1737 fput(op->u.read.rd_filp);
1738
1739 nfsd4_increment_op_stats(op->opnum); 1726 nfsd4_increment_op_stats(op->opnum);
1740 } 1727 }
1741 1728
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 039f9c8a95e8..61dfb33f0559 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3861,7 +3861,7 @@ static __be32
3861nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) 3861nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
3862{ 3862{
3863 __be32 status; 3863 __be32 status;
3864 unsigned char old_deny_bmap; 3864 unsigned char old_deny_bmap = stp->st_deny_bmap;
3865 3865
3866 if (!test_access(open->op_share_access, stp)) 3866 if (!test_access(open->op_share_access, stp))
3867 return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); 3867 return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
@@ -3870,7 +3870,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
3870 spin_lock(&fp->fi_lock); 3870 spin_lock(&fp->fi_lock);
3871 status = nfs4_file_check_deny(fp, open->op_share_deny); 3871 status = nfs4_file_check_deny(fp, open->op_share_deny);
3872 if (status == nfs_ok) { 3872 if (status == nfs_ok) {
3873 old_deny_bmap = stp->st_deny_bmap;
3874 set_deny(open->op_share_deny, stp); 3873 set_deny(open->op_share_deny, stp);
3875 fp->fi_share_deny |= 3874 fp->fi_share_deny |=
3876 (open->op_share_deny & NFS4_SHARE_DENY_BOTH); 3875 (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
@@ -4574,85 +4573,130 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
4574 return nfs_ok; 4573 return nfs_ok;
4575} 4574}
4576 4575
4576static struct file *
4577nfs4_find_file(struct nfs4_stid *s, int flags)
4578{
4579 if (!s)
4580 return NULL;
4581
4582 switch (s->sc_type) {
4583 case NFS4_DELEG_STID:
4584 if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
4585 return NULL;
4586 return get_file(s->sc_file->fi_deleg_file);
4587 case NFS4_OPEN_STID:
4588 case NFS4_LOCK_STID:
4589 if (flags & RD_STATE)
4590 return find_readable_file(s->sc_file);
4591 else
4592 return find_writeable_file(s->sc_file);
4593 break;
4594 }
4595
4596 return NULL;
4597}
4598
4599static __be32
4600nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
4601{
4602 __be32 status;
4603
4604 status = nfs4_check_fh(fhp, ols);
4605 if (status)
4606 return status;
4607 status = nfsd4_check_openowner_confirmed(ols);
4608 if (status)
4609 return status;
4610 return nfs4_check_openmode(ols, flags);
4611}
4612
4613static __be32
4614nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
4615 struct file **filpp, bool *tmp_file, int flags)
4616{
4617 int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
4618 struct file *file;
4619 __be32 status;
4620
4621 file = nfs4_find_file(s, flags);
4622 if (file) {
4623 status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
4624 acc | NFSD_MAY_OWNER_OVERRIDE);
4625 if (status) {
4626 fput(file);
4627 return status;
4628 }
4629
4630 *filpp = file;
4631 } else {
4632 status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
4633 if (status)
4634 return status;
4635
4636 if (tmp_file)
4637 *tmp_file = true;
4638 }
4639
4640 return 0;
4641}
4642
4577/* 4643/*
4578* Checks for stateid operations 4644 * Checks for stateid operations
4579*/ 4645 */
4580__be32 4646__be32
4581nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, 4647nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
4582 stateid_t *stateid, int flags, struct file **filpp) 4648 struct nfsd4_compound_state *cstate, stateid_t *stateid,
4649 int flags, struct file **filpp, bool *tmp_file)
4583{ 4650{
4584 struct nfs4_stid *s; 4651 struct svc_fh *fhp = &cstate->current_fh;
4585 struct nfs4_ol_stateid *stp = NULL; 4652 struct inode *ino = d_inode(fhp->fh_dentry);
4586 struct nfs4_delegation *dp = NULL; 4653 struct net *net = SVC_NET(rqstp);
4587 struct svc_fh *current_fh = &cstate->current_fh;
4588 struct inode *ino = d_inode(current_fh->fh_dentry);
4589 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 4654 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
4590 struct file *file = NULL; 4655 struct nfs4_stid *s = NULL;
4591 __be32 status; 4656 __be32 status;
4592 4657
4593 if (filpp) 4658 if (filpp)
4594 *filpp = NULL; 4659 *filpp = NULL;
4660 if (tmp_file)
4661 *tmp_file = false;
4595 4662
4596 if (grace_disallows_io(net, ino)) 4663 if (grace_disallows_io(net, ino))
4597 return nfserr_grace; 4664 return nfserr_grace;
4598 4665
4599 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4666 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
4600 return check_special_stateids(net, current_fh, stateid, flags); 4667 status = check_special_stateids(net, fhp, stateid, flags);
4668 goto done;
4669 }
4601 4670
4602 status = nfsd4_lookup_stateid(cstate, stateid, 4671 status = nfsd4_lookup_stateid(cstate, stateid,
4603 NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, 4672 NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
4604 &s, nn); 4673 &s, nn);
4605 if (status) 4674 if (status)
4606 return status; 4675 return status;
4607 status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); 4676 status = check_stateid_generation(stateid, &s->sc_stateid,
4677 nfsd4_has_session(cstate));
4608 if (status) 4678 if (status)
4609 goto out; 4679 goto out;
4680
4610 switch (s->sc_type) { 4681 switch (s->sc_type) {
4611 case NFS4_DELEG_STID: 4682 case NFS4_DELEG_STID:
4612 dp = delegstateid(s); 4683 status = nfs4_check_delegmode(delegstateid(s), flags);
4613 status = nfs4_check_delegmode(dp, flags);
4614 if (status)
4615 goto out;
4616 if (filpp) {
4617 file = dp->dl_stid.sc_file->fi_deleg_file;
4618 if (!file) {
4619 WARN_ON_ONCE(1);
4620 status = nfserr_serverfault;
4621 goto out;
4622 }
4623 get_file(file);
4624 }
4625 break; 4684 break;
4626 case NFS4_OPEN_STID: 4685 case NFS4_OPEN_STID:
4627 case NFS4_LOCK_STID: 4686 case NFS4_LOCK_STID:
4628 stp = openlockstateid(s); 4687 status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
4629 status = nfs4_check_fh(current_fh, stp);
4630 if (status)
4631 goto out;
4632 status = nfsd4_check_openowner_confirmed(stp);
4633 if (status)
4634 goto out;
4635 status = nfs4_check_openmode(stp, flags);
4636 if (status)
4637 goto out;
4638 if (filpp) {
4639 struct nfs4_file *fp = stp->st_stid.sc_file;
4640
4641 if (flags & RD_STATE)
4642 file = find_readable_file(fp);
4643 else
4644 file = find_writeable_file(fp);
4645 }
4646 break; 4688 break;
4647 default: 4689 default:
4648 status = nfserr_bad_stateid; 4690 status = nfserr_bad_stateid;
4649 goto out; 4691 break;
4650 } 4692 }
4651 status = nfs_ok; 4693
4652 if (file) 4694done:
4653 *filpp = file; 4695 if (!status && filpp)
4696 status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
4654out: 4697out:
4655 nfs4_put_stid(s); 4698 if (s)
4699 nfs4_put_stid(s);
4656 return status; 4700 return status;
4657} 4701}
4658 4702
@@ -5505,7 +5549,7 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
5505 __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); 5549 __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
5506 if (!err) { 5550 if (!err) {
5507 err = nfserrno(vfs_test_lock(file, lock)); 5551 err = nfserrno(vfs_test_lock(file, lock));
5508 nfsd_close(file); 5552 fput(file);
5509 } 5553 }
5510 return err; 5554 return err;
5511} 5555}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 158badf945df..54633858733a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -33,6 +33,7 @@
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */ 34 */
35 35
36#include <linux/file.h>
36#include <linux/slab.h> 37#include <linux/slab.h>
37#include <linux/namei.h> 38#include <linux/namei.h>
38#include <linux/statfs.h> 39#include <linux/statfs.h>
@@ -2227,7 +2228,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
2227 u32 rdattr_err = 0; 2228 u32 rdattr_err = 0;
2228 __be32 status; 2229 __be32 status;
2229 int err; 2230 int err;
2230 int aclsupport = 0;
2231 struct nfs4_acl *acl = NULL; 2231 struct nfs4_acl *acl = NULL;
2232 void *context = NULL; 2232 void *context = NULL;
2233 int contextlen; 2233 int contextlen;
@@ -2274,19 +2274,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
2274 goto out; 2274 goto out;
2275 fhp = tempfh; 2275 fhp = tempfh;
2276 } 2276 }
2277 if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT 2277 if (bmval0 & FATTR4_WORD0_ACL) {
2278 | FATTR4_WORD0_SUPPORTED_ATTRS)) {
2279 err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); 2278 err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
2280 aclsupport = (err == 0); 2279 if (err == -EOPNOTSUPP)
2281 if (bmval0 & FATTR4_WORD0_ACL) { 2280 bmval0 &= ~FATTR4_WORD0_ACL;
2282 if (err == -EOPNOTSUPP) 2281 else if (err == -EINVAL) {
2283 bmval0 &= ~FATTR4_WORD0_ACL; 2282 status = nfserr_attrnotsupp;
2284 else if (err == -EINVAL) { 2283 goto out;
2285 status = nfserr_attrnotsupp; 2284 } else if (err != 0)
2286 goto out; 2285 goto out_nfserr;
2287 } else if (err != 0)
2288 goto out_nfserr;
2289 }
2290 } 2286 }
2291 2287
2292#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 2288#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
@@ -2338,7 +2334,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
2338 u32 word1 = nfsd_suppattrs1(minorversion); 2334 u32 word1 = nfsd_suppattrs1(minorversion);
2339 u32 word2 = nfsd_suppattrs2(minorversion); 2335 u32 word2 = nfsd_suppattrs2(minorversion);
2340 2336
2341 if (!aclsupport) 2337 if (!IS_POSIXACL(dentry->d_inode))
2342 word0 &= ~FATTR4_WORD0_ACL; 2338 word0 &= ~FATTR4_WORD0_ACL;
2343 if (!contextsupport) 2339 if (!contextsupport)
2344 word2 &= ~FATTR4_WORD2_SECURITY_LABEL; 2340 word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
@@ -2486,7 +2482,7 @@ out_acl:
2486 p = xdr_reserve_space(xdr, 4); 2482 p = xdr_reserve_space(xdr, 4);
2487 if (!p) 2483 if (!p)
2488 goto out_resource; 2484 goto out_resource;
2489 *p++ = cpu_to_be32(aclsupport ? 2485 *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ?
2490 ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); 2486 ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
2491 } 2487 }
2492 if (bmval0 & FATTR4_WORD0_CANSETTIME) { 2488 if (bmval0 & FATTR4_WORD0_CANSETTIME) {
@@ -3422,52 +3418,51 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3422 unsigned long maxcount; 3418 unsigned long maxcount;
3423 struct xdr_stream *xdr = &resp->xdr; 3419 struct xdr_stream *xdr = &resp->xdr;
3424 struct file *file = read->rd_filp; 3420 struct file *file = read->rd_filp;
3425 struct svc_fh *fhp = read->rd_fhp;
3426 int starting_len = xdr->buf->len; 3421 int starting_len = xdr->buf->len;
3427 struct raparms *ra; 3422 struct raparms *ra = NULL;
3428 __be32 *p; 3423 __be32 *p;
3429 __be32 err;
3430 3424
3431 if (nfserr) 3425 if (nfserr)
3432 return nfserr; 3426 goto out;
3433 3427
3434 p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ 3428 p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
3435 if (!p) { 3429 if (!p) {
3436 WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); 3430 WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
3437 return nfserr_resource; 3431 nfserr = nfserr_resource;
3432 goto out;
3438 } 3433 }
3439 if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { 3434 if (resp->xdr.buf->page_len &&
3435 test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
3440 WARN_ON_ONCE(1); 3436 WARN_ON_ONCE(1);
3441 return nfserr_resource; 3437 nfserr = nfserr_resource;
3438 goto out;
3442 } 3439 }
3443 xdr_commit_encode(xdr); 3440 xdr_commit_encode(xdr);
3444 3441
3445 maxcount = svc_max_payload(resp->rqstp); 3442 maxcount = svc_max_payload(resp->rqstp);
3446 maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); 3443 maxcount = min_t(unsigned long, maxcount,
3444 (xdr->buf->buflen - xdr->buf->len));
3447 maxcount = min_t(unsigned long, maxcount, read->rd_length); 3445 maxcount = min_t(unsigned long, maxcount, read->rd_length);
3448 3446
3449 if (read->rd_filp) 3447 if (read->rd_tmp_file)
3450 err = nfsd_permission(resp->rqstp, fhp->fh_export, 3448 ra = nfsd_init_raparms(file);
3451 fhp->fh_dentry,
3452 NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
3453 else
3454 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
3455 &file, &ra);
3456 if (err)
3457 goto err_truncate;
3458 3449
3459 if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) 3450 if (file->f_op->splice_read &&
3460 err = nfsd4_encode_splice_read(resp, read, file, maxcount); 3451 test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
3452 nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
3461 else 3453 else
3462 err = nfsd4_encode_readv(resp, read, file, maxcount); 3454 nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
3463 3455
3464 if (!read->rd_filp) 3456 if (ra)
3465 nfsd_put_tmp_read_open(file, ra); 3457 nfsd_put_raparams(file, ra);
3466 3458
3467err_truncate: 3459 if (nfserr)
3468 if (err)
3469 xdr_truncate_encode(xdr, starting_len); 3460 xdr_truncate_encode(xdr, starting_len);
3470 return err; 3461
3462out:
3463 if (file)
3464 fput(file);
3465 return nfserr;
3471} 3466}
3472 3467
3473static __be32 3468static __be32
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index aecbcd34d336..4cd78ef4c95c 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -59,13 +59,61 @@ static __be32
59nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, 59nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
60 struct nfsd_attrstat *resp) 60 struct nfsd_attrstat *resp)
61{ 61{
62 struct iattr *iap = &argp->attrs;
63 struct svc_fh *fhp;
62 __be32 nfserr; 64 __be32 nfserr;
65
63 dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", 66 dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
64 SVCFH_fmt(&argp->fh), 67 SVCFH_fmt(&argp->fh),
65 argp->attrs.ia_valid, (long) argp->attrs.ia_size); 68 argp->attrs.ia_valid, (long) argp->attrs.ia_size);
66 69
67 fh_copy(&resp->fh, &argp->fh); 70 fhp = fh_copy(&resp->fh, &argp->fh);
68 nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0); 71
72 /*
73 * NFSv2 does not differentiate between "set-[ac]time-to-now"
74 * which only requires access, and "set-[ac]time-to-X" which
75 * requires ownership.
76 * So if it looks like it might be "set both to the same time which
77 * is close to now", and if inode_change_ok fails, then we
78 * convert to "set to now" instead of "set to explicit time"
79 *
80 * We only call inode_change_ok as the last test as technically
81 * it is not an interface that we should be using.
82 */
83#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
84#define MAX_TOUCH_TIME_ERROR (30*60)
85 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
86 iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
87 /*
88 * Looks probable.
89 *
90 * Now just make sure time is in the right ballpark.
91 * Solaris, at least, doesn't seem to care what the time
92 * request is. We require it be within 30 minutes of now.
93 */
94 time_t delta = iap->ia_atime.tv_sec - get_seconds();
95 struct inode *inode;
96
97 nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
98 if (nfserr)
99 goto done;
100 inode = d_inode(fhp->fh_dentry);
101
102 if (delta < 0)
103 delta = -delta;
104 if (delta < MAX_TOUCH_TIME_ERROR &&
105 inode_change_ok(inode, iap) != 0) {
106 /*
107 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
108 * This will cause notify_change to set these times
109 * to "now"
110 */
111 iap->ia_valid &= ~BOTH_TIME_SET;
112 }
113 }
114
115 nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
116done:
69 return nfsd_return_attrs(nfserr, resp); 117 return nfsd_return_attrs(nfserr, resp);
70} 118}
71 119
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index dbc4f85a5008..4874ce515fc1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,6 +68,7 @@ struct nfsd4_callback {
68 struct nfsd4_callback_ops *cb_ops; 68 struct nfsd4_callback_ops *cb_ops;
69 struct work_struct cb_work; 69 struct work_struct cb_work;
70 int cb_status; 70 int cb_status;
71 bool cb_update_seq_nr;
71 bool cb_need_restart; 72 bool cb_need_restart;
72}; 73};
73 74
@@ -582,9 +583,9 @@ enum nfsd4_cb_op {
582struct nfsd4_compound_state; 583struct nfsd4_compound_state;
583struct nfsd_net; 584struct nfsd_net;
584 585
585extern __be32 nfs4_preprocess_stateid_op(struct net *net, 586extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
586 struct nfsd4_compound_state *cstate, 587 struct nfsd4_compound_state *cstate, stateid_t *stateid,
587 stateid_t *stateid, int flags, struct file **filp); 588 int flags, struct file **filp, bool *tmp_file);
588__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, 589__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
589 stateid_t *stateid, unsigned char typemask, 590 stateid_t *stateid, unsigned char typemask,
590 struct nfs4_stid **s, struct nfsd_net *nn); 591 struct nfs4_stid **s, struct nfsd_net *nn);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 84d770be056e..b5e077a6e7d4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -302,42 +302,6 @@ commit_metadata(struct svc_fh *fhp)
302static void 302static void
303nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) 303nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
304{ 304{
305 /*
306 * NFSv2 does not differentiate between "set-[ac]time-to-now"
307 * which only requires access, and "set-[ac]time-to-X" which
308 * requires ownership.
309 * So if it looks like it might be "set both to the same time which
310 * is close to now", and if inode_change_ok fails, then we
311 * convert to "set to now" instead of "set to explicit time"
312 *
313 * We only call inode_change_ok as the last test as technically
314 * it is not an interface that we should be using.
315 */
316#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
317#define MAX_TOUCH_TIME_ERROR (30*60)
318 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
319 iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
320 /*
321 * Looks probable.
322 *
323 * Now just make sure time is in the right ballpark.
324 * Solaris, at least, doesn't seem to care what the time
325 * request is. We require it be within 30 minutes of now.
326 */
327 time_t delta = iap->ia_atime.tv_sec - get_seconds();
328 if (delta < 0)
329 delta = -delta;
330 if (delta < MAX_TOUCH_TIME_ERROR &&
331 inode_change_ok(inode, iap) != 0) {
332 /*
333 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
334 * This will cause notify_change to set these times
335 * to "now"
336 */
337 iap->ia_valid &= ~BOTH_TIME_SET;
338 }
339 }
340
341 /* sanitize the mode change */ 305 /* sanitize the mode change */
342 if (iap->ia_valid & ATTR_MODE) { 306 if (iap->ia_valid & ATTR_MODE) {
343 iap->ia_mode &= S_IALLUGO; 307 iap->ia_mode &= S_IALLUGO;
@@ -538,16 +502,11 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
538 struct file *file, loff_t offset, loff_t len, 502 struct file *file, loff_t offset, loff_t len,
539 int flags) 503 int flags)
540{ 504{
541 __be32 err;
542 int error; 505 int error;
543 506
544 if (!S_ISREG(file_inode(file)->i_mode)) 507 if (!S_ISREG(file_inode(file)->i_mode))
545 return nfserr_inval; 508 return nfserr_inval;
546 509
547 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
548 if (err)
549 return err;
550
551 error = vfs_fallocate(file, flags, offset, len); 510 error = vfs_fallocate(file, flags, offset, len);
552 if (!error) 511 if (!error)
553 error = commit_metadata(fhp); 512 error = commit_metadata(fhp);
@@ -744,7 +703,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
744 703
745 host_err = ima_file_check(file, may_flags, 0); 704 host_err = ima_file_check(file, may_flags, 0);
746 if (host_err) { 705 if (host_err) {
747 nfsd_close(file); 706 fput(file);
748 goto out_nfserr; 707 goto out_nfserr;
749 } 708 }
750 709
@@ -761,23 +720,12 @@ out:
761 return err; 720 return err;
762} 721}
763 722
764/* 723struct raparms *
765 * Close a file. 724nfsd_init_raparms(struct file *file)
766 */
767void
768nfsd_close(struct file *filp)
769{
770 fput(filp);
771}
772
773/*
774 * Obtain the readahead parameters for the file
775 * specified by (dev, ino).
776 */
777
778static inline struct raparms *
779nfsd_get_raparms(dev_t dev, ino_t ino)
780{ 725{
726 struct inode *inode = file_inode(file);
727 dev_t dev = inode->i_sb->s_dev;
728 ino_t ino = inode->i_ino;
781 struct raparms *ra, **rap, **frap = NULL; 729 struct raparms *ra, **rap, **frap = NULL;
782 int depth = 0; 730 int depth = 0;
783 unsigned int hash; 731 unsigned int hash;
@@ -814,9 +762,23 @@ found:
814 ra->p_count++; 762 ra->p_count++;
815 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 763 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
816 spin_unlock(&rab->pb_lock); 764 spin_unlock(&rab->pb_lock);
765
766 if (ra->p_set)
767 file->f_ra = ra->p_ra;
817 return ra; 768 return ra;
818} 769}
819 770
771void nfsd_put_raparams(struct file *file, struct raparms *ra)
772{
773 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
774
775 spin_lock(&rab->pb_lock);
776 ra->p_ra = file->f_ra;
777 ra->p_set = 1;
778 ra->p_count--;
779 spin_unlock(&rab->pb_lock);
780}
781
820/* 782/*
821 * Grab and keep cached pages associated with a file in the svc_rqst 783 * Grab and keep cached pages associated with a file in the svc_rqst
822 * so that they can be passed to the network sendmsg/sendpage routines 784 * so that they can be passed to the network sendmsg/sendpage routines
@@ -945,7 +907,7 @@ static int wait_for_concurrent_writes(struct file *file)
945 return err; 907 return err;
946} 908}
947 909
948static __be32 910__be32
949nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 911nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
950 loff_t offset, struct kvec *vec, int vlen, 912 loff_t offset, struct kvec *vec, int vlen,
951 unsigned long *cnt, int *stablep) 913 unsigned long *cnt, int *stablep)
@@ -1009,40 +971,6 @@ out_nfserr:
1009 return err; 971 return err;
1010} 972}
1011 973
1012__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
1013 struct file **file, struct raparms **ra)
1014{
1015 struct inode *inode;
1016 __be32 err;
1017
1018 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
1019 if (err)
1020 return err;
1021
1022 inode = file_inode(*file);
1023
1024 /* Get readahead parameters */
1025 *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
1026
1027 if (*ra && (*ra)->p_set)
1028 (*file)->f_ra = (*ra)->p_ra;
1029 return nfs_ok;
1030}
1031
1032void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
1033{
1034 /* Write back readahead params */
1035 if (ra) {
1036 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
1037 spin_lock(&rab->pb_lock);
1038 ra->p_ra = file->f_ra;
1039 ra->p_set = 1;
1040 ra->p_count--;
1041 spin_unlock(&rab->pb_lock);
1042 }
1043 nfsd_close(file);
1044}
1045
1046/* 974/*
1047 * Read data from a file. count must contain the requested read count 975 * Read data from a file. count must contain the requested read count
1048 * on entry. On return, *count contains the number of bytes actually read. 976 * on entry. On return, *count contains the number of bytes actually read.
@@ -1055,13 +983,15 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1055 struct raparms *ra; 983 struct raparms *ra;
1056 __be32 err; 984 __be32 err;
1057 985
1058 err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); 986 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1059 if (err) 987 if (err)
1060 return err; 988 return err;
1061 989
990 ra = nfsd_init_raparms(file);
1062 err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); 991 err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
1063 992 if (ra)
1064 nfsd_put_tmp_read_open(file, ra); 993 nfsd_put_raparams(file, ra);
994 fput(file);
1065 995
1066 return err; 996 return err;
1067} 997}
@@ -1093,7 +1023,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1093 if (cnt) 1023 if (cnt)
1094 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, 1024 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
1095 cnt, stablep); 1025 cnt, stablep);
1096 nfsd_close(file); 1026 fput(file);
1097 } 1027 }
1098out: 1028out:
1099 return err; 1029 return err;
@@ -1138,7 +1068,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1138 err = nfserr_notsupp; 1068 err = nfserr_notsupp;
1139 } 1069 }
1140 1070
1141 nfsd_close(file); 1071 fput(file);
1142out: 1072out:
1143 return err; 1073 return err;
1144} 1074}
@@ -1977,7 +1907,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1977 if (err == nfserr_eof || err == nfserr_toosmall) 1907 if (err == nfserr_eof || err == nfserr_toosmall)
1978 err = nfs_ok; /* can still be found in ->err */ 1908 err = nfs_ok; /* can still be found in ->err */
1979out_close: 1909out_close:
1980 nfsd_close(file); 1910 fput(file);
1981out: 1911out:
1982 return err; 1912 return err;
1983} 1913}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 2050cb016998..5be875e3e638 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -71,11 +71,7 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
71#endif /* CONFIG_NFSD_V3 */ 71#endif /* CONFIG_NFSD_V3 */
72__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, 72__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
73 int, struct file **); 73 int, struct file **);
74void nfsd_close(struct file *);
75struct raparms; 74struct raparms;
76__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
77 struct file **, struct raparms **);
78void nfsd_put_tmp_read_open(struct file *, struct raparms *);
79__be32 nfsd_splice_read(struct svc_rqst *, 75__be32 nfsd_splice_read(struct svc_rqst *,
80 struct file *, loff_t, unsigned long *); 76 struct file *, loff_t, unsigned long *);
81__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, 77__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
@@ -84,6 +80,10 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
84 loff_t, struct kvec *, int, unsigned long *); 80 loff_t, struct kvec *, int, unsigned long *);
85__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, 81__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
86 loff_t, struct kvec *,int, unsigned long *, int *); 82 loff_t, struct kvec *,int, unsigned long *, int *);
83__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
84 struct file *file, loff_t offset,
85 struct kvec *vec, int vlen, unsigned long *cnt,
86 int *stablep);
87__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, 87__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
88 char *, int *); 88 char *, int *);
89__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, 89__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -104,6 +104,9 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
104__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, 104__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
105 struct dentry *, int); 105 struct dentry *, int);
106 106
107struct raparms *nfsd_init_raparms(struct file *file);
108void nfsd_put_raparams(struct file *file, struct raparms *ra);
109
107static inline int fh_want_write(struct svc_fh *fh) 110static inline int fh_want_write(struct svc_fh *fh)
108{ 111{
109 int ret = mnt_want_write(fh->fh_export->ex_path.mnt); 112 int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2f8c092be2b3..9f991007a578 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -273,6 +273,7 @@ struct nfsd4_read {
273 u32 rd_length; /* request */ 273 u32 rd_length; /* request */
274 int rd_vlen; 274 int rd_vlen;
275 struct file *rd_filp; 275 struct file *rd_filp;
276 bool rd_tmp_file;
276 277
277 struct svc_rqst *rd_rqstp; /* response */ 278 struct svc_rqst *rd_rqstp; /* response */
278 struct svc_fh * rd_fhp; /* response */ 279 struct svc_fh * rd_fhp; /* response */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index df8edf8ec914..cb94ee4181d4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -172,6 +172,13 @@ struct svcxprt_rdma {
172#define RDMAXPRT_SQ_PENDING 2 172#define RDMAXPRT_SQ_PENDING 2
173#define RDMAXPRT_CONN_PENDING 3 173#define RDMAXPRT_CONN_PENDING 3
174 174
175#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */
176#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
177#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD
178#else
179#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
180#endif
181
175#define RPCRDMA_LISTEN_BACKLOG 10 182#define RPCRDMA_LISTEN_BACKLOG 10
176/* The default ORD value is based on two outstanding full-size writes with a 183/* The default ORD value is based on two outstanding full-size writes with a
177 * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ 184 * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
@@ -182,10 +189,9 @@ struct svcxprt_rdma {
182 189
183/* svc_rdma_marshal.c */ 190/* svc_rdma_marshal.c */
184extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); 191extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
185extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
186extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, 192extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
187 struct rpcrdma_msg *, 193 struct rpcrdma_msg *,
188 enum rpcrdma_errcode, u32 *); 194 enum rpcrdma_errcode, __be32 *);
189extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); 195extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
190extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); 196extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
191extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, 197extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
@@ -212,7 +218,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
212extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); 218extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
213extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, 219extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
214 enum rpcrdma_errcode); 220 enum rpcrdma_errcode);
215struct page *svc_rdma_get_page(void);
216extern int svc_rdma_post_recv(struct svcxprt_rdma *); 221extern int svc_rdma_post_recv(struct svcxprt_rdma *);
217extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 222extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
218extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); 223extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index adc0aff83fbb..2119c7c274d7 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -86,6 +86,10 @@
86#define ACL4_SUPPORT_AUDIT_ACL 0x04 86#define ACL4_SUPPORT_AUDIT_ACL 0x04
87#define ACL4_SUPPORT_ALARM_ACL 0x08 87#define ACL4_SUPPORT_ALARM_ACL 0x08
88 88
89#define NFS4_ACL_AUTO_INHERIT 0x00000001
90#define NFS4_ACL_PROTECTED 0x00000002
91#define NFS4_ACL_DEFAULTED 0x00000004
92
89#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001 93#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001
90#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002 94#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002
91#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004 95#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004
@@ -93,6 +97,7 @@
93#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 97#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
94#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 98#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
95#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 99#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
100#define NFS4_ACE_INHERITED_ACE 0x00000080
96 101
97#define NFS4_ACE_READ_DATA 0x00000001 102#define NFS4_ACE_READ_DATA 0x00000001
98#define NFS4_ACE_LIST_DIRECTORY 0x00000001 103#define NFS4_ACE_LIST_DIRECTORY 0x00000001
@@ -106,6 +111,8 @@
106#define NFS4_ACE_DELETE_CHILD 0x00000040 111#define NFS4_ACE_DELETE_CHILD 0x00000040
107#define NFS4_ACE_READ_ATTRIBUTES 0x00000080 112#define NFS4_ACE_READ_ATTRIBUTES 0x00000080
108#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100 113#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100
114#define NFS4_ACE_WRITE_RETENTION 0x00000200
115#define NFS4_ACE_WRITE_RETENTION_HOLD 0x00000400
109#define NFS4_ACE_DELETE 0x00010000 116#define NFS4_ACE_DELETE 0x00010000
110#define NFS4_ACE_READ_ACL 0x00020000 117#define NFS4_ACE_READ_ACL 0x00020000
111#define NFS4_ACE_WRITE_ACL 0x00040000 118#define NFS4_ACE_WRITE_ACL 0x00040000
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9068e72aa73c..04ce2c0b660e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -48,28 +48,16 @@ config SUNRPC_DEBUG
48 48
49 If unsure, say Y. 49 If unsure, say Y.
50 50
51config SUNRPC_XPRT_RDMA_CLIENT 51config SUNRPC_XPRT_RDMA
52 tristate "RPC over RDMA Client Support" 52 tristate "RPC-over-RDMA transport"
53 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS 53 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
54 default SUNRPC && INFINIBAND 54 default SUNRPC && INFINIBAND
55 help 55 help
56 This option allows the NFS client to support an RDMA-enabled 56 This option allows the NFS client and server to use RDMA
57 transport. 57 transports (InfiniBand, iWARP, or RoCE).
58 58
59 To compile RPC client RDMA transport support as a module, 59 To compile this support as a module, choose M. The module
60 choose M here: the module will be called xprtrdma. 60 will be called rpcrdma.ko.
61 61
62 If unsure, say N. 62 If unsure, or you know there is no RDMA capability on your
63 63 hardware platform, say N.
64config SUNRPC_XPRT_RDMA_SERVER
65 tristate "RPC over RDMA Server Support"
66 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
67 default SUNRPC && INFINIBAND
68 help
69 This option allows the NFS server to support an RDMA-enabled
70 transport.
71
72 To compile RPC server RDMA transport support as a module,
73 choose M here: the module will be called svcrdma.
74
75 If unsure, say N.
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 15e6f6c23c5d..936ad0a15371 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,8 +5,7 @@
5 5
6obj-$(CONFIG_SUNRPC) += sunrpc.o 6obj-$(CONFIG_SUNRPC) += sunrpc.o
7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ 7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
8 8obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
9obj-y += xprtrdma/
10 9
11sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ 10sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
12 auth.o auth_null.o auth_unix.o auth_generic.o \ 11 auth.o auth_null.o auth_unix.o auth_generic.o \
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index b5408e8a37f2..fee3c15a4b52 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
881 if (err) 881 if (err)
882 goto out_err; 882 goto out_err;
883 883
884 sg_init_table(sg, 1); 884 sg_init_one(sg, &zeroconstant, 4);
885 sg_set_buf(sg, &zeroconstant, 4);
886
887 err = crypto_hash_digest(&desc, sg, 4, Kseq); 885 err = crypto_hash_digest(&desc, sg, 4, Kseq);
888 if (err) 886 if (err)
889 goto out_err; 887 goto out_err;
@@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
951 if (err) 949 if (err)
952 goto out_err; 950 goto out_err;
953 951
954 sg_init_table(sg, 1); 952 sg_init_one(sg, zeroconstant, 4);
955 sg_set_buf(sg, zeroconstant, 4);
956
957 err = crypto_hash_digest(&desc, sg, 4, Kcrypt); 953 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
958 if (err) 954 if (err)
959 goto out_err; 955 goto out_err;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 78974e4d9ad2..852ae606b02a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1290,7 +1290,6 @@ err_bad:
1290 svc_putnl(resv, ntohl(rpc_stat)); 1290 svc_putnl(resv, ntohl(rpc_stat));
1291 goto sendit; 1291 goto sendit;
1292} 1292}
1293EXPORT_SYMBOL_GPL(svc_process);
1294 1293
1295/* 1294/*
1296 * Process the RPC request. 1295 * Process the RPC request.
@@ -1338,6 +1337,7 @@ out_drop:
1338 svc_drop(rqstp); 1337 svc_drop(rqstp);
1339 return 0; 1338 return 0;
1340} 1339}
1340EXPORT_SYMBOL_GPL(svc_process);
1341 1341
1342#if defined(CONFIG_SUNRPC_BACKCHANNEL) 1342#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1343/* 1343/*
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 579f72bbcf4b..48913de240bd 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,9 +1,7 @@
1obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o 1obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
2 2
3xprtrdma-y := transport.o rpc_rdma.o verbs.o \ 3rpcrdma-y := transport.o rpc_rdma.o verbs.o \
4 fmr_ops.o frwr_ops.o physical_ops.o 4 fmr_ops.o frwr_ops.o physical_ops.o \
5 5 svc_rdma.o svc_rdma_transport.o \
6obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o 6 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
7 7 module.o
8svcrdma-y := svc_rdma.o svc_rdma_transport.o \
9 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
new file mode 100644
index 000000000000..560712bd9fa2
--- /dev/null
+++ b/net/sunrpc/xprtrdma/module.c
@@ -0,0 +1,46 @@
1/*
2 * Copyright (c) 2015 Oracle. All rights reserved.
3 */
4
5/* rpcrdma.ko module initialization
6 */
7
8#include <linux/module.h>
9#include <linux/init.h>
10#include <linux/sunrpc/svc_rdma.h>
11#include "xprt_rdma.h"
12
13#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
14# define RPCDBG_FACILITY RPCDBG_TRANS
15#endif
16
17MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
18MODULE_DESCRIPTION("RPC/RDMA Transport");
19MODULE_LICENSE("Dual BSD/GPL");
20MODULE_ALIAS("svcrdma");
21MODULE_ALIAS("xprtrdma");
22
23static void __exit rpc_rdma_cleanup(void)
24{
25 xprt_rdma_cleanup();
26 svc_rdma_cleanup();
27}
28
29static int __init rpc_rdma_init(void)
30{
31 int rc;
32
33 rc = svc_rdma_init();
34 if (rc)
35 goto out;
36
37 rc = xprt_rdma_init();
38 if (rc)
39 svc_rdma_cleanup();
40
41out:
42 return rc;
43}
44
45module_init(rpc_rdma_init);
46module_exit(rpc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c1b6270262c2..2cd252f023a5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -38,8 +38,7 @@
38 * 38 *
39 * Author: Tom Tucker <tom@opengridcomputing.com> 39 * Author: Tom Tucker <tom@opengridcomputing.com>
40 */ 40 */
41#include <linux/module.h> 41
42#include <linux/init.h>
43#include <linux/slab.h> 42#include <linux/slab.h>
44#include <linux/fs.h> 43#include <linux/fs.h>
45#include <linux/sysctl.h> 44#include <linux/sysctl.h>
@@ -295,8 +294,3 @@ int svc_rdma_init(void)
295 destroy_workqueue(svc_rdma_wq); 294 destroy_workqueue(svc_rdma_wq);
296 return -ENOMEM; 295 return -ENOMEM;
297} 296}
298MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
299MODULE_DESCRIPTION("SVC RDMA Transport");
300MODULE_LICENSE("Dual BSD/GPL");
301module_init(svc_rdma_init);
302module_exit(svc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index b681855cf970..e2fca7617242 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -50,12 +50,12 @@
50/* 50/*
51 * Decodes a read chunk list. The expected format is as follows: 51 * Decodes a read chunk list. The expected format is as follows:
52 * descrim : xdr_one 52 * descrim : xdr_one
53 * position : u32 offset into XDR stream 53 * position : __be32 offset into XDR stream
54 * handle : u32 RKEY 54 * handle : __be32 RKEY
55 * . . . 55 * . . .
56 * end-of-list: xdr_zero 56 * end-of-list: xdr_zero
57 */ 57 */
58static u32 *decode_read_list(u32 *va, u32 *vaend) 58static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
59{ 59{
60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; 60 struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61 61
@@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
67 } 67 }
68 ch++; 68 ch++;
69 } 69 }
70 return (u32 *)&ch->rc_position; 70 return &ch->rc_position;
71} 71}
72 72
73/* 73/*
74 * Decodes a write chunk list. The expected format is as follows: 74 * Decodes a write chunk list. The expected format is as follows:
75 * descrim : xdr_one 75 * descrim : xdr_one
76 * nchunks : <count> 76 * nchunks : <count>
77 * handle : u32 RKEY ---+ 77 * handle : __be32 RKEY ---+
78 * length : u32 <len of segment> | 78 * length : __be32 <len of segment> |
79 * offset : remove va + <count> 79 * offset : remove va + <count>
80 * . . . | 80 * . . . |
81 * ---+ 81 * ---+
82 */ 82 */
83static u32 *decode_write_list(u32 *va, u32 *vaend) 83static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
84{ 84{
85 unsigned long start, end; 85 unsigned long start, end;
86 int nchunks; 86 int nchunks;
@@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
90 90
91 /* Check for not write-array */ 91 /* Check for not write-array */
92 if (ary->wc_discrim == xdr_zero) 92 if (ary->wc_discrim == xdr_zero)
93 return (u32 *)&ary->wc_nchunks; 93 return &ary->wc_nchunks;
94 94
95 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 95 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
96 (unsigned long)vaend) { 96 (unsigned long)vaend) {
97 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 97 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
98 return NULL; 98 return NULL;
99 } 99 }
100 nchunks = ntohl(ary->wc_nchunks); 100 nchunks = be32_to_cpu(ary->wc_nchunks);
101 101
102 start = (unsigned long)&ary->wc_array[0]; 102 start = (unsigned long)&ary->wc_array[0];
103 end = (unsigned long)vaend; 103 end = (unsigned long)vaend;
@@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
112 * rs_length is the 2nd 4B field in wc_target and taking its 112 * rs_length is the 2nd 4B field in wc_target and taking its
113 * address skips the list terminator 113 * address skips the list terminator
114 */ 114 */
115 return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length; 115 return &ary->wc_array[nchunks].wc_target.rs_length;
116} 116}
117 117
118static u32 *decode_reply_array(u32 *va, u32 *vaend) 118static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
119{ 119{
120 unsigned long start, end; 120 unsigned long start, end;
121 int nchunks; 121 int nchunks;
@@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
124 124
125 /* Check for no reply-array */ 125 /* Check for no reply-array */
126 if (ary->wc_discrim == xdr_zero) 126 if (ary->wc_discrim == xdr_zero)
127 return (u32 *)&ary->wc_nchunks; 127 return &ary->wc_nchunks;
128 128
129 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > 129 if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
130 (unsigned long)vaend) { 130 (unsigned long)vaend) {
131 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); 131 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
132 return NULL; 132 return NULL;
133 } 133 }
134 nchunks = ntohl(ary->wc_nchunks); 134 nchunks = be32_to_cpu(ary->wc_nchunks);
135 135
136 start = (unsigned long)&ary->wc_array[0]; 136 start = (unsigned long)&ary->wc_array[0];
137 end = (unsigned long)vaend; 137 end = (unsigned long)vaend;
@@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
142 ary, nchunks, vaend); 142 ary, nchunks, vaend);
143 return NULL; 143 return NULL;
144 } 144 }
145 return (u32 *)&ary->wc_array[nchunks]; 145 return (__be32 *)&ary->wc_array[nchunks];
146} 146}
147 147
148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, 148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
149 struct svc_rqst *rqstp) 149 struct svc_rqst *rqstp)
150{ 150{
151 struct rpcrdma_msg *rmsgp = NULL; 151 struct rpcrdma_msg *rmsgp = NULL;
152 u32 *va; 152 __be32 *va, *vaend;
153 u32 *vaend;
154 u32 hdr_len; 153 u32 hdr_len;
155 154
156 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 155 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
@@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
162 return -EINVAL; 161 return -EINVAL;
163 } 162 }
164 163
165 /* Decode the header */ 164 if (rmsgp->rm_vers != rpcrdma_version)
166 rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
167 rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
168 rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
169 rmsgp->rm_type = ntohl(rmsgp->rm_type);
170
171 if (rmsgp->rm_vers != RPCRDMA_VERSION)
172 return -ENOSYS; 165 return -ENOSYS;
173 166
174 /* Pull in the extra for the padded case and bump our pointer */ 167 /* Pull in the extra for the padded case and bump our pointer */
175 if (rmsgp->rm_type == RDMA_MSGP) { 168 if (rmsgp->rm_type == rdma_msgp) {
176 int hdrlen; 169 int hdrlen;
170
177 rmsgp->rm_body.rm_padded.rm_align = 171 rmsgp->rm_body.rm_padded.rm_align =
178 ntohl(rmsgp->rm_body.rm_padded.rm_align); 172 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
179 rmsgp->rm_body.rm_padded.rm_thresh = 173 rmsgp->rm_body.rm_padded.rm_thresh =
180 ntohl(rmsgp->rm_body.rm_padded.rm_thresh); 174 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
181 175
182 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 176 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
183 rqstp->rq_arg.head[0].iov_base = va; 177 rqstp->rq_arg.head[0].iov_base = va;
@@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
192 * chunk list and a reply chunk list. 186 * chunk list and a reply chunk list.
193 */ 187 */
194 va = &rmsgp->rm_body.rm_chunks[0]; 188 va = &rmsgp->rm_body.rm_chunks[0];
195 vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); 189 vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
196 va = decode_read_list(va, vaend); 190 va = decode_read_list(va, vaend);
197 if (!va) 191 if (!va)
198 return -EINVAL; 192 return -EINVAL;
@@ -211,76 +205,20 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
211 return hdr_len; 205 return hdr_len;
212} 206}
213 207
214int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
215{
216 struct rpcrdma_msg *rmsgp = NULL;
217 struct rpcrdma_read_chunk *ch;
218 struct rpcrdma_write_array *ary;
219 u32 *va;
220 u32 hdrlen;
221
222 dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
223 rqstp);
224 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
225
226 /* Pull in the extra for the padded case and bump our pointer */
227 if (rmsgp->rm_type == RDMA_MSGP) {
228 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
229 rqstp->rq_arg.head[0].iov_base = va;
230 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
231 rqstp->rq_arg.head[0].iov_len -= hdrlen;
232 return hdrlen;
233 }
234
235 /*
236 * Skip all chunks to find RPC msg. These were previously processed
237 */
238 va = &rmsgp->rm_body.rm_chunks[0];
239
240 /* Skip read-list */
241 for (ch = (struct rpcrdma_read_chunk *)va;
242 ch->rc_discrim != xdr_zero; ch++);
243 va = (u32 *)&ch->rc_position;
244
245 /* Skip write-list */
246 ary = (struct rpcrdma_write_array *)va;
247 if (ary->wc_discrim == xdr_zero)
248 va = (u32 *)&ary->wc_nchunks;
249 else
250 /*
251 * rs_length is the 2nd 4B field in wc_target and taking its
252 * address skips the list terminator
253 */
254 va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
255
256 /* Skip reply-array */
257 ary = (struct rpcrdma_write_array *)va;
258 if (ary->wc_discrim == xdr_zero)
259 va = (u32 *)&ary->wc_nchunks;
260 else
261 va = (u32 *)&ary->wc_array[ary->wc_nchunks];
262
263 rqstp->rq_arg.head[0].iov_base = va;
264 hdrlen = (unsigned long)va - (unsigned long)rmsgp;
265 rqstp->rq_arg.head[0].iov_len -= hdrlen;
266
267 return hdrlen;
268}
269
270int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, 208int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
271 struct rpcrdma_msg *rmsgp, 209 struct rpcrdma_msg *rmsgp,
272 enum rpcrdma_errcode err, u32 *va) 210 enum rpcrdma_errcode err, __be32 *va)
273{ 211{
274 u32 *startp = va; 212 __be32 *startp = va;
275 213
276 *va++ = htonl(rmsgp->rm_xid); 214 *va++ = rmsgp->rm_xid;
277 *va++ = htonl(rmsgp->rm_vers); 215 *va++ = rmsgp->rm_vers;
278 *va++ = htonl(xprt->sc_max_requests); 216 *va++ = cpu_to_be32(xprt->sc_max_requests);
279 *va++ = htonl(RDMA_ERROR); 217 *va++ = rdma_error;
280 *va++ = htonl(err); 218 *va++ = cpu_to_be32(err);
281 if (err == ERR_VERS) { 219 if (err == ERR_VERS) {
282 *va++ = htonl(RPCRDMA_VERSION); 220 *va++ = rpcrdma_version;
283 *va++ = htonl(RPCRDMA_VERSION); 221 *va++ = rpcrdma_version;
284 } 222 }
285 223
286 return (int)((unsigned long)va - (unsigned long)startp); 224 return (int)((unsigned long)va - (unsigned long)startp);
@@ -297,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
297 &rmsgp->rm_body.rm_chunks[1]; 235 &rmsgp->rm_body.rm_chunks[1];
298 if (wr_ary->wc_discrim) 236 if (wr_ary->wc_discrim)
299 wr_ary = (struct rpcrdma_write_array *) 237 wr_ary = (struct rpcrdma_write_array *)
300 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. 238 &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
301 wc_target.rs_length; 239 wc_target.rs_length;
302 else 240 else
303 wr_ary = (struct rpcrdma_write_array *) 241 wr_ary = (struct rpcrdma_write_array *)
@@ -306,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
306 /* skip reply array */ 244 /* skip reply array */
307 if (wr_ary->wc_discrim) 245 if (wr_ary->wc_discrim)
308 wr_ary = (struct rpcrdma_write_array *) 246 wr_ary = (struct rpcrdma_write_array *)
309 &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; 247 &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
310 else 248 else
311 wr_ary = (struct rpcrdma_write_array *) 249 wr_ary = (struct rpcrdma_write_array *)
312 &wr_ary->wc_nchunks; 250 &wr_ary->wc_nchunks;
@@ -325,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
325 ary = (struct rpcrdma_write_array *) 263 ary = (struct rpcrdma_write_array *)
326 &rmsgp->rm_body.rm_chunks[1]; 264 &rmsgp->rm_body.rm_chunks[1];
327 ary->wc_discrim = xdr_one; 265 ary->wc_discrim = xdr_one;
328 ary->wc_nchunks = htonl(chunks); 266 ary->wc_nchunks = cpu_to_be32(chunks);
329 267
330 /* write-list terminator */ 268 /* write-list terminator */
331 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; 269 ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
@@ -338,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
338 int chunks) 276 int chunks)
339{ 277{
340 ary->wc_discrim = xdr_one; 278 ary->wc_discrim = xdr_one;
341 ary->wc_nchunks = htonl(chunks); 279 ary->wc_nchunks = cpu_to_be32(chunks);
342} 280}
343 281
344void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, 282void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
@@ -350,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
350 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; 288 struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
351 seg->rs_handle = rs_handle; 289 seg->rs_handle = rs_handle;
352 seg->rs_offset = rs_offset; 290 seg->rs_offset = rs_offset;
353 seg->rs_length = htonl(write_len); 291 seg->rs_length = cpu_to_be32(write_len);
354} 292}
355 293
356void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, 294void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
@@ -358,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
358 struct rpcrdma_msg *rdma_resp, 296 struct rpcrdma_msg *rdma_resp,
359 enum rpcrdma_proc rdma_type) 297 enum rpcrdma_proc rdma_type)
360{ 298{
361 rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); 299 rdma_resp->rm_xid = rdma_argp->rm_xid;
362 rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); 300 rdma_resp->rm_vers = rdma_argp->rm_vers;
363 rdma_resp->rm_credit = htonl(xprt->sc_max_requests); 301 rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
364 rdma_resp->rm_type = htonl(rdma_type); 302 rdma_resp->rm_type = cpu_to_be32(rdma_type);
365 303
366 /* Encode <nul> chunks lists */ 304 /* Encode <nul> chunks lists */
367 rdma_resp->rm_body.rm_chunks[0] = xdr_zero; 305 rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 86b44164172b..2e1348bde325 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
85 85
86 /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ 86 /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
87 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; 87 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
88 if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) 88 if (rmsgp->rm_type == rdma_nomsg)
89 rqstp->rq_arg.pages = &rqstp->rq_pages[0]; 89 rqstp->rq_arg.pages = &rqstp->rq_pages[0];
90 else 90 else
91 rqstp->rq_arg.pages = &rqstp->rq_pages[1]; 91 rqstp->rq_arg.pages = &rqstp->rq_pages[1];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7de33d1af9b6..d25cd430f9ff 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
240 u32 xdr_off; 240 u32 xdr_off;
241 int chunk_off; 241 int chunk_off;
242 int chunk_no; 242 int chunk_no;
243 int nchunks;
243 struct rpcrdma_write_array *arg_ary; 244 struct rpcrdma_write_array *arg_ary;
244 struct rpcrdma_write_array *res_ary; 245 struct rpcrdma_write_array *res_ary;
245 int ret; 246 int ret;
@@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
251 &rdma_resp->rm_body.rm_chunks[1]; 252 &rdma_resp->rm_body.rm_chunks[1];
252 253
253 /* Write chunks start at the pagelist */ 254 /* Write chunks start at the pagelist */
255 nchunks = be32_to_cpu(arg_ary->wc_nchunks);
254 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 256 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
255 xfer_len && chunk_no < arg_ary->wc_nchunks; 257 xfer_len && chunk_no < nchunks;
256 chunk_no++) { 258 chunk_no++) {
257 struct rpcrdma_segment *arg_ch; 259 struct rpcrdma_segment *arg_ch;
258 u64 rs_offset; 260 u64 rs_offset;
259 261
260 arg_ch = &arg_ary->wc_array[chunk_no].wc_target; 262 arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
261 write_len = min(xfer_len, ntohl(arg_ch->rs_length)); 263 write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
262 264
263 /* Prepare the response chunk given the length actually 265 /* Prepare the response chunk given the length actually
264 * written */ 266 * written */
@@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
270 chunk_off = 0; 272 chunk_off = 0;
271 while (write_len) { 273 while (write_len) {
272 ret = send_write(xprt, rqstp, 274 ret = send_write(xprt, rqstp,
273 ntohl(arg_ch->rs_handle), 275 be32_to_cpu(arg_ch->rs_handle),
274 rs_offset + chunk_off, 276 rs_offset + chunk_off,
275 xdr_off, 277 xdr_off,
276 write_len, 278 write_len,
@@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
318 &rdma_resp->rm_body.rm_chunks[2]; 320 &rdma_resp->rm_body.rm_chunks[2];
319 321
320 /* xdr offset starts at RPC message */ 322 /* xdr offset starts at RPC message */
321 nchunks = ntohl(arg_ary->wc_nchunks); 323 nchunks = be32_to_cpu(arg_ary->wc_nchunks);
322 for (xdr_off = 0, chunk_no = 0; 324 for (xdr_off = 0, chunk_no = 0;
323 xfer_len && chunk_no < nchunks; 325 xfer_len && chunk_no < nchunks;
324 chunk_no++) { 326 chunk_no++) {
325 u64 rs_offset; 327 u64 rs_offset;
326 ch = &arg_ary->wc_array[chunk_no].wc_target; 328 ch = &arg_ary->wc_array[chunk_no].wc_target;
327 write_len = min(xfer_len, htonl(ch->rs_length)); 329 write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
328 330
329 /* Prepare the reply chunk given the length actually 331 /* Prepare the reply chunk given the length actually
330 * written */ 332 * written */
@@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
335 chunk_off = 0; 337 chunk_off = 0;
336 while (write_len) { 338 while (write_len) {
337 ret = send_write(xprt, rqstp, 339 ret = send_write(xprt, rqstp,
338 ntohl(ch->rs_handle), 340 be32_to_cpu(ch->rs_handle),
339 rs_offset + chunk_off, 341 rs_offset + chunk_off,
340 xdr_off, 342 xdr_off,
341 write_len, 343 write_len,
@@ -515,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
515 inline_bytes = rqstp->rq_res.len; 517 inline_bytes = rqstp->rq_res.len;
516 518
517 /* Create the RDMA response header */ 519 /* Create the RDMA response header */
518 res_page = svc_rdma_get_page(); 520 res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
519 rdma_resp = page_address(res_page); 521 rdma_resp = page_address(res_page);
520 reply_ary = svc_rdma_get_reply_array(rdma_argp); 522 reply_ary = svc_rdma_get_reply_array(rdma_argp);
521 if (reply_ary) 523 if (reply_ary)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f4cfa764d76f..6b36279e4288 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
91 .xcl_name = "rdma", 91 .xcl_name = "rdma",
92 .xcl_owner = THIS_MODULE, 92 .xcl_owner = THIS_MODULE,
93 .xcl_ops = &svc_rdma_ops, 93 .xcl_ops = &svc_rdma_ops,
94 .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, 94 .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
95 .xcl_ident = XPRT_TRANSPORT_RDMA, 95 .xcl_ident = XPRT_TRANSPORT_RDMA,
96}; 96};
97 97
@@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
99{ 99{
100 struct svc_rdma_op_ctxt *ctxt; 100 struct svc_rdma_op_ctxt *ctxt;
101 101
102 while (1) { 102 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
103 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL); 103 GFP_KERNEL | __GFP_NOFAIL);
104 if (ctxt)
105 break;
106 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
107 }
108 ctxt->xprt = xprt; 104 ctxt->xprt = xprt;
109 INIT_LIST_HEAD(&ctxt->dto_q); 105 INIT_LIST_HEAD(&ctxt->dto_q);
110 ctxt->count = 0; 106 ctxt->count = 0;
@@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
156struct svc_rdma_req_map *svc_rdma_get_req_map(void) 152struct svc_rdma_req_map *svc_rdma_get_req_map(void)
157{ 153{
158 struct svc_rdma_req_map *map; 154 struct svc_rdma_req_map *map;
159 while (1) { 155 map = kmem_cache_alloc(svc_rdma_map_cachep,
160 map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL); 156 GFP_KERNEL | __GFP_NOFAIL);
161 if (map)
162 break;
163 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
164 }
165 map->count = 0; 157 map->count = 0;
166 return map; 158 return map;
167} 159}
@@ -493,18 +485,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
493 return cma_xprt; 485 return cma_xprt;
494} 486}
495 487
496struct page *svc_rdma_get_page(void)
497{
498 struct page *page;
499
500 while ((page = alloc_page(GFP_KERNEL)) == NULL) {
501 /* If we can't get memory, wait a bit and try again */
502 printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
503 schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
504 }
505 return page;
506}
507
508int svc_rdma_post_recv(struct svcxprt_rdma *xprt) 488int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
509{ 489{
510 struct ib_recv_wr recv_wr, *bad_recv_wr; 490 struct ib_recv_wr recv_wr, *bad_recv_wr;
@@ -523,7 +503,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
523 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 503 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
524 goto err_put_ctxt; 504 goto err_put_ctxt;
525 } 505 }
526 page = svc_rdma_get_page(); 506 page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
527 ctxt->pages[sge_no] = page; 507 ctxt->pages[sge_no] = page;
528 pa = ib_dma_map_page(xprt->sc_cm_id->device, 508 pa = ib_dma_map_page(xprt->sc_cm_id->device,
529 page, 0, PAGE_SIZE, 509 page, 0, PAGE_SIZE,
@@ -1318,11 +1298,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1318 struct ib_send_wr err_wr; 1298 struct ib_send_wr err_wr;
1319 struct page *p; 1299 struct page *p;
1320 struct svc_rdma_op_ctxt *ctxt; 1300 struct svc_rdma_op_ctxt *ctxt;
1321 u32 *va; 1301 __be32 *va;
1322 int length; 1302 int length;
1323 int ret; 1303 int ret;
1324 1304
1325 p = svc_rdma_get_page(); 1305 p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
1326 va = page_address(p); 1306 va = page_address(p);
1327 1307
1328 /* XDR encode error */ 1308 /* XDR encode error */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 54f23b1be986..436da2caec95 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -48,7 +48,6 @@
48 */ 48 */
49 49
50#include <linux/module.h> 50#include <linux/module.h>
51#include <linux/init.h>
52#include <linux/slab.h> 51#include <linux/slab.h>
53#include <linux/seq_file.h> 52#include <linux/seq_file.h>
54#include <linux/sunrpc/addr.h> 53#include <linux/sunrpc/addr.h>
@@ -59,11 +58,6 @@
59# define RPCDBG_FACILITY RPCDBG_TRANS 58# define RPCDBG_FACILITY RPCDBG_TRANS
60#endif 59#endif
61 60
62MODULE_LICENSE("Dual BSD/GPL");
63
64MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
65MODULE_AUTHOR("Network Appliance, Inc.");
66
67/* 61/*
68 * tunables 62 * tunables
69 */ 63 */
@@ -711,7 +705,7 @@ static struct xprt_class xprt_rdma = {
711 .setup = xprt_setup_rdma, 705 .setup = xprt_setup_rdma,
712}; 706};
713 707
714static void __exit xprt_rdma_cleanup(void) 708void xprt_rdma_cleanup(void)
715{ 709{
716 int rc; 710 int rc;
717 711
@@ -728,7 +722,7 @@ static void __exit xprt_rdma_cleanup(void)
728 __func__, rc); 722 __func__, rc);
729} 723}
730 724
731static int __init xprt_rdma_init(void) 725int xprt_rdma_init(void)
732{ 726{
733 int rc; 727 int rc;
734 728
@@ -753,6 +747,3 @@ static int __init xprt_rdma_init(void)
753#endif 747#endif
754 return 0; 748 return 0;
755} 749}
756
757module_init(xprt_rdma_init);
758module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 78e0b8beaa36..58163b88738c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -480,6 +480,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
480 */ 480 */
481int rpcrdma_marshal_req(struct rpc_rqst *); 481int rpcrdma_marshal_req(struct rpc_rqst *);
482 482
483/* RPC/RDMA module init - xprtrdma/transport.c
484 */
485int xprt_rdma_init(void);
486void xprt_rdma_cleanup(void);
487
483/* Temporary NFS request map cache. Created in svc_rdma.c */ 488/* Temporary NFS request map cache. Created in svc_rdma.c */
484extern struct kmem_cache *svc_rdma_map_cachep; 489extern struct kmem_cache *svc_rdma_map_cachep;
485/* WR context cache. Created in svc_rdma.c */ 490/* WR context cache. Created in svc_rdma.c */
@@ -487,10 +492,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
487/* Workqueue created in svc_rdma.c */ 492/* Workqueue created in svc_rdma.c */
488extern struct workqueue_struct *svc_rdma_wq; 493extern struct workqueue_struct *svc_rdma_wq;
489 494
490#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
491#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
492#else
493#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
494#endif
495
496#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 495#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */