aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-05-29 14:21:12 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-29 14:21:12 -0400
commita74d70b63f1a0230831bcca3145d85ae016f9d4c (patch)
tree24392a2843b19e81a1a38d88b34e772bd688502b
parentb11b06d90a41766c2d31f0acb8a87aa0f2a7188f (diff)
parentc47d832bc0155153920e507f075647519bad09a2 (diff)
Merge branch 'for-2.6.40' of git://linux-nfs.org/~bfields/linux
* 'for-2.6.40' of git://linux-nfs.org/~bfields/linux: (22 commits) nfsd: make local functions static NFSD: Remove unused variable from nfsd4_decode_bind_conn_to_session() NFSD: Check status from nfsd4_map_bcts_dir() NFSD: Remove setting unused variable in nfsd_vfs_read() nfsd41: error out on repeated RECLAIM_COMPLETE nfsd41: compare request's opcnt with session's maxops at nfsd4_sequence nfsd v4.1 lOCKT clientid field must be ignored nfsd41: add flag checking for create_session nfsd41: make sure nfs server process OPEN with EXCLUSIVE4_1 correctly nfsd4: fix wrongsec handling for PUTFH + op cases nfsd4: make fh_verify responsibility of nfsd_lookup_dentry caller nfsd4: introduce OPDESC helper nfsd4: allow fh_verify caller to skip pseudoflavor checks nfsd: distinguish functions of NFSD_MAY_* flags svcrpc: complete svsk processing on cb receive failure svcrpc: take advantage of tcp autotuning SUNRPC: Don't wait for full record to receive tcp data svcrpc: copy cb reply instead of pages svcrpc: close connection if client sends short packet svcrpc: note network-order types in svc_process_calldir ...
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs3xdr.c2
-rw-r--r--fs/nfsd/nfs4proc.c73
-rw-r--r--fs/nfsd/nfs4state.c42
-rw-r--r--fs/nfsd/nfs4xdr.c11
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/vfs.c33
-rw-r--r--fs/nfsd/vfs.h6
-rw-r--r--include/linux/nfs4.h8
-rw-r--r--include/linux/sunrpc/svcsock.h1
-rw-r--r--net/sunrpc/svcsock.c336
12 files changed, 323 insertions, 199 deletions
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ad000aeb21a2..b9566e46219f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1354,12 +1354,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1354 if (IS_ERR(exp)) 1354 if (IS_ERR(exp))
1355 return nfserrno(PTR_ERR(exp)); 1355 return nfserrno(PTR_ERR(exp));
1356 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1356 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
1357 if (rv)
1358 goto out;
1359 rv = check_nfsd_access(exp, rqstp);
1360 if (rv)
1361 fh_put(fhp);
1362out:
1363 exp_put(exp); 1357 exp_put(exp);
1364 return rv; 1358 return rv;
1365} 1359}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 2247fc91d5e9..9095f3c21df9 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -245,7 +245,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
245 } 245 }
246 246
247 /* Now create the file and set attributes */ 247 /* Now create the file and set attributes */
248 nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, 248 nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
249 attr, newfhp, 249 attr, newfhp,
250 argp->createmode, argp->verf, NULL, NULL); 250 argp->createmode, argp->verf, NULL, NULL);
251 251
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index ad48faca20fc..08c6e36ab2eb 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -842,7 +842,7 @@ out:
842 return rv; 842 return rv;
843} 843}
844 844
845__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 845static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
846{ 846{
847 struct svc_fh fh; 847 struct svc_fh fh;
848 int err; 848 int err;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5fcb1396a7e3..3a6dbd70b34b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -196,9 +196,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
196 196
197 /* 197 /*
198 * Note: create modes (UNCHECKED,GUARDED...) are the same 198 * Note: create modes (UNCHECKED,GUARDED...) are the same
199 * in NFSv4 as in v3. 199 * in NFSv4 as in v3 except EXCLUSIVE4_1.
200 */ 200 */
201 status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, 201 status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
202 open->op_fname.len, &open->op_iattr, 202 open->op_fname.len, &open->op_iattr,
203 &resfh, open->op_createmode, 203 &resfh, open->op_createmode,
204 (u32 *)open->op_verf.data, 204 (u32 *)open->op_verf.data,
@@ -403,7 +403,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
403 cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; 403 cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
404 memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, 404 memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
405 putfh->pf_fhlen); 405 putfh->pf_fhlen);
406 return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); 406 return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
407} 407}
408 408
409static __be32 409static __be32
@@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
762 __be32 err; 762 __be32 err;
763 763
764 fh_init(&resfh, NFS4_FHSIZE); 764 fh_init(&resfh, NFS4_FHSIZE);
765 err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
766 if (err)
767 return err;
765 err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, 768 err = nfsd_lookup_dentry(rqstp, &cstate->current_fh,
766 secinfo->si_name, secinfo->si_namelen, 769 secinfo->si_name, secinfo->si_namelen,
767 &exp, &dentry); 770 &exp, &dentry);
@@ -986,6 +989,9 @@ enum nfsd4_op_flags {
986 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ 989 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
987 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ 990 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
988 ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ 991 ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
992 /* For rfc 5661 section 2.6.3.1.1: */
993 OP_HANDLES_WRONGSEC = 1 << 3,
994 OP_IS_PUTFH_LIKE = 1 << 4,
989}; 995};
990 996
991struct nfsd4_operation { 997struct nfsd4_operation {
@@ -1031,6 +1037,44 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
1031 return nfs_ok; 1037 return nfs_ok;
1032} 1038}
1033 1039
1040static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
1041{
1042 return &nfsd4_ops[op->opnum];
1043}
1044
1045static bool need_wrongsec_check(struct svc_rqst *rqstp)
1046{
1047 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1048 struct nfsd4_compoundargs *argp = rqstp->rq_argp;
1049 struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
1050 struct nfsd4_op *next = &argp->ops[resp->opcnt];
1051 struct nfsd4_operation *thisd;
1052 struct nfsd4_operation *nextd;
1053
1054 thisd = OPDESC(this);
1055 /*
1056 * Most ops check wronsec on our own; only the putfh-like ops
1057 * have special rules.
1058 */
1059 if (!(thisd->op_flags & OP_IS_PUTFH_LIKE))
1060 return false;
1061 /*
1062 * rfc 5661 2.6.3.1.1.6: don't bother erroring out a
1063 * put-filehandle operation if we're not going to use the
1064 * result:
1065 */
1066 if (argp->opcnt == resp->opcnt)
1067 return false;
1068
1069 nextd = OPDESC(next);
1070 /*
1071 * Rest of 2.6.3.1.1: certain operations will return WRONGSEC
1072 * errors themselves as necessary; others should check for them
1073 * now:
1074 */
1075 return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
1076}
1077
1034/* 1078/*
1035 * COMPOUND call. 1079 * COMPOUND call.
1036 */ 1080 */
@@ -1108,7 +1152,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1108 goto encode_op; 1152 goto encode_op;
1109 } 1153 }
1110 1154
1111 opdesc = &nfsd4_ops[op->opnum]; 1155 opdesc = OPDESC(op);
1112 1156
1113 if (!cstate->current_fh.fh_dentry) { 1157 if (!cstate->current_fh.fh_dentry) {
1114 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { 1158 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
@@ -1126,6 +1170,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1126 else 1170 else
1127 BUG_ON(op->status == nfs_ok); 1171 BUG_ON(op->status == nfs_ok);
1128 1172
1173 if (!op->status && need_wrongsec_check(rqstp))
1174 op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
1175
1129encode_op: 1176encode_op:
1130 /* Only from SEQUENCE */ 1177 /* Only from SEQUENCE */
1131 if (resp->cstate.status == nfserr_replay_cache) { 1178 if (resp->cstate.status == nfserr_replay_cache) {
@@ -1217,10 +1264,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
1217 }, 1264 },
1218 [OP_LOOKUP] = { 1265 [OP_LOOKUP] = {
1219 .op_func = (nfsd4op_func)nfsd4_lookup, 1266 .op_func = (nfsd4op_func)nfsd4_lookup,
1267 .op_flags = OP_HANDLES_WRONGSEC,
1220 .op_name = "OP_LOOKUP", 1268 .op_name = "OP_LOOKUP",
1221 }, 1269 },
1222 [OP_LOOKUPP] = { 1270 [OP_LOOKUPP] = {
1223 .op_func = (nfsd4op_func)nfsd4_lookupp, 1271 .op_func = (nfsd4op_func)nfsd4_lookupp,
1272 .op_flags = OP_HANDLES_WRONGSEC,
1224 .op_name = "OP_LOOKUPP", 1273 .op_name = "OP_LOOKUPP",
1225 }, 1274 },
1226 [OP_NVERIFY] = { 1275 [OP_NVERIFY] = {
@@ -1229,6 +1278,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1229 }, 1278 },
1230 [OP_OPEN] = { 1279 [OP_OPEN] = {
1231 .op_func = (nfsd4op_func)nfsd4_open, 1280 .op_func = (nfsd4op_func)nfsd4_open,
1281 .op_flags = OP_HANDLES_WRONGSEC,
1232 .op_name = "OP_OPEN", 1282 .op_name = "OP_OPEN",
1233 }, 1283 },
1234 [OP_OPEN_CONFIRM] = { 1284 [OP_OPEN_CONFIRM] = {
@@ -1241,17 +1291,20 @@ static struct nfsd4_operation nfsd4_ops[] = {
1241 }, 1291 },
1242 [OP_PUTFH] = { 1292 [OP_PUTFH] = {
1243 .op_func = (nfsd4op_func)nfsd4_putfh, 1293 .op_func = (nfsd4op_func)nfsd4_putfh,
1244 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1294 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1295 | OP_IS_PUTFH_LIKE,
1245 .op_name = "OP_PUTFH", 1296 .op_name = "OP_PUTFH",
1246 }, 1297 },
1247 [OP_PUTPUBFH] = { 1298 [OP_PUTPUBFH] = {
1248 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1299 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1249 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1300 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1301 | OP_IS_PUTFH_LIKE,
1250 .op_name = "OP_PUTPUBFH", 1302 .op_name = "OP_PUTPUBFH",
1251 }, 1303 },
1252 [OP_PUTROOTFH] = { 1304 [OP_PUTROOTFH] = {
1253 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1305 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1254 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1306 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1307 | OP_IS_PUTFH_LIKE,
1255 .op_name = "OP_PUTROOTFH", 1308 .op_name = "OP_PUTROOTFH",
1256 }, 1309 },
1257 [OP_READ] = { 1310 [OP_READ] = {
@@ -1281,15 +1334,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
1281 }, 1334 },
1282 [OP_RESTOREFH] = { 1335 [OP_RESTOREFH] = {
1283 .op_func = (nfsd4op_func)nfsd4_restorefh, 1336 .op_func = (nfsd4op_func)nfsd4_restorefh,
1284 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1337 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1338 | OP_IS_PUTFH_LIKE,
1285 .op_name = "OP_RESTOREFH", 1339 .op_name = "OP_RESTOREFH",
1286 }, 1340 },
1287 [OP_SAVEFH] = { 1341 [OP_SAVEFH] = {
1288 .op_func = (nfsd4op_func)nfsd4_savefh, 1342 .op_func = (nfsd4op_func)nfsd4_savefh,
1343 .op_flags = OP_HANDLES_WRONGSEC,
1289 .op_name = "OP_SAVEFH", 1344 .op_name = "OP_SAVEFH",
1290 }, 1345 },
1291 [OP_SECINFO] = { 1346 [OP_SECINFO] = {
1292 .op_func = (nfsd4op_func)nfsd4_secinfo, 1347 .op_func = (nfsd4op_func)nfsd4_secinfo,
1348 .op_flags = OP_HANDLES_WRONGSEC,
1293 .op_name = "OP_SECINFO", 1349 .op_name = "OP_SECINFO",
1294 }, 1350 },
1295 [OP_SETATTR] = { 1351 [OP_SETATTR] = {
@@ -1353,6 +1409,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1353 }, 1409 },
1354 [OP_SECINFO_NO_NAME] = { 1410 [OP_SECINFO_NO_NAME] = {
1355 .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, 1411 .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
1412 .op_flags = OP_HANDLES_WRONGSEC,
1356 .op_name = "OP_SECINFO_NO_NAME", 1413 .op_name = "OP_SECINFO_NO_NAME",
1357 }, 1414 },
1358}; 1415};
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4cf04e11c66c..e98f3c2e9492 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1519,6 +1519,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1519 bool confirm_me = false; 1519 bool confirm_me = false;
1520 int status = 0; 1520 int status = 0;
1521 1521
1522 if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
1523 return nfserr_inval;
1524
1522 nfs4_lock_state(); 1525 nfs4_lock_state();
1523 unconf = find_unconfirmed_client(&cr_ses->clientid); 1526 unconf = find_unconfirmed_client(&cr_ses->clientid);
1524 conf = find_confirmed_client(&cr_ses->clientid); 1527 conf = find_confirmed_client(&cr_ses->clientid);
@@ -1637,8 +1640,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
1637 return nfserr_badsession; 1640 return nfserr_badsession;
1638 1641
1639 status = nfsd4_map_bcts_dir(&bcts->dir); 1642 status = nfsd4_map_bcts_dir(&bcts->dir);
1640 nfsd4_new_conn(rqstp, cstate->session, bcts->dir); 1643 if (!status)
1641 return nfs_ok; 1644 nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
1645 return status;
1642} 1646}
1643 1647
1644static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) 1648static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
@@ -1725,6 +1729,13 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
1725 return; 1729 return;
1726} 1730}
1727 1731
1732static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
1733{
1734 struct nfsd4_compoundargs *args = rqstp->rq_argp;
1735
1736 return args->opcnt > session->se_fchannel.maxops;
1737}
1738
1728__be32 1739__be32
1729nfsd4_sequence(struct svc_rqst *rqstp, 1740nfsd4_sequence(struct svc_rqst *rqstp,
1730 struct nfsd4_compound_state *cstate, 1741 struct nfsd4_compound_state *cstate,
@@ -1753,6 +1764,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1753 if (!session) 1764 if (!session)
1754 goto out; 1765 goto out;
1755 1766
1767 status = nfserr_too_many_ops;
1768 if (nfsd4_session_too_many_ops(rqstp, session))
1769 goto out;
1770
1756 status = nfserr_badslot; 1771 status = nfserr_badslot;
1757 if (seq->slotid >= session->se_fchannel.maxreqs) 1772 if (seq->slotid >= session->se_fchannel.maxreqs)
1758 goto out; 1773 goto out;
@@ -1808,6 +1823,8 @@ out:
1808__be32 1823__be32
1809nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) 1824nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
1810{ 1825{
1826 int status = 0;
1827
1811 if (rc->rca_one_fs) { 1828 if (rc->rca_one_fs) {
1812 if (!cstate->current_fh.fh_dentry) 1829 if (!cstate->current_fh.fh_dentry)
1813 return nfserr_nofilehandle; 1830 return nfserr_nofilehandle;
@@ -1817,9 +1834,14 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
1817 */ 1834 */
1818 return nfs_ok; 1835 return nfs_ok;
1819 } 1836 }
1837
1820 nfs4_lock_state(); 1838 nfs4_lock_state();
1821 if (is_client_expired(cstate->session->se_client)) { 1839 status = nfserr_complete_already;
1822 nfs4_unlock_state(); 1840 if (cstate->session->se_client->cl_firststate)
1841 goto out;
1842
1843 status = nfserr_stale_clientid;
1844 if (is_client_expired(cstate->session->se_client))
1823 /* 1845 /*
1824 * The following error isn't really legal. 1846 * The following error isn't really legal.
1825 * But we only get here if the client just explicitly 1847 * But we only get here if the client just explicitly
@@ -1827,11 +1849,13 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
1827 * error it gets back on an operation for the dead 1849 * error it gets back on an operation for the dead
1828 * client. 1850 * client.
1829 */ 1851 */
1830 return nfserr_stale_clientid; 1852 goto out;
1831 } 1853
1854 status = nfs_ok;
1832 nfsd4_create_clid_dir(cstate->session->se_client); 1855 nfsd4_create_clid_dir(cstate->session->se_client);
1856out:
1833 nfs4_unlock_state(); 1857 nfs4_unlock_state();
1834 return nfs_ok; 1858 return status;
1835} 1859}
1836 1860
1837__be32 1861__be32
@@ -2462,7 +2486,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2462 return NULL; 2486 return NULL;
2463} 2487}
2464 2488
2465int share_access_to_flags(u32 share_access) 2489static int share_access_to_flags(u32 share_access)
2466{ 2490{
2467 share_access &= ~NFS4_SHARE_WANT_MASK; 2491 share_access &= ~NFS4_SHARE_WANT_MASK;
2468 2492
@@ -2882,7 +2906,7 @@ out:
2882 return status; 2906 return status;
2883} 2907}
2884 2908
2885struct lock_manager nfsd4_manager = { 2909static struct lock_manager nfsd4_manager = {
2886}; 2910};
2887 2911
2888static void 2912static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c6766af00d98..990181103214 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -424,15 +424,12 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
424static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) 424static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
425{ 425{
426 DECODE_HEAD; 426 DECODE_HEAD;
427 u32 dummy;
428 427
429 READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); 428 READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
430 COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); 429 COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
431 READ32(bcts->dir); 430 READ32(bcts->dir);
432 /* XXX: Perhaps Tom Tucker could help us figure out how we 431 /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker
433 * should be using ctsa_use_conn_in_rdma_mode: */ 432 * could help us figure out we should be using it. */
434 READ32(dummy);
435
436 DECODE_TAIL; 433 DECODE_TAIL;
437} 434}
438 435
@@ -588,8 +585,6 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
588 READ_BUF(lockt->lt_owner.len); 585 READ_BUF(lockt->lt_owner.len);
589 READMEM(lockt->lt_owner.data, lockt->lt_owner.len); 586 READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
590 587
591 if (argp->minorversion && !zero_clientid(&lockt->lt_clientid))
592 return nfserr_inval;
593 DECODE_TAIL; 588 DECODE_TAIL;
594} 589}
595 590
@@ -3120,7 +3115,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr,
3120 return nfserr; 3115 return nfserr;
3121} 3116}
3122 3117
3123__be32 3118static __be32
3124nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, 3119nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
3125 struct nfsd4_sequence *seq) 3120 struct nfsd4_sequence *seq)
3126{ 3121{
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 55c8e63af0be..90c6aa6d5e0f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
344 * which clients virtually always use auth_sys for, 344 * which clients virtually always use auth_sys for,
345 * even while using RPCSEC_GSS for NFS. 345 * even while using RPCSEC_GSS for NFS.
346 */ 346 */
347 if (access & NFSD_MAY_LOCK) 347 if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
348 goto skip_pseudoflavor_check; 348 goto skip_pseudoflavor_check;
349 /* 349 /*
350 * Clients may expect to be able to use auth_sys during mount, 350 * Clients may expect to be able to use auth_sys during mount,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 129f3c9f62d5..d5718273bb32 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
181 struct svc_export *exp; 181 struct svc_export *exp;
182 struct dentry *dparent; 182 struct dentry *dparent;
183 struct dentry *dentry; 183 struct dentry *dentry;
184 __be32 err;
185 int host_err; 184 int host_err;
186 185
187 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 186 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
188 187
189 /* Obtain dentry and export. */
190 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
191 if (err)
192 return err;
193
194 dparent = fhp->fh_dentry; 188 dparent = fhp->fh_dentry;
195 exp = fhp->fh_export; 189 exp = fhp->fh_export;
196 exp_get(exp); 190 exp_get(exp);
@@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
254 struct dentry *dentry; 248 struct dentry *dentry;
255 __be32 err; 249 __be32 err;
256 250
251 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
252 if (err)
253 return err;
257 err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); 254 err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
258 if (err) 255 if (err)
259 return err; 256 return err;
@@ -877,13 +874,11 @@ static __be32
877nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 874nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
878 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 875 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
879{ 876{
880 struct inode *inode;
881 mm_segment_t oldfs; 877 mm_segment_t oldfs;
882 __be32 err; 878 __be32 err;
883 int host_err; 879 int host_err;
884 880
885 err = nfserr_perm; 881 err = nfserr_perm;
886 inode = file->f_path.dentry->d_inode;
887 882
888 if (file->f_op->splice_read && rqstp->rq_splice_ok) { 883 if (file->f_op->splice_read && rqstp->rq_splice_ok) {
889 struct splice_desc sd = { 884 struct splice_desc sd = {
@@ -1340,11 +1335,18 @@ out_nfserr:
1340} 1335}
1341 1336
1342#ifdef CONFIG_NFSD_V3 1337#ifdef CONFIG_NFSD_V3
1338
1339static inline int nfsd_create_is_exclusive(int createmode)
1340{
1341 return createmode == NFS3_CREATE_EXCLUSIVE
1342 || createmode == NFS4_CREATE_EXCLUSIVE4_1;
1343}
1344
1343/* 1345/*
1344 * NFSv3 version of nfsd_create 1346 * NFSv3 and NFSv4 version of nfsd_create
1345 */ 1347 */
1346__be32 1348__be32
1347nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, 1349do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1348 char *fname, int flen, struct iattr *iap, 1350 char *fname, int flen, struct iattr *iap,
1349 struct svc_fh *resfhp, int createmode, u32 *verifier, 1351 struct svc_fh *resfhp, int createmode, u32 *verifier,
1350 int *truncp, int *created) 1352 int *truncp, int *created)
@@ -1396,7 +1398,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1396 if (err) 1398 if (err)
1397 goto out; 1399 goto out;
1398 1400
1399 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1401 if (nfsd_create_is_exclusive(createmode)) {
1400 /* solaris7 gets confused (bugid 4218508) if these have 1402 /* solaris7 gets confused (bugid 4218508) if these have
1401 * the high bit set, so just clear the high bits. If this is 1403 * the high bit set, so just clear the high bits. If this is
1402 * ever changed to use different attrs for storing the 1404 * ever changed to use different attrs for storing the
@@ -1437,6 +1439,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1437 && dchild->d_inode->i_atime.tv_sec == v_atime 1439 && dchild->d_inode->i_atime.tv_sec == v_atime
1438 && dchild->d_inode->i_size == 0 ) 1440 && dchild->d_inode->i_size == 0 )
1439 break; 1441 break;
1442 case NFS4_CREATE_EXCLUSIVE4_1:
1443 if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
1444 && dchild->d_inode->i_atime.tv_sec == v_atime
1445 && dchild->d_inode->i_size == 0 )
1446 goto set_attr;
1440 /* fallthru */ 1447 /* fallthru */
1441 case NFS3_CREATE_GUARDED: 1448 case NFS3_CREATE_GUARDED:
1442 err = nfserr_exist; 1449 err = nfserr_exist;
@@ -1455,7 +1462,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1455 1462
1456 nfsd_check_ignore_resizing(iap); 1463 nfsd_check_ignore_resizing(iap);
1457 1464
1458 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1465 if (nfsd_create_is_exclusive(createmode)) {
1459 /* Cram the verifier into atime/mtime */ 1466 /* Cram the verifier into atime/mtime */
1460 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1467 iap->ia_valid = ATTR_MTIME|ATTR_ATIME
1461 | ATTR_MTIME_SET|ATTR_ATIME_SET; 1468 | ATTR_MTIME_SET|ATTR_ATIME_SET;
@@ -2034,7 +2041,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2034 struct inode *inode = dentry->d_inode; 2041 struct inode *inode = dentry->d_inode;
2035 int err; 2042 int err;
2036 2043
2037 if (acc == NFSD_MAY_NOP) 2044 if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
2038 return 0; 2045 return 0;
2039#if 0 2046#if 0
2040 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", 2047 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9a370a5e36b7..e0bbac04d1dd 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -17,10 +17,14 @@
17#define NFSD_MAY_SATTR 8 17#define NFSD_MAY_SATTR 8
18#define NFSD_MAY_TRUNC 16 18#define NFSD_MAY_TRUNC 16
19#define NFSD_MAY_LOCK 32 19#define NFSD_MAY_LOCK 32
20#define NFSD_MAY_MASK 63
21
22/* extra hints to permission and open routines: */
20#define NFSD_MAY_OWNER_OVERRIDE 64 23#define NFSD_MAY_OWNER_OVERRIDE 64
21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 24#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 25#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
23#define NFSD_MAY_NOT_BREAK_LEASE 512 26#define NFSD_MAY_NOT_BREAK_LEASE 512
27#define NFSD_MAY_BYPASS_GSS 1024
24 28
25#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 29#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
26#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 30#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
@@ -54,7 +58,7 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
54 int type, dev_t rdev, struct svc_fh *res); 58 int type, dev_t rdev, struct svc_fh *res);
55#ifdef CONFIG_NFSD_V3 59#ifdef CONFIG_NFSD_V3
56__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); 60__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
57__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, 61__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
58 char *name, int len, struct iattr *attrs, 62 char *name, int len, struct iattr *attrs,
59 struct svc_fh *res, int createmode, 63 struct svc_fh *res, int createmode,
60 u32 *verifier, int *truncp, int *created); 64 u32 *verifier, int *truncp, int *created);
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 178fafe0ff93..8e66c5ccc1c4 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -570,9 +570,11 @@ struct nfs4_sessionid {
570}; 570};
571 571
572/* Create Session Flags */ 572/* Create Session Flags */
573#define SESSION4_PERSIST 0x001 573#define SESSION4_PERSIST 0x001
574#define SESSION4_BACK_CHAN 0x002 574#define SESSION4_BACK_CHAN 0x002
575#define SESSION4_RDMA 0x004 575#define SESSION4_RDMA 0x004
576
577#define SESSION4_FLAG_MASK_A 0x007
576 578
577enum state_protect_how4 { 579enum state_protect_how4 {
578 SP4_NONE = 0, 580 SP4_NONE = 0,
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 04dba23c59f2..85c50b40759d 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,6 +28,7 @@ struct svc_sock {
28 /* private TCP part */ 28 /* private TCP part */
29 u32 sk_reclen; /* length of record */ 29 u32 sk_reclen; /* length of record */
30 u32 sk_tcplen; /* current read length */ 30 u32 sk_tcplen; /* current read length */
31 struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
31}; 32};
32 33
33/* 34/*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b7d435c3f19e..af04f779ce9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
387 return len; 387 return len;
388} 388}
389 389
390static int svc_partial_recvfrom(struct svc_rqst *rqstp,
391 struct kvec *iov, int nr,
392 int buflen, unsigned int base)
393{
394 size_t save_iovlen;
395 void __user *save_iovbase;
396 unsigned int i;
397 int ret;
398
399 if (base == 0)
400 return svc_recvfrom(rqstp, iov, nr, buflen);
401
402 for (i = 0; i < nr; i++) {
403 if (iov[i].iov_len > base)
404 break;
405 base -= iov[i].iov_len;
406 }
407 save_iovlen = iov[i].iov_len;
408 save_iovbase = iov[i].iov_base;
409 iov[i].iov_len -= base;
410 iov[i].iov_base += base;
411 ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
412 iov[i].iov_len = save_iovlen;
413 iov[i].iov_base = save_iovbase;
414 return ret;
415}
416
390/* 417/*
391 * Set socket snd and rcv buffer lengths 418 * Set socket snd and rcv buffer lengths
392 */ 419 */
@@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
409 lock_sock(sock->sk); 436 lock_sock(sock->sk);
410 sock->sk->sk_sndbuf = snd * 2; 437 sock->sk->sk_sndbuf = snd * 2;
411 sock->sk->sk_rcvbuf = rcv * 2; 438 sock->sk->sk_rcvbuf = rcv * 2;
412 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
413 sock->sk->sk_write_space(sock->sk); 439 sock->sk->sk_write_space(sock->sk);
414 release_sock(sock->sk); 440 release_sock(sock->sk);
415#endif 441#endif
@@ -884,6 +910,56 @@ failed:
884 return NULL; 910 return NULL;
885} 911}
886 912
913static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
914{
915 unsigned int i, len, npages;
916
917 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
918 return 0;
919 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
920 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
921 for (i = 0; i < npages; i++) {
922 if (rqstp->rq_pages[i] != NULL)
923 put_page(rqstp->rq_pages[i]);
924 BUG_ON(svsk->sk_pages[i] == NULL);
925 rqstp->rq_pages[i] = svsk->sk_pages[i];
926 svsk->sk_pages[i] = NULL;
927 }
928 rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
929 return len;
930}
931
932static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
933{
934 unsigned int i, len, npages;
935
936 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
937 return;
938 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
939 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
940 for (i = 0; i < npages; i++) {
941 svsk->sk_pages[i] = rqstp->rq_pages[i];
942 rqstp->rq_pages[i] = NULL;
943 }
944}
945
946static void svc_tcp_clear_pages(struct svc_sock *svsk)
947{
948 unsigned int i, len, npages;
949
950 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
951 goto out;
952 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
953 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
954 for (i = 0; i < npages; i++) {
955 BUG_ON(svsk->sk_pages[i] == NULL);
956 put_page(svsk->sk_pages[i]);
957 svsk->sk_pages[i] = NULL;
958 }
959out:
960 svsk->sk_tcplen = 0;
961}
962
887/* 963/*
888 * Receive data. 964 * Receive data.
889 * If we haven't gotten the record length yet, get the next four bytes. 965 * If we haven't gotten the record length yet, get the next four bytes.
@@ -893,31 +969,15 @@ failed:
893static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 969static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
894{ 970{
895 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 971 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
972 unsigned int want;
896 int len; 973 int len;
897 974
898 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
899 /* sndbuf needs to have room for one request
900 * per thread, otherwise we can stall even when the
901 * network isn't a bottleneck.
902 *
903 * We count all threads rather than threads in a
904 * particular pool, which provides an upper bound
905 * on the number of threads which will access the socket.
906 *
907 * rcvbuf just needs to be able to hold a few requests.
908 * Normally they will be removed from the queue
909 * as soon a a complete request arrives.
910 */
911 svc_sock_setbufsize(svsk->sk_sock,
912 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
913 3 * serv->sv_max_mesg);
914
915 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 975 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
916 976
917 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 977 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
918 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
919 struct kvec iov; 978 struct kvec iov;
920 979
980 want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
921 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 981 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
922 iov.iov_len = want; 982 iov.iov_len = want;
923 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) 983 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
@@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
927 if (len < want) { 987 if (len < want) {
928 dprintk("svc: short recvfrom while reading record " 988 dprintk("svc: short recvfrom while reading record "
929 "length (%d of %d)\n", len, want); 989 "length (%d of %d)\n", len, want);
930 goto err_again; /* record header not complete */ 990 return -EAGAIN;
931 } 991 }
932 992
933 svsk->sk_reclen = ntohl(svsk->sk_reclen); 993 svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
954 } 1014 }
955 } 1015 }
956 1016
957 /* Check whether enough data is available */ 1017 if (svsk->sk_reclen < 8)
958 len = svc_recv_available(svsk); 1018 goto err_delete; /* client is nuts. */
959 if (len < 0)
960 goto error;
961 1019
962 if (len < svsk->sk_reclen) {
963 dprintk("svc: incomplete TCP record (%d of %d)\n",
964 len, svsk->sk_reclen);
965 goto err_again; /* record not complete */
966 }
967 len = svsk->sk_reclen; 1020 len = svsk->sk_reclen;
968 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
969 1021
970 return len; 1022 return len;
971 error: 1023error:
972 if (len == -EAGAIN) 1024 dprintk("RPC: TCP recv_record got %d\n", len);
973 dprintk("RPC: TCP recv_record got EAGAIN\n");
974 return len; 1025 return len;
975 err_delete: 1026err_delete:
976 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1027 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
977 err_again:
978 return -EAGAIN; 1028 return -EAGAIN;
979} 1029}
980 1030
981static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 1031static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
982 struct rpc_rqst **reqpp, struct kvec *vec)
983{ 1032{
1033 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
984 struct rpc_rqst *req = NULL; 1034 struct rpc_rqst *req = NULL;
985 u32 *p; 1035 struct kvec *src, *dst;
986 u32 xid; 1036 __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
987 u32 calldir; 1037 __be32 xid;
988 int len; 1038 __be32 calldir;
989
990 len = svc_recvfrom(rqstp, vec, 1, 8);
991 if (len < 0)
992 goto error;
993 1039
994 p = (u32 *)rqstp->rq_arg.head[0].iov_base;
995 xid = *p++; 1040 xid = *p++;
996 calldir = *p; 1041 calldir = *p;
997 1042
998 if (calldir == 0) { 1043 if (bc_xprt)
999 /* REQUEST is the most common case */ 1044 req = xprt_lookup_rqst(bc_xprt, xid);
1000 vec[0] = rqstp->rq_arg.head[0];
1001 } else {
1002 /* REPLY */
1003 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
1004
1005 if (bc_xprt)
1006 req = xprt_lookup_rqst(bc_xprt, xid);
1007
1008 if (!req) {
1009 printk(KERN_NOTICE
1010 "%s: Got unrecognized reply: "
1011 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1012 __func__, ntohl(calldir),
1013 bc_xprt, xid);
1014 vec[0] = rqstp->rq_arg.head[0];
1015 goto out;
1016 }
1017 1045
1018 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1046 if (!req) {
1019 sizeof(struct xdr_buf)); 1047 printk(KERN_NOTICE
1020 /* copy the xid and call direction */ 1048 "%s: Got unrecognized reply: "
1021 memcpy(req->rq_private_buf.head[0].iov_base, 1049 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1022 rqstp->rq_arg.head[0].iov_base, 8); 1050 __func__, ntohl(calldir),
1023 vec[0] = req->rq_private_buf.head[0]; 1051 bc_xprt, xid);
1052 return -EAGAIN;
1024 } 1053 }
1025 out: 1054
1026 vec[0].iov_base += 8; 1055 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
1027 vec[0].iov_len -= 8; 1056 /*
1028 len = svsk->sk_reclen - 8; 1057 * XXX!: cheating for now! Only copying HEAD.
1029 error: 1058 * But we know this is good enough for now (in fact, for any
1030 *reqpp = req; 1059 * callback reply in the forseeable future).
1031 return len; 1060 */
1061 dst = &req->rq_private_buf.head[0];
1062 src = &rqstp->rq_arg.head[0];
1063 if (dst->iov_len < src->iov_len)
1064 return -EAGAIN; /* whatever; just giving up. */
1065 memcpy(dst->iov_base, src->iov_base, src->iov_len);
1066 xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
1067 rqstp->rq_arg.len = 0;
1068 return 0;
1032} 1069}
1033 1070
1071static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1072{
1073 int i = 0;
1074 int t = 0;
1075
1076 while (t < len) {
1077 vec[i].iov_base = page_address(pages[i]);
1078 vec[i].iov_len = PAGE_SIZE;
1079 i++;
1080 t += PAGE_SIZE;
1081 }
1082 return i;
1083}
1084
1085
1034/* 1086/*
1035 * Receive data from a TCP socket. 1087 * Receive data from a TCP socket.
1036 */ 1088 */
@@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1041 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1093 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1042 int len; 1094 int len;
1043 struct kvec *vec; 1095 struct kvec *vec;
1044 int pnum, vlen; 1096 unsigned int want, base;
1045 struct rpc_rqst *req = NULL; 1097 __be32 *p;
1098 __be32 calldir;
1099 int pnum;
1046 1100
1047 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1101 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1048 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 1102 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1053 if (len < 0) 1107 if (len < 0)
1054 goto error; 1108 goto error;
1055 1109
1110 base = svc_tcp_restore_pages(svsk, rqstp);
1111 want = svsk->sk_reclen - base;
1112
1056 vec = rqstp->rq_vec; 1113 vec = rqstp->rq_vec;
1057 vec[0] = rqstp->rq_arg.head[0];
1058 vlen = PAGE_SIZE;
1059 1114
1060 /* 1115 pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
1061 * We have enough data for the whole tcp record. Let's try and read the 1116 svsk->sk_reclen);
1062 * first 8 bytes to get the xid and the call direction. We can use this
1063 * to figure out if this is a call or a reply to a callback. If
1064 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
1065 * In that case, don't bother with the calldir and just read the data.
1066 * It will be rejected in svc_process.
1067 */
1068 if (len >= 8) {
1069 len = svc_process_calldir(svsk, rqstp, &req, vec);
1070 if (len < 0)
1071 goto err_again;
1072 vlen -= 8;
1073 }
1074 1117
1075 pnum = 1;
1076 while (vlen < len) {
1077 vec[pnum].iov_base = (req) ?
1078 page_address(req->rq_private_buf.pages[pnum - 1]) :
1079 page_address(rqstp->rq_pages[pnum]);
1080 vec[pnum].iov_len = PAGE_SIZE;
1081 pnum++;
1082 vlen += PAGE_SIZE;
1083 }
1084 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 1118 rqstp->rq_respages = &rqstp->rq_pages[pnum];
1085 1119
1086 /* Now receive data */ 1120 /* Now receive data */
1087 len = svc_recvfrom(rqstp, vec, pnum, len); 1121 len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
1088 if (len < 0) 1122 if (len >= 0)
1089 goto err_again; 1123 svsk->sk_tcplen += len;
1090 1124 if (len != want) {
1091 /* 1125 if (len < 0 && len != -EAGAIN)
1092 * Account for the 8 bytes we read earlier 1126 goto err_other;
1093 */ 1127 svc_tcp_save_pages(svsk, rqstp);
1094 len += 8; 1128 dprintk("svc: incomplete TCP record (%d of %d)\n",
1095 1129 svsk->sk_tcplen, svsk->sk_reclen);
1096 if (req) { 1130 goto err_noclose;
1097 xprt_complete_rqst(req->rq_task, len);
1098 len = 0;
1099 goto out;
1100 } 1131 }
1101 dprintk("svc: TCP complete record (%d bytes)\n", len); 1132
1102 rqstp->rq_arg.len = len; 1133 rqstp->rq_arg.len = svsk->sk_reclen;
1103 rqstp->rq_arg.page_base = 0; 1134 rqstp->rq_arg.page_base = 0;
1104 if (len <= rqstp->rq_arg.head[0].iov_len) { 1135 if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
1105 rqstp->rq_arg.head[0].iov_len = len; 1136 rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
1106 rqstp->rq_arg.page_len = 0; 1137 rqstp->rq_arg.page_len = 0;
1107 } else { 1138 } else
1108 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 1139 rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1109 }
1110 1140
1111 rqstp->rq_xprt_ctxt = NULL; 1141 rqstp->rq_xprt_ctxt = NULL;
1112 rqstp->rq_prot = IPPROTO_TCP; 1142 rqstp->rq_prot = IPPROTO_TCP;
1113 1143
1114out: 1144 p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
1145 calldir = p[1];
1146 if (calldir)
1147 len = receive_cb_reply(svsk, rqstp);
1148
1115 /* Reset TCP read info */ 1149 /* Reset TCP read info */
1116 svsk->sk_reclen = 0; 1150 svsk->sk_reclen = 0;
1117 svsk->sk_tcplen = 0; 1151 svsk->sk_tcplen = 0;
1152 /* If we have more data, signal svc_xprt_enqueue() to try again */
1153 if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
1154 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1155
1156 if (len < 0)
1157 goto error;
1118 1158
1119 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 1159 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
1120 if (serv->sv_stats) 1160 if (serv->sv_stats)
1121 serv->sv_stats->nettcpcnt++; 1161 serv->sv_stats->nettcpcnt++;
1122 1162
1123 return len; 1163 dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
1164 return rqstp->rq_arg.len;
1124 1165
1125err_again:
1126 if (len == -EAGAIN) {
1127 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1128 return len;
1129 }
1130error: 1166error:
1131 if (len != -EAGAIN) { 1167 if (len != -EAGAIN)
1132 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1168 goto err_other;
1133 svsk->sk_xprt.xpt_server->sv_name, -len); 1169 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1134 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1135 }
1136 return -EAGAIN; 1170 return -EAGAIN;
1171err_other:
1172 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1173 svsk->sk_xprt.xpt_server->sv_name, -len);
1174 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1175err_noclose:
1176 return -EAGAIN; /* record not complete */
1137} 1177}
1138 1178
1139/* 1179/*
@@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1304 1344
1305 svsk->sk_reclen = 0; 1345 svsk->sk_reclen = 0;
1306 svsk->sk_tcplen = 0; 1346 svsk->sk_tcplen = 0;
1347 memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
1307 1348
1308 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1349 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1309 1350
1310 /* initialise setting must have enough space to
1311 * receive and respond to one request.
1312 * svc_tcp_recvfrom will re-adjust if necessary
1313 */
1314 svc_sock_setbufsize(svsk->sk_sock,
1315 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1316 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1317
1318 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1319 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1351 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1320 if (sk->sk_state != TCP_ESTABLISHED) 1352 if (sk->sk_state != TCP_ESTABLISHED)
1321 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1353 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1379 /* Initialize the socket */ 1411 /* Initialize the socket */
1380 if (sock->type == SOCK_DGRAM) 1412 if (sock->type == SOCK_DGRAM)
1381 svc_udp_init(svsk, serv); 1413 svc_udp_init(svsk, serv);
1382 else 1414 else {
1415 /* initialise setting must have enough space to
1416 * receive and respond to one request.
1417 */
1418 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1419 4 * serv->sv_max_mesg);
1383 svc_tcp_init(svsk, serv); 1420 svc_tcp_init(svsk, serv);
1421 }
1384 1422
1385 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1423 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1386 svsk, svsk->sk_sk); 1424 svsk, svsk->sk_sk);
@@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
1562 1600
1563 svc_sock_detach(xprt); 1601 svc_sock_detach(xprt);
1564 1602
1565 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1603 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
1604 svc_tcp_clear_pages(svsk);
1566 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1605 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
1606 }
1567} 1607}
1568 1608
1569/* 1609/*