53 files changed, 3954 insertions, 2164 deletions
@@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library) | |||
126 | struct nameidata nd; | 126 | struct nameidata nd; |
127 | int error; | 127 | int error; |
128 | 128 | ||
129 | nd.intent.open.flags = FMODE_READ; | 129 | error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); |
130 | error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); | ||
131 | if (error) | 130 | if (error) |
132 | goto out; | 131 | goto out; |
133 | 132 | ||
@@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library) | |||
139 | if (error) | 138 | if (error) |
140 | goto exit; | 139 | goto exit; |
141 | 140 | ||
142 | file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); | 141 | file = nameidata_to_filp(&nd, O_RDONLY); |
143 | error = PTR_ERR(file); | 142 | error = PTR_ERR(file); |
144 | if (IS_ERR(file)) | 143 | if (IS_ERR(file)) |
145 | goto out; | 144 | goto out; |
@@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library) | |||
167 | out: | 166 | out: |
168 | return error; | 167 | return error; |
169 | exit: | 168 | exit: |
169 | release_open_intent(&nd); | ||
170 | path_release(&nd); | 170 | path_release(&nd); |
171 | goto out; | 171 | goto out; |
172 | } | 172 | } |
@@ -490,8 +490,7 @@ struct file *open_exec(const char *name) | |||
490 | int err; | 490 | int err; |
491 | struct file *file; | 491 | struct file *file; |
492 | 492 | ||
493 | nd.intent.open.flags = FMODE_READ; | 493 | err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); |
494 | err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); | ||
495 | file = ERR_PTR(err); | 494 | file = ERR_PTR(err); |
496 | 495 | ||
497 | if (!err) { | 496 | if (!err) { |
@@ -504,7 +503,7 @@ struct file *open_exec(const char *name) | |||
504 | err = -EACCES; | 503 | err = -EACCES; |
505 | file = ERR_PTR(err); | 504 | file = ERR_PTR(err); |
506 | if (!err) { | 505 | if (!err) { |
507 | file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); | 506 | file = nameidata_to_filp(&nd, O_RDONLY); |
508 | if (!IS_ERR(file)) { | 507 | if (!IS_ERR(file)) { |
509 | err = deny_write_access(file); | 508 | err = deny_write_access(file); |
510 | if (err) { | 509 | if (err) { |
@@ -516,6 +515,7 @@ out: | |||
516 | return file; | 515 | return file; |
517 | } | 516 | } |
518 | } | 517 | } |
518 | release_open_intent(&nd); | ||
519 | path_release(&nd); | 519 | path_release(&nd); |
520 | } | 520 | } |
521 | goto out; | 521 | goto out; |
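The two fs/exec.c hunks above drop the open-coded intent setup (nd.intent.open.flags = FMODE_READ followed by path_lookup()/dentry_open()) in favour of the open-intent helpers introduced later in this patch, so that the struct file is pre-allocated before the lookup and can be claimed by filesystems that open atomically during ->lookup(). A minimal sketch of the resulting caller pattern, assuming only the helpers shown in this diff (path_lookup_open(), nameidata_to_filp(), release_open_intent(), path_release()); the function name open_for_read() is purely illustrative:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/namei.h>

static struct file *open_for_read(const char *name)
{
	struct nameidata nd;
	int err;

	/* Lookup with open intent: a struct file is pre-allocated in nd. */
	err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ);
	if (err)
		return ERR_PTR(err);

	if (!S_ISREG(nd.dentry->d_inode->i_mode)) {
		/* Bailing out before the filp is consumed: drop the intent,
		 * then the path, mirroring the new error paths above. */
		release_open_intent(&nd);
		path_release(&nd);
		return ERR_PTR(-EACCES);
	}

	/* Hand the pre-allocated filp over to the dentry found by the lookup. */
	return nameidata_to_filp(&nd, O_RDONLY);
}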
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82c77df81c5f..c4c8601096e0 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -173,11 +173,10 @@ nlm_bind_host(struct nlm_host *host) | |||
173 | 173 | ||
174 | /* If we've already created an RPC client, check whether | 174 | /* If we've already created an RPC client, check whether |
175 | * RPC rebind is required | 175 | * RPC rebind is required |
176 | * Note: why keep rebinding if we're on a tcp connection? | ||
177 | */ | 176 | */ |
178 | if ((clnt = host->h_rpcclnt) != NULL) { | 177 | if ((clnt = host->h_rpcclnt) != NULL) { |
179 | xprt = clnt->cl_xprt; | 178 | xprt = clnt->cl_xprt; |
180 | if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) { | 179 | if (time_after_eq(jiffies, host->h_nextrebind)) { |
181 | clnt->cl_port = 0; | 180 | clnt->cl_port = 0; |
182 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 181 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
183 | dprintk("lockd: next rebind in %ld jiffies\n", | 182 | dprintk("lockd: next rebind in %ld jiffies\n", |
@@ -189,7 +188,6 @@ nlm_bind_host(struct nlm_host *host) | |||
189 | goto forgetit; | 188 | goto forgetit; |
190 | 189 | ||
191 | xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); | 190 | xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); |
192 | xprt->nocong = 1; /* No congestion control for NLM */ | ||
193 | xprt->resvport = 1; /* NLM requires a reserved port */ | 191 | xprt->resvport = 1; /* NLM requires a reserved port */ |
194 | 192 | ||
195 | /* Existing NLM servers accept AUTH_UNIX only */ | 193 | /* Existing NLM servers accept AUTH_UNIX only */ |
diff --git a/fs/locks.c b/fs/locks.c
index f7daa5f48949..a1e8b2248014 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | |||
316 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; | 316 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; |
317 | POSIX-2001 defines it. */ | 317 | POSIX-2001 defines it. */ |
318 | start += l->l_start; | 318 | start += l->l_start; |
319 | end = start + l->l_len - 1; | 319 | if (start < 0) |
320 | if (l->l_len < 0) { | 320 | return -EINVAL; |
321 | fl->fl_end = OFFSET_MAX; | ||
322 | if (l->l_len > 0) { | ||
323 | end = start + l->l_len - 1; | ||
324 | fl->fl_end = end; | ||
325 | } else if (l->l_len < 0) { | ||
321 | end = start - 1; | 326 | end = start - 1; |
327 | fl->fl_end = end; | ||
322 | start += l->l_len; | 328 | start += l->l_len; |
329 | if (start < 0) | ||
330 | return -EINVAL; | ||
323 | } | 331 | } |
324 | |||
325 | if (start < 0) | ||
326 | return -EINVAL; | ||
327 | if (l->l_len > 0 && end < 0) | ||
328 | return -EOVERFLOW; | ||
329 | |||
330 | fl->fl_start = start; /* we record the absolute position */ | 332 | fl->fl_start = start; /* we record the absolute position */ |
331 | fl->fl_end = end; | 333 | if (fl->fl_end < fl->fl_start) |
332 | if (l->l_len == 0) | 334 | return -EOVERFLOW; |
333 | fl->fl_end = OFFSET_MAX; | ||
334 | 335 | ||
335 | fl->fl_owner = current->files; | 336 | fl->fl_owner = current->files; |
336 | fl->fl_pid = current->tgid; | 337 | fl->fl_pid = current->tgid; |
@@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, | |||
362 | return -EINVAL; | 363 | return -EINVAL; |
363 | } | 364 | } |
364 | 365 | ||
365 | if (((start += l->l_start) < 0) || (l->l_len < 0)) | 366 | start += l->l_start; |
367 | if (start < 0) | ||
366 | return -EINVAL; | 368 | return -EINVAL; |
367 | fl->fl_end = start + l->l_len - 1; | 369 | fl->fl_end = OFFSET_MAX; |
368 | if (l->l_len > 0 && fl->fl_end < 0) | 370 | if (l->l_len > 0) { |
369 | return -EOVERFLOW; | 371 | fl->fl_end = start + l->l_len - 1; |
372 | } else if (l->l_len < 0) { | ||
373 | fl->fl_end = start - 1; | ||
374 | start += l->l_len; | ||
375 | if (start < 0) | ||
376 | return -EINVAL; | ||
377 | } | ||
370 | fl->fl_start = start; /* we record the absolute position */ | 378 | fl->fl_start = start; /* we record the absolute position */ |
371 | if (l->l_len == 0) | 379 | if (fl->fl_end < fl->fl_start) |
372 | fl->fl_end = OFFSET_MAX; | 380 | return -EOVERFLOW; |
373 | 381 | ||
374 | fl->fl_owner = current->files; | 382 | fl->fl_owner = current->files; |
375 | fl->fl_pid = current->tgid; | 383 | fl->fl_pid = current->tgid; |
@@ -829,12 +837,16 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request) | |||
829 | /* Detect adjacent or overlapping regions (if same lock type) | 837 | /* Detect adjacent or overlapping regions (if same lock type) |
830 | */ | 838 | */ |
831 | if (request->fl_type == fl->fl_type) { | 839 | if (request->fl_type == fl->fl_type) { |
840 | /* In all comparisons of start vs end, use | ||
841 | * "start - 1" rather than "end + 1". If end | ||
842 | * is OFFSET_MAX, end + 1 will become negative. | ||
843 | */ | ||
832 | if (fl->fl_end < request->fl_start - 1) | 844 | if (fl->fl_end < request->fl_start - 1) |
833 | goto next_lock; | 845 | goto next_lock; |
834 | /* If the next lock in the list has entirely bigger | 846 | /* If the next lock in the list has entirely bigger |
835 | * addresses than the new one, insert the lock here. | 847 | * addresses than the new one, insert the lock here. |
836 | */ | 848 | */ |
837 | if (fl->fl_start > request->fl_end + 1) | 849 | if (fl->fl_start - 1 > request->fl_end) |
838 | break; | 850 | break; |
839 | 851 | ||
840 | /* If we come here, the new and old lock are of the | 852 | /* If we come here, the new and old lock are of the |
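The flock_to_posix_lock()/flock64_to_posix_lock() rework above makes the l_len semantics explicit: a positive length locks [l_start, l_start + l_len - 1], zero means "to end of file" (fl_end = OFFSET_MAX), and a negative length locks the l_len bytes ending just before l_start. A small userspace illustration of that mapping (not the kernel code itself; OFFSET_MAX is stood in by INT64_MAX, and -1/-2 stand in for -EINVAL/-EOVERFLOW):

#include <stdio.h>
#include <stdint.h>

#define OFFSET_MAX INT64_MAX

static int to_range(int64_t start, int64_t l_len,
		    int64_t *fl_start, int64_t *fl_end)
{
	if (start < 0)
		return -1;
	*fl_end = OFFSET_MAX;		/* l_len == 0: lock to end of file */
	if (l_len > 0) {
		*fl_end = start + l_len - 1;
	} else if (l_len < 0) {
		*fl_end = start - 1;	/* range ends just before l_start */
		start += l_len;
		if (start < 0)
			return -1;
	}
	*fl_start = start;
	if (*fl_end < *fl_start)
		return -2;
	return 0;
}

int main(void)
{
	int64_t s, e;

	to_range(100, -10, &s, &e);	/* negative l_len: bytes 90..99 */
	printf("[%lld, %lld]\n", (long long)s, (long long)e);
	return 0;
}

The __posix_lock_file() hunk follows the same theme: comparing against "fl_start - 1" rather than "fl_end + 1" avoids signed overflow when an existing lock already ends at OFFSET_MAX.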
diff --git a/fs/namei.c b/fs/namei.c
index aa62dbda93ac..aaaa81036234 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
29 | #include <linux/mount.h> | 29 | #include <linux/mount.h> |
30 | #include <linux/audit.h> | 30 | #include <linux/audit.h> |
31 | #include <linux/file.h> | ||
31 | #include <asm/namei.h> | 32 | #include <asm/namei.h> |
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | 34 | ||
@@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd) | |||
317 | mntput_no_expire(nd->mnt); | 318 | mntput_no_expire(nd->mnt); |
318 | } | 319 | } |
319 | 320 | ||
321 | /** | ||
322 | * release_open_intent - free up open intent resources | ||
323 | * @nd: pointer to nameidata | ||
324 | */ | ||
325 | void release_open_intent(struct nameidata *nd) | ||
326 | { | ||
327 | if (nd->intent.open.file->f_dentry == NULL) | ||
328 | put_filp(nd->intent.open.file); | ||
329 | else | ||
330 | fput(nd->intent.open.file); | ||
331 | } | ||
332 | |||
320 | /* | 333 | /* |
321 | * Internal lookup() using the new generic dcache. | 334 | * Internal lookup() using the new generic dcache. |
322 | * SMP-safe | 335 | * SMP-safe |
@@ -750,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) | |||
750 | struct qstr this; | 763 | struct qstr this; |
751 | unsigned int c; | 764 | unsigned int c; |
752 | 765 | ||
766 | nd->flags |= LOOKUP_CONTINUE; | ||
753 | err = exec_permission_lite(inode, nd); | 767 | err = exec_permission_lite(inode, nd); |
754 | if (err == -EAGAIN) { | 768 | if (err == -EAGAIN) { |
755 | err = permission(inode, MAY_EXEC, nd); | 769 | err = permission(inode, MAY_EXEC, nd); |
@@ -802,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) | |||
802 | if (err < 0) | 816 | if (err < 0) |
803 | break; | 817 | break; |
804 | } | 818 | } |
805 | nd->flags |= LOOKUP_CONTINUE; | ||
806 | /* This does the actual lookups.. */ | 819 | /* This does the actual lookups.. */ |
807 | err = do_lookup(nd, &this, &next); | 820 | err = do_lookup(nd, &this, &next); |
808 | if (err) | 821 | if (err) |
@@ -1052,6 +1065,70 @@ out: | |||
1052 | return retval; | 1065 | return retval; |
1053 | } | 1066 | } |
1054 | 1067 | ||
1068 | static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, | ||
1069 | struct nameidata *nd, int open_flags, int create_mode) | ||
1070 | { | ||
1071 | struct file *filp = get_empty_filp(); | ||
1072 | int err; | ||
1073 | |||
1074 | if (filp == NULL) | ||
1075 | return -ENFILE; | ||
1076 | nd->intent.open.file = filp; | ||
1077 | nd->intent.open.flags = open_flags; | ||
1078 | nd->intent.open.create_mode = create_mode; | ||
1079 | err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); | ||
1080 | if (IS_ERR(nd->intent.open.file)) { | ||
1081 | if (err == 0) { | ||
1082 | err = PTR_ERR(nd->intent.open.file); | ||
1083 | path_release(nd); | ||
1084 | } | ||
1085 | } else if (err != 0) | ||
1086 | release_open_intent(nd); | ||
1087 | return err; | ||
1088 | } | ||
1089 | |||
1090 | /** | ||
1091 | * path_lookup_open - lookup a file path with open intent | ||
1092 | * @name: pointer to file name | ||
1093 | * @lookup_flags: lookup intent flags | ||
1094 | * @nd: pointer to nameidata | ||
1095 | * @open_flags: open intent flags | ||
1096 | */ | ||
1097 | int path_lookup_open(const char *name, unsigned int lookup_flags, | ||
1098 | struct nameidata *nd, int open_flags) | ||
1099 | { | ||
1100 | return __path_lookup_intent_open(name, lookup_flags, nd, | ||
1101 | open_flags, 0); | ||
1102 | } | ||
1103 | |||
1104 | /** | ||
1105 | * path_lookup_create - lookup a file path with open + create intent | ||
1106 | * @name: pointer to file name | ||
1107 | * @lookup_flags: lookup intent flags | ||
1108 | * @nd: pointer to nameidata | ||
1109 | * @open_flags: open intent flags | ||
1110 | * @create_mode: create intent flags | ||
1111 | */ | ||
1112 | int path_lookup_create(const char *name, unsigned int lookup_flags, | ||
1113 | struct nameidata *nd, int open_flags, int create_mode) | ||
1114 | { | ||
1115 | return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, | ||
1116 | open_flags, create_mode); | ||
1117 | } | ||
1118 | |||
1119 | int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, | ||
1120 | struct nameidata *nd, int open_flags) | ||
1121 | { | ||
1122 | char *tmp = getname(name); | ||
1123 | int err = PTR_ERR(tmp); | ||
1124 | |||
1125 | if (!IS_ERR(tmp)) { | ||
1126 | err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); | ||
1127 | putname(tmp); | ||
1128 | } | ||
1129 | return err; | ||
1130 | } | ||
1131 | |||
1055 | /* | 1132 | /* |
1056 | * Restricted form of lookup. Doesn't follow links, single-component only, | 1133 | * Restricted form of lookup. Doesn't follow links, single-component only, |
1057 | * needs parent already locked. Doesn't follow mounts. | 1134 | * needs parent already locked. Doesn't follow mounts. |
@@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1416 | */ | 1493 | */ |
1417 | int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) | 1494 | int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) |
1418 | { | 1495 | { |
1419 | int acc_mode, error = 0; | 1496 | int acc_mode, error; |
1420 | struct path path; | 1497 | struct path path; |
1421 | struct dentry *dir; | 1498 | struct dentry *dir; |
1422 | int count = 0; | 1499 | int count = 0; |
1423 | 1500 | ||
1424 | acc_mode = ACC_MODE(flag); | 1501 | acc_mode = ACC_MODE(flag); |
1425 | 1502 | ||
1503 | /* O_TRUNC implies we need access checks for write permissions */ | ||
1504 | if (flag & O_TRUNC) | ||
1505 | acc_mode |= MAY_WRITE; | ||
1506 | |||
1426 | /* Allow the LSM permission hook to distinguish append | 1507 | /* Allow the LSM permission hook to distinguish append |
1427 | access from general write access. */ | 1508 | access from general write access. */ |
1428 | if (flag & O_APPEND) | 1509 | if (flag & O_APPEND) |
1429 | acc_mode |= MAY_APPEND; | 1510 | acc_mode |= MAY_APPEND; |
1430 | 1511 | ||
1431 | /* Fill in the open() intent data */ | ||
1432 | nd->intent.open.flags = flag; | ||
1433 | nd->intent.open.create_mode = mode; | ||
1434 | |||
1435 | /* | 1512 | /* |
1436 | * The simplest case - just a plain lookup. | 1513 | * The simplest case - just a plain lookup. |
1437 | */ | 1514 | */ |
1438 | if (!(flag & O_CREAT)) { | 1515 | if (!(flag & O_CREAT)) { |
1439 | error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); | 1516 | error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); |
1440 | if (error) | 1517 | if (error) |
1441 | return error; | 1518 | return error; |
1442 | goto ok; | 1519 | goto ok; |
@@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) | |||
1445 | /* | 1522 | /* |
1446 | * Create - we need to know the parent. | 1523 | * Create - we need to know the parent. |
1447 | */ | 1524 | */ |
1448 | error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); | 1525 | error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); |
1449 | if (error) | 1526 | if (error) |
1450 | return error; | 1527 | return error; |
1451 | 1528 | ||
@@ -1520,6 +1597,8 @@ ok: | |||
1520 | exit_dput: | 1597 | exit_dput: |
1521 | dput_path(&path, nd); | 1598 | dput_path(&path, nd); |
1522 | exit: | 1599 | exit: |
1600 | if (!IS_ERR(nd->intent.open.file)) | ||
1601 | release_open_intent(nd); | ||
1523 | path_release(nd); | 1602 | path_release(nd); |
1524 | return error; | 1603 | return error; |
1525 | 1604 | ||
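The fs/namei.c additions carry the intent filp around in nd->intent.open.file, and the reworked open_namei() now drops it on its error path before releasing the path. A minimal sketch of that lookup/teardown pairing, assuming only the functions introduced in these hunks; probe_parent_with_create_intent() is an illustrative name, not part of the patch:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/namei.h>

static int probe_parent_with_create_intent(const char *pathname,
					   int flag, int mode)
{
	struct nameidata nd;
	int error;

	/* Parent lookup with open + create intent recorded in nd. */
	error = path_lookup_create(pathname, LOOKUP_PARENT, &nd, flag, mode);
	if (error)
		return error;

	/*
	 * Teardown order used by the new open_namei() exit path: drop the
	 * pre-allocated intent filp first (if one is still held), then the
	 * path itself.
	 */
	if (!IS_ERR(nd.intent.open.file))
		release_open_intent(&nd);
	path_release(&nd);
	return 0;
}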
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4a36839f0bbd..44135af9894c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -142,7 +142,7 @@ static void nfs_msync_inode(struct inode *inode) | |||
142 | /* | 142 | /* |
143 | * Basic procedure for returning a delegation to the server | 143 | * Basic procedure for returning a delegation to the server |
144 | */ | 144 | */ |
145 | int nfs_inode_return_delegation(struct inode *inode) | 145 | int __nfs_inode_return_delegation(struct inode *inode) |
146 | { | 146 | { |
147 | struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; | 147 | struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; |
148 | struct nfs_inode *nfsi = NFS_I(inode); | 148 | struct nfs_inode *nfsi = NFS_I(inode); |
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3f6c45a29d6a..8017846b561f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -25,7 +25,7 @@ struct nfs_delegation { | |||
25 | 25 | ||
26 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 26 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
27 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 27 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
28 | int nfs_inode_return_delegation(struct inode *inode); | 28 | int __nfs_inode_return_delegation(struct inode *inode); |
29 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); | 29 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); |
30 | 30 | ||
31 | struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); | 31 | struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); |
@@ -47,11 +47,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags) | |||
47 | return 1; | 47 | return 1; |
48 | return 0; | 48 | return 0; |
49 | } | 49 | } |
50 | |||
51 | static inline int nfs_inode_return_delegation(struct inode *inode) | ||
52 | { | ||
53 | int err = 0; | ||
54 | |||
55 | if (NFS_I(inode)->delegation != NULL) | ||
56 | err = __nfs_inode_return_delegation(inode); | ||
57 | return err; | ||
58 | } | ||
50 | #else | 59 | #else |
51 | static inline int nfs_have_delegation(struct inode *inode, int flags) | 60 | static inline int nfs_have_delegation(struct inode *inode, int flags) |
52 | { | 61 | { |
53 | return 0; | 62 | return 0; |
54 | } | 63 | } |
64 | |||
65 | static inline int nfs_inode_return_delegation(struct inode *inode) | ||
66 | { | ||
67 | return 0; | ||
68 | } | ||
55 | #endif | 69 | #endif |
56 | 70 | ||
57 | #endif | 71 | #endif |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2df639f143e8..eb50c19fc253 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -565,8 +565,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
565 | } | 565 | } |
566 | } | 566 | } |
567 | unlock_kernel(); | 567 | unlock_kernel(); |
568 | if (desc->error < 0) | ||
569 | return desc->error; | ||
570 | if (res < 0) | 568 | if (res < 0) |
571 | return res; | 569 | return res; |
572 | return 0; | 570 | return 0; |
@@ -803,6 +801,7 @@ static int nfs_dentry_delete(struct dentry *dentry) | |||
803 | */ | 801 | */ |
804 | static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) | 802 | static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) |
805 | { | 803 | { |
804 | nfs_inode_return_delegation(inode); | ||
806 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { | 805 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { |
807 | lock_kernel(); | 806 | lock_kernel(); |
808 | inode->i_nlink--; | 807 | inode->i_nlink--; |
@@ -916,7 +915,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) | |||
916 | static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | 915 | static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
917 | { | 916 | { |
918 | struct dentry *res = NULL; | 917 | struct dentry *res = NULL; |
919 | struct inode *inode = NULL; | ||
920 | int error; | 918 | int error; |
921 | 919 | ||
922 | /* Check that we are indeed trying to open this file */ | 920 | /* Check that we are indeed trying to open this file */ |
@@ -930,8 +928,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
930 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; | 928 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; |
931 | 929 | ||
932 | /* Let vfs_create() deal with O_EXCL */ | 930 | /* Let vfs_create() deal with O_EXCL */ |
933 | if (nd->intent.open.flags & O_EXCL) | 931 | if (nd->intent.open.flags & O_EXCL) { |
934 | goto no_entry; | 932 | d_add(dentry, NULL); |
933 | goto out; | ||
934 | } | ||
935 | 935 | ||
936 | /* Open the file on the server */ | 936 | /* Open the file on the server */ |
937 | lock_kernel(); | 937 | lock_kernel(); |
@@ -945,32 +945,30 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
945 | 945 | ||
946 | if (nd->intent.open.flags & O_CREAT) { | 946 | if (nd->intent.open.flags & O_CREAT) { |
947 | nfs_begin_data_update(dir); | 947 | nfs_begin_data_update(dir); |
948 | inode = nfs4_atomic_open(dir, dentry, nd); | 948 | res = nfs4_atomic_open(dir, dentry, nd); |
949 | nfs_end_data_update(dir); | 949 | nfs_end_data_update(dir); |
950 | } else | 950 | } else |
951 | inode = nfs4_atomic_open(dir, dentry, nd); | 951 | res = nfs4_atomic_open(dir, dentry, nd); |
952 | unlock_kernel(); | 952 | unlock_kernel(); |
953 | if (IS_ERR(inode)) { | 953 | if (IS_ERR(res)) { |
954 | error = PTR_ERR(inode); | 954 | error = PTR_ERR(res); |
955 | switch (error) { | 955 | switch (error) { |
956 | /* Make a negative dentry */ | 956 | /* Make a negative dentry */ |
957 | case -ENOENT: | 957 | case -ENOENT: |
958 | inode = NULL; | 958 | res = NULL; |
959 | break; | 959 | goto out; |
960 | /* This turned out not to be a regular file */ | 960 | /* This turned out not to be a regular file */ |
961 | case -EISDIR: | ||
962 | case -ENOTDIR: | ||
963 | goto no_open; | ||
961 | case -ELOOP: | 964 | case -ELOOP: |
962 | if (!(nd->intent.open.flags & O_NOFOLLOW)) | 965 | if (!(nd->intent.open.flags & O_NOFOLLOW)) |
963 | goto no_open; | 966 | goto no_open; |
964 | /* case -EISDIR: */ | ||
965 | /* case -EINVAL: */ | 967 | /* case -EINVAL: */ |
966 | default: | 968 | default: |
967 | res = ERR_PTR(error); | ||
968 | goto out; | 969 | goto out; |
969 | } | 970 | } |
970 | } | 971 | } else if (res != NULL) |
971 | no_entry: | ||
972 | res = d_add_unique(dentry, inode); | ||
973 | if (res != NULL) | ||
974 | dentry = res; | 972 | dentry = res; |
975 | nfs_renew_times(dentry); | 973 | nfs_renew_times(dentry); |
976 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 974 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
@@ -1014,7 +1012,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1014 | */ | 1012 | */ |
1015 | lock_kernel(); | 1013 | lock_kernel(); |
1016 | verifier = nfs_save_change_attribute(dir); | 1014 | verifier = nfs_save_change_attribute(dir); |
1017 | ret = nfs4_open_revalidate(dir, dentry, openflags); | 1015 | ret = nfs4_open_revalidate(dir, dentry, openflags, nd); |
1018 | if (!ret) | 1016 | if (!ret) |
1019 | nfs_set_verifier(dentry, verifier); | 1017 | nfs_set_verifier(dentry, verifier); |
1020 | unlock_kernel(); | 1018 | unlock_kernel(); |
@@ -1137,7 +1135,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1137 | 1135 | ||
1138 | lock_kernel(); | 1136 | lock_kernel(); |
1139 | nfs_begin_data_update(dir); | 1137 | nfs_begin_data_update(dir); |
1140 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); | 1138 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); |
1141 | nfs_end_data_update(dir); | 1139 | nfs_end_data_update(dir); |
1142 | if (error != 0) | 1140 | if (error != 0) |
1143 | goto out_err; | 1141 | goto out_err; |
@@ -1332,6 +1330,7 @@ static int nfs_safe_remove(struct dentry *dentry) | |||
1332 | 1330 | ||
1333 | nfs_begin_data_update(dir); | 1331 | nfs_begin_data_update(dir); |
1334 | if (inode != NULL) { | 1332 | if (inode != NULL) { |
1333 | nfs_inode_return_delegation(inode); | ||
1335 | nfs_begin_data_update(inode); | 1334 | nfs_begin_data_update(inode); |
1336 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); | 1335 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); |
1337 | /* The VFS may want to delete this inode */ | 1336 | /* The VFS may want to delete this inode */ |
@@ -1512,9 +1511,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1512 | */ | 1511 | */ |
1513 | if (!new_inode) | 1512 | if (!new_inode) |
1514 | goto go_ahead; | 1513 | goto go_ahead; |
1515 | if (S_ISDIR(new_inode->i_mode)) | 1514 | if (S_ISDIR(new_inode->i_mode)) { |
1516 | goto out; | 1515 | error = -EISDIR; |
1517 | else if (atomic_read(&new_dentry->d_count) > 2) { | 1516 | if (!S_ISDIR(old_inode->i_mode)) |
1517 | goto out; | ||
1518 | } else if (atomic_read(&new_dentry->d_count) > 2) { | ||
1518 | int err; | 1519 | int err; |
1519 | /* copy the target dentry's name */ | 1520 | /* copy the target dentry's name */ |
1520 | dentry = d_alloc(new_dentry->d_parent, | 1521 | dentry = d_alloc(new_dentry->d_parent, |
@@ -1539,7 +1540,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1539 | #endif | 1540 | #endif |
1540 | goto out; | 1541 | goto out; |
1541 | } | 1542 | } |
1542 | } | 1543 | } else |
1544 | new_inode->i_nlink--; | ||
1543 | 1545 | ||
1544 | go_ahead: | 1546 | go_ahead: |
1545 | /* | 1547 | /* |
@@ -1549,6 +1551,7 @@ go_ahead: | |||
1549 | nfs_wb_all(old_inode); | 1551 | nfs_wb_all(old_inode); |
1550 | shrink_dcache_parent(old_dentry); | 1552 | shrink_dcache_parent(old_dentry); |
1551 | } | 1553 | } |
1554 | nfs_inode_return_delegation(old_inode); | ||
1552 | 1555 | ||
1553 | if (new_inode) | 1556 | if (new_inode) |
1554 | d_delete(new_dentry); | 1557 | d_delete(new_dentry); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6bdcfa95de94..572d8593486f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -376,22 +376,31 @@ out_swapfile: | |||
376 | 376 | ||
377 | static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) | 377 | static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) |
378 | { | 378 | { |
379 | struct file_lock *cfl; | ||
379 | struct inode *inode = filp->f_mapping->host; | 380 | struct inode *inode = filp->f_mapping->host; |
380 | int status = 0; | 381 | int status = 0; |
381 | 382 | ||
382 | lock_kernel(); | 383 | lock_kernel(); |
383 | /* Use local locking if mounted with "-onolock" */ | 384 | /* Try local locking first */ |
384 | if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) | 385 | cfl = posix_test_lock(filp, fl); |
385 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); | 386 | if (cfl != NULL) { |
386 | else { | 387 | locks_copy_lock(fl, cfl); |
387 | struct file_lock *cfl = posix_test_lock(filp, fl); | 388 | goto out; |
388 | |||
389 | fl->fl_type = F_UNLCK; | ||
390 | if (cfl != NULL) | ||
391 | memcpy(fl, cfl, sizeof(*fl)); | ||
392 | } | 389 | } |
390 | |||
391 | if (nfs_have_delegation(inode, FMODE_READ)) | ||
392 | goto out_noconflict; | ||
393 | |||
394 | if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) | ||
395 | goto out_noconflict; | ||
396 | |||
397 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); | ||
398 | out: | ||
393 | unlock_kernel(); | 399 | unlock_kernel(); |
394 | return status; | 400 | return status; |
401 | out_noconflict: | ||
402 | fl->fl_type = F_UNLCK; | ||
403 | goto out; | ||
395 | } | 404 | } |
396 | 405 | ||
397 | static int do_vfs_lock(struct file *file, struct file_lock *fl) | 406 | static int do_vfs_lock(struct file *file, struct file_lock *fl) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d4eadeea128e..65d5ab45ddc5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -358,6 +358,35 @@ out_no_root: | |||
358 | return no_root_error; | 358 | return no_root_error; |
359 | } | 359 | } |
360 | 360 | ||
361 | static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) | ||
362 | { | ||
363 | to->to_initval = timeo * HZ / 10; | ||
364 | to->to_retries = retrans; | ||
365 | if (!to->to_retries) | ||
366 | to->to_retries = 2; | ||
367 | |||
368 | switch (proto) { | ||
369 | case IPPROTO_TCP: | ||
370 | if (!to->to_initval) | ||
371 | to->to_initval = 60 * HZ; | ||
372 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) | ||
373 | to->to_initval = NFS_MAX_TCP_TIMEOUT; | ||
374 | to->to_increment = to->to_initval; | ||
375 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); | ||
376 | to->to_exponential = 0; | ||
377 | break; | ||
378 | case IPPROTO_UDP: | ||
379 | default: | ||
380 | if (!to->to_initval) | ||
381 | to->to_initval = 11 * HZ / 10; | ||
382 | if (to->to_initval > NFS_MAX_UDP_TIMEOUT) | ||
383 | to->to_initval = NFS_MAX_UDP_TIMEOUT; | ||
384 | to->to_maxval = NFS_MAX_UDP_TIMEOUT; | ||
385 | to->to_exponential = 1; | ||
386 | break; | ||
387 | } | ||
388 | } | ||
389 | |||
361 | /* | 390 | /* |
362 | * Create an RPC client handle. | 391 | * Create an RPC client handle. |
363 | */ | 392 | */ |
@@ -367,22 +396,12 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) | |||
367 | struct rpc_timeout timeparms; | 396 | struct rpc_timeout timeparms; |
368 | struct rpc_xprt *xprt = NULL; | 397 | struct rpc_xprt *xprt = NULL; |
369 | struct rpc_clnt *clnt = NULL; | 398 | struct rpc_clnt *clnt = NULL; |
370 | int tcp = (data->flags & NFS_MOUNT_TCP); | 399 | int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; |
371 | 400 | ||
372 | /* Initialize timeout values */ | 401 | nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); |
373 | timeparms.to_initval = data->timeo * HZ / 10; | ||
374 | timeparms.to_retries = data->retrans; | ||
375 | timeparms.to_maxval = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT; | ||
376 | timeparms.to_exponential = 1; | ||
377 | |||
378 | if (!timeparms.to_initval) | ||
379 | timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10; | ||
380 | if (!timeparms.to_retries) | ||
381 | timeparms.to_retries = 5; | ||
382 | 402 | ||
383 | /* create transport and client */ | 403 | /* create transport and client */ |
384 | xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, | 404 | xprt = xprt_create_proto(proto, &server->addr, &timeparms); |
385 | &server->addr, &timeparms); | ||
386 | if (IS_ERR(xprt)) { | 405 | if (IS_ERR(xprt)) { |
387 | dprintk("%s: cannot create RPC transport. Error = %ld\n", | 406 | dprintk("%s: cannot create RPC transport. Error = %ld\n", |
388 | __FUNCTION__, PTR_ERR(xprt)); | 407 | __FUNCTION__, PTR_ERR(xprt)); |
@@ -576,7 +595,6 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
576 | { NFS_MOUNT_SOFT, ",soft", ",hard" }, | 595 | { NFS_MOUNT_SOFT, ",soft", ",hard" }, |
577 | { NFS_MOUNT_INTR, ",intr", "" }, | 596 | { NFS_MOUNT_INTR, ",intr", "" }, |
578 | { NFS_MOUNT_POSIX, ",posix", "" }, | 597 | { NFS_MOUNT_POSIX, ",posix", "" }, |
579 | { NFS_MOUNT_TCP, ",tcp", ",udp" }, | ||
580 | { NFS_MOUNT_NOCTO, ",nocto", "" }, | 598 | { NFS_MOUNT_NOCTO, ",nocto", "" }, |
581 | { NFS_MOUNT_NOAC, ",noac", "" }, | 599 | { NFS_MOUNT_NOAC, ",noac", "" }, |
582 | { NFS_MOUNT_NONLM, ",nolock", ",lock" }, | 600 | { NFS_MOUNT_NONLM, ",nolock", ",lock" }, |
@@ -585,6 +603,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
585 | }; | 603 | }; |
586 | struct proc_nfs_info *nfs_infop; | 604 | struct proc_nfs_info *nfs_infop; |
587 | struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); | 605 | struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); |
606 | char buf[12]; | ||
607 | char *proto; | ||
588 | 608 | ||
589 | seq_printf(m, ",v%d", nfss->rpc_ops->version); | 609 | seq_printf(m, ",v%d", nfss->rpc_ops->version); |
590 | seq_printf(m, ",rsize=%d", nfss->rsize); | 610 | seq_printf(m, ",rsize=%d", nfss->rsize); |
@@ -603,6 +623,18 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
603 | else | 623 | else |
604 | seq_puts(m, nfs_infop->nostr); | 624 | seq_puts(m, nfs_infop->nostr); |
605 | } | 625 | } |
626 | switch (nfss->client->cl_xprt->prot) { | ||
627 | case IPPROTO_TCP: | ||
628 | proto = "tcp"; | ||
629 | break; | ||
630 | case IPPROTO_UDP: | ||
631 | proto = "udp"; | ||
632 | break; | ||
633 | default: | ||
634 | snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); | ||
635 | proto = buf; | ||
636 | } | ||
637 | seq_printf(m, ",proto=%s", proto); | ||
606 | seq_puts(m, ",addr="); | 638 | seq_puts(m, ",addr="); |
607 | seq_escape(m, nfss->hostname, " \t\n\\"); | 639 | seq_escape(m, nfss->hostname, " \t\n\\"); |
608 | return 0; | 640 | return 0; |
@@ -821,6 +853,11 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
821 | filemap_fdatawait(inode->i_mapping); | 853 | filemap_fdatawait(inode->i_mapping); |
822 | nfs_wb_all(inode); | 854 | nfs_wb_all(inode); |
823 | } | 855 | } |
856 | /* | ||
857 | * Return any delegations if we're going to change ACLs | ||
858 | */ | ||
859 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) | ||
860 | nfs_inode_return_delegation(inode); | ||
824 | error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); | 861 | error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); |
825 | if (error == 0) | 862 | if (error == 0) |
826 | nfs_refresh_inode(inode, &fattr); | 863 | nfs_refresh_inode(inode, &fattr); |
@@ -1639,8 +1676,7 @@ static void nfs4_clear_inode(struct inode *inode) | |||
1639 | struct nfs_inode *nfsi = NFS_I(inode); | 1676 | struct nfs_inode *nfsi = NFS_I(inode); |
1640 | 1677 | ||
1641 | /* If we are holding a delegation, return it! */ | 1678 | /* If we are holding a delegation, return it! */ |
1642 | if (nfsi->delegation != NULL) | 1679 | nfs_inode_return_delegation(inode); |
1643 | nfs_inode_return_delegation(inode); | ||
1644 | /* First call standard NFS clear_inode() code */ | 1680 | /* First call standard NFS clear_inode() code */ |
1645 | nfs_clear_inode(inode); | 1681 | nfs_clear_inode(inode); |
1646 | /* Now clear out any remaining state */ | 1682 | /* Now clear out any remaining state */ |
@@ -1669,7 +1705,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, | |||
1669 | struct rpc_clnt *clnt = NULL; | 1705 | struct rpc_clnt *clnt = NULL; |
1670 | struct rpc_timeout timeparms; | 1706 | struct rpc_timeout timeparms; |
1671 | rpc_authflavor_t authflavour; | 1707 | rpc_authflavor_t authflavour; |
1672 | int proto, err = -EIO; | 1708 | int err = -EIO; |
1673 | 1709 | ||
1674 | sb->s_blocksize_bits = 0; | 1710 | sb->s_blocksize_bits = 0; |
1675 | sb->s_blocksize = 0; | 1711 | sb->s_blocksize = 0; |
@@ -1687,30 +1723,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, | |||
1687 | server->acdirmax = data->acdirmax*HZ; | 1723 | server->acdirmax = data->acdirmax*HZ; |
1688 | 1724 | ||
1689 | server->rpc_ops = &nfs_v4_clientops; | 1725 | server->rpc_ops = &nfs_v4_clientops; |
1690 | /* Initialize timeout values */ | ||
1691 | |||
1692 | timeparms.to_initval = data->timeo * HZ / 10; | ||
1693 | timeparms.to_retries = data->retrans; | ||
1694 | timeparms.to_exponential = 1; | ||
1695 | if (!timeparms.to_retries) | ||
1696 | timeparms.to_retries = 5; | ||
1697 | 1726 | ||
1698 | proto = data->proto; | 1727 | nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); |
1699 | /* Which IP protocol do we use? */ | ||
1700 | switch (proto) { | ||
1701 | case IPPROTO_TCP: | ||
1702 | timeparms.to_maxval = RPC_MAX_TCP_TIMEOUT; | ||
1703 | if (!timeparms.to_initval) | ||
1704 | timeparms.to_initval = 600 * HZ / 10; | ||
1705 | break; | ||
1706 | case IPPROTO_UDP: | ||
1707 | timeparms.to_maxval = RPC_MAX_UDP_TIMEOUT; | ||
1708 | if (!timeparms.to_initval) | ||
1709 | timeparms.to_initval = 11 * HZ / 10; | ||
1710 | break; | ||
1711 | default: | ||
1712 | return -EINVAL; | ||
1713 | } | ||
1714 | 1728 | ||
1715 | clp = nfs4_get_client(&server->addr.sin_addr); | 1729 | clp = nfs4_get_client(&server->addr.sin_addr); |
1716 | if (!clp) { | 1730 | if (!clp) { |
@@ -1735,7 +1749,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, | |||
1735 | 1749 | ||
1736 | down_write(&clp->cl_sem); | 1750 | down_write(&clp->cl_sem); |
1737 | if (IS_ERR(clp->cl_rpcclient)) { | 1751 | if (IS_ERR(clp->cl_rpcclient)) { |
1738 | xprt = xprt_create_proto(proto, &server->addr, &timeparms); | 1752 | xprt = xprt_create_proto(data->proto, &server->addr, &timeparms); |
1739 | if (IS_ERR(xprt)) { | 1753 | if (IS_ERR(xprt)) { |
1740 | up_write(&clp->cl_sem); | 1754 | up_write(&clp->cl_sem); |
1741 | err = PTR_ERR(xprt); | 1755 | err = PTR_ERR(xprt); |
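The new nfs_init_timeout_values() above centralizes the RPC timeout setup for both the v2/v3 and v4 mount paths: timeo is given in tenths of a second, TCP defaults to 60 s with linear backoff, UDP defaults to 1.1 s with exponential backoff, and both are capped by NFS_MAX_TCP_TIMEOUT/NFS_MAX_UDP_TIMEOUT. A worked example of the TCP arithmetic (userspace illustration only; HZ=1000 assumed and the cap ignored):

#include <stdio.h>

int main(void)
{
	const unsigned long HZ = 1000;
	unsigned int timeo = 600, retrans = 3;	  /* e.g. mount -o timeo=600,retrans=3 */
	unsigned long initval = timeo * HZ / 10;  /* 60 000 jiffies = 60 s */
	unsigned long increment = initval;	  /* TCP retries back off linearly */
	unsigned long maxval = initval + increment * retrans;

	printf("initval=%lus maxval=%lus\n", initval / HZ, maxval / HZ);
	/* prints: initval=60s maxval=240s */
	return 0;
}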
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index edc95514046d..e4a1cd48195e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) | |||
299 | */ | 299 | */ |
300 | static int | 300 | static int |
301 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 301 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
302 | int flags) | 302 | int flags, struct nameidata *nd) |
303 | { | 303 | { |
304 | struct nfs_fh fhandle; | 304 | struct nfs_fh fhandle; |
305 | struct nfs_fattr fattr; | 305 | struct nfs_fattr fattr; |
@@ -735,7 +735,7 @@ extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); | |||
735 | static void | 735 | static void |
736 | nfs3_read_done(struct rpc_task *task) | 736 | nfs3_read_done(struct rpc_task *task) |
737 | { | 737 | { |
738 | struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; | 738 | struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; |
739 | 739 | ||
740 | if (nfs3_async_handle_jukebox(task)) | 740 | if (nfs3_async_handle_jukebox(task)) |
741 | return; | 741 | return; |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ec1a22d7b876..78a53f5a9f18 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -93,25 +93,50 @@ struct nfs4_client { | |||
93 | }; | 93 | }; |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * struct rpc_sequence ensures that RPC calls are sent in the exact | ||
97 | * order that they appear on the list. | ||
98 | */ | ||
99 | struct rpc_sequence { | ||
100 | struct rpc_wait_queue wait; /* RPC call delay queue */ | ||
101 | spinlock_t lock; /* Protects the list */ | ||
102 | struct list_head list; /* Defines sequence of RPC calls */ | ||
103 | }; | ||
104 | |||
105 | #define NFS_SEQID_CONFIRMED 1 | ||
106 | struct nfs_seqid_counter { | ||
107 | struct rpc_sequence *sequence; | ||
108 | int flags; | ||
109 | u32 counter; | ||
110 | }; | ||
111 | |||
112 | struct nfs_seqid { | ||
113 | struct nfs_seqid_counter *sequence; | ||
114 | struct list_head list; | ||
115 | }; | ||
116 | |||
117 | static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) | ||
118 | { | ||
119 | if (seqid_mutating_err(-status)) | ||
120 | seqid->flags |= NFS_SEQID_CONFIRMED; | ||
121 | } | ||
122 | |||
123 | /* | ||
96 | * NFS4 state_owners and lock_owners are simply labels for ordered | 124 | * NFS4 state_owners and lock_owners are simply labels for ordered |
97 | * sequences of RPC calls. Their sole purpose is to provide once-only | 125 | * sequences of RPC calls. Their sole purpose is to provide once-only |
98 | * semantics by allowing the server to identify replayed requests. | 126 | * semantics by allowing the server to identify replayed requests. |
99 | * | ||
100 | * The ->so_sema is held during all state_owner seqid-mutating operations: | ||
101 | * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize | ||
102 | * so_seqid. | ||
103 | */ | 127 | */ |
104 | struct nfs4_state_owner { | 128 | struct nfs4_state_owner { |
129 | spinlock_t so_lock; | ||
105 | struct list_head so_list; /* per-clientid list of state_owners */ | 130 | struct list_head so_list; /* per-clientid list of state_owners */ |
106 | struct nfs4_client *so_client; | 131 | struct nfs4_client *so_client; |
107 | u32 so_id; /* 32-bit identifier, unique */ | 132 | u32 so_id; /* 32-bit identifier, unique */ |
108 | struct semaphore so_sema; | ||
109 | u32 so_seqid; /* protected by so_sema */ | ||
110 | atomic_t so_count; | 133 | atomic_t so_count; |
111 | 134 | ||
112 | struct rpc_cred *so_cred; /* Associated cred */ | 135 | struct rpc_cred *so_cred; /* Associated cred */ |
113 | struct list_head so_states; | 136 | struct list_head so_states; |
114 | struct list_head so_delegations; | 137 | struct list_head so_delegations; |
138 | struct nfs_seqid_counter so_seqid; | ||
139 | struct rpc_sequence so_sequence; | ||
115 | }; | 140 | }; |
116 | 141 | ||
117 | /* | 142 | /* |
@@ -132,7 +157,7 @@ struct nfs4_lock_state { | |||
132 | fl_owner_t ls_owner; /* POSIX lock owner */ | 157 | fl_owner_t ls_owner; /* POSIX lock owner */ |
133 | #define NFS_LOCK_INITIALIZED 1 | 158 | #define NFS_LOCK_INITIALIZED 1 |
134 | int ls_flags; | 159 | int ls_flags; |
135 | u32 ls_seqid; | 160 | struct nfs_seqid_counter ls_seqid; |
136 | u32 ls_id; | 161 | u32 ls_id; |
137 | nfs4_stateid ls_stateid; | 162 | nfs4_stateid ls_stateid; |
138 | atomic_t ls_count; | 163 | atomic_t ls_count; |
@@ -153,7 +178,6 @@ struct nfs4_state { | |||
153 | struct inode *inode; /* Pointer to the inode */ | 178 | struct inode *inode; /* Pointer to the inode */ |
154 | 179 | ||
155 | unsigned long flags; /* Do we hold any locks? */ | 180 | unsigned long flags; /* Do we hold any locks? */ |
156 | struct semaphore lock_sema; /* Serializes file locking operations */ | ||
157 | spinlock_t state_lock; /* Protects the lock_states list */ | 181 | spinlock_t state_lock; /* Protects the lock_states list */ |
158 | 182 | ||
159 | nfs4_stateid stateid; | 183 | nfs4_stateid stateid; |
@@ -191,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); | |||
191 | extern int nfs4_proc_async_renew(struct nfs4_client *); | 215 | extern int nfs4_proc_async_renew(struct nfs4_client *); |
192 | extern int nfs4_proc_renew(struct nfs4_client *); | 216 | extern int nfs4_proc_renew(struct nfs4_client *); |
193 | extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); | 217 | extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); |
194 | extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); | 218 | extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); |
195 | extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); | 219 | extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); |
196 | 220 | ||
197 | extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; | 221 | extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; |
198 | extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; | 222 | extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; |
@@ -224,12 +248,17 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state | |||
224 | extern void nfs4_put_open_state(struct nfs4_state *); | 248 | extern void nfs4_put_open_state(struct nfs4_state *); |
225 | extern void nfs4_close_state(struct nfs4_state *, mode_t); | 249 | extern void nfs4_close_state(struct nfs4_state *, mode_t); |
226 | extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); | 250 | extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); |
227 | extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); | ||
228 | extern void nfs4_schedule_state_recovery(struct nfs4_client *); | 251 | extern void nfs4_schedule_state_recovery(struct nfs4_client *); |
252 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | ||
229 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 253 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
230 | extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); | ||
231 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); | 254 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); |
232 | 255 | ||
256 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); | ||
257 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); | ||
258 | extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); | ||
259 | extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); | ||
260 | extern void nfs_free_seqid(struct nfs_seqid *seqid); | ||
261 | |||
233 | extern const nfs4_stateid zero_stateid; | 262 | extern const nfs4_stateid zero_stateid; |
234 | 263 | ||
235 | /* nfs4xdr.c */ | 264 | /* nfs4xdr.c */ |
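The nfs_seqid_counter/nfs_seqid machinery declared above replaces the per-owner so_sema + so_seqid pair: each seqid-mutating call allocates its own nfs_seqid and the counter is advanced according to the RPC status. A sketch of the usage pattern the nfs4proc.c hunks below adopt, assuming the helpers declared here; do_open_class_call() and the trimmed argument setup are illustrative only:

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"

static int do_open_class_call(struct nfs4_state_owner *sp,
			      struct rpc_clnt *clnt,
			      struct rpc_message *msg,
			      struct nfs_openargs *o_arg)
{
	int status;

	/* Each OPEN-class RPC owns one nfs_seqid for its whole lifetime. */
	o_arg->seqid = nfs_alloc_seqid(&sp->so_seqid);
	if (o_arg->seqid == NULL)
		return -ENOMEM;

	status = rpc_call_sync(clnt, msg, RPC_TASK_NOINTR);

	/* Confirm the sequence as established, then let the helper decide
	 * whether this status advances the open seqid counter. */
	nfs_confirm_seqid(&sp->so_seqid, status);
	nfs_increment_open_seqid(status, o_arg->seqid);

	nfs_free_seqid(o_arg->seqid);
	return status;
}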
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9701ca8c9428..9c1da34036aa 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/nfs_page.h> | 47 | #include <linux/nfs_page.h> |
48 | #include <linux/smp_lock.h> | 48 | #include <linux/smp_lock.h> |
49 | #include <linux/namei.h> | 49 | #include <linux/namei.h> |
50 | #include <linux/mount.h> | ||
50 | 51 | ||
51 | #include "nfs4_fs.h" | 52 | #include "nfs4_fs.h" |
52 | #include "delegation.h" | 53 | #include "delegation.h" |
@@ -56,10 +57,11 @@ | |||
56 | #define NFS4_POLL_RETRY_MIN (1*HZ) | 57 | #define NFS4_POLL_RETRY_MIN (1*HZ) |
57 | #define NFS4_POLL_RETRY_MAX (15*HZ) | 58 | #define NFS4_POLL_RETRY_MAX (15*HZ) |
58 | 59 | ||
60 | static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); | ||
59 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 61 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
60 | static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); | 62 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); |
61 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); | 63 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); |
62 | static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); | 64 | static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); |
63 | extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); | 65 | extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); |
64 | extern struct rpc_procinfo nfs4_procedures[]; | 66 | extern struct rpc_procinfo nfs4_procedures[]; |
65 | 67 | ||
@@ -189,12 +191,28 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf | |||
189 | nfsi->change_attr = cinfo->after; | 191 | nfsi->change_attr = cinfo->after; |
190 | } | 192 | } |
191 | 193 | ||
194 | /* Helper for asynchronous RPC calls */ | ||
195 | static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, | ||
196 | rpc_action tk_exit, void *calldata) | ||
197 | { | ||
198 | struct rpc_task *task; | ||
199 | |||
200 | if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) | ||
201 | return -ENOMEM; | ||
202 | |||
203 | task->tk_calldata = calldata; | ||
204 | task->tk_action = tk_begin; | ||
205 | rpc_execute(task); | ||
206 | return 0; | ||
207 | } | ||
208 | |||
192 | static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) | 209 | static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) |
193 | { | 210 | { |
194 | struct inode *inode = state->inode; | 211 | struct inode *inode = state->inode; |
195 | 212 | ||
196 | open_flags &= (FMODE_READ|FMODE_WRITE); | 213 | open_flags &= (FMODE_READ|FMODE_WRITE); |
197 | /* Protect against nfs4_find_state() */ | 214 | /* Protect against nfs4_find_state() */ |
215 | spin_lock(&state->owner->so_lock); | ||
198 | spin_lock(&inode->i_lock); | 216 | spin_lock(&inode->i_lock); |
199 | state->state |= open_flags; | 217 | state->state |= open_flags; |
200 | /* NB! List reordering - see the reclaim code for why. */ | 218 | /* NB! List reordering - see the reclaim code for why. */ |
@@ -204,12 +222,12 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, | |||
204 | state->nreaders++; | 222 | state->nreaders++; |
205 | memcpy(&state->stateid, stateid, sizeof(state->stateid)); | 223 | memcpy(&state->stateid, stateid, sizeof(state->stateid)); |
206 | spin_unlock(&inode->i_lock); | 224 | spin_unlock(&inode->i_lock); |
225 | spin_unlock(&state->owner->so_lock); | ||
207 | } | 226 | } |
208 | 227 | ||
209 | /* | 228 | /* |
210 | * OPEN_RECLAIM: | 229 | * OPEN_RECLAIM: |
211 | * reclaim state on the server after a reboot. | 230 | * reclaim state on the server after a reboot. |
212 | * Assumes caller is holding the sp->so_sem | ||
213 | */ | 231 | */ |
214 | static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) | 232 | static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) |
215 | { | 233 | { |
@@ -218,7 +236,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
218 | struct nfs_delegation *delegation = NFS_I(inode)->delegation; | 236 | struct nfs_delegation *delegation = NFS_I(inode)->delegation; |
219 | struct nfs_openargs o_arg = { | 237 | struct nfs_openargs o_arg = { |
220 | .fh = NFS_FH(inode), | 238 | .fh = NFS_FH(inode), |
221 | .seqid = sp->so_seqid, | ||
222 | .id = sp->so_id, | 239 | .id = sp->so_id, |
223 | .open_flags = state->state, | 240 | .open_flags = state->state, |
224 | .clientid = server->nfs4_state->cl_clientid, | 241 | .clientid = server->nfs4_state->cl_clientid, |
@@ -245,8 +262,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
245 | } | 262 | } |
246 | o_arg.u.delegation_type = delegation->type; | 263 | o_arg.u.delegation_type = delegation->type; |
247 | } | 264 | } |
265 | o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); | ||
266 | if (o_arg.seqid == NULL) | ||
267 | return -ENOMEM; | ||
248 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); | 268 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); |
249 | nfs4_increment_seqid(status, sp); | 269 | /* Confirm the sequence as being established */ |
270 | nfs_confirm_seqid(&sp->so_seqid, status); | ||
271 | nfs_increment_open_seqid(status, o_arg.seqid); | ||
250 | if (status == 0) { | 272 | if (status == 0) { |
251 | memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); | 273 | memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); |
252 | if (o_res.delegation_type != 0) { | 274 | if (o_res.delegation_type != 0) { |
@@ -256,6 +278,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
256 | nfs_async_inode_return_delegation(inode, &o_res.stateid); | 278 | nfs_async_inode_return_delegation(inode, &o_res.stateid); |
257 | } | 279 | } |
258 | } | 280 | } |
281 | nfs_free_seqid(o_arg.seqid); | ||
259 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 282 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
260 | /* Ensure we update the inode attributes */ | 283 | /* Ensure we update the inode attributes */ |
261 | NFS_CACHEINV(inode); | 284 | NFS_CACHEINV(inode); |
@@ -302,23 +325,35 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state | |||
302 | }; | 325 | }; |
303 | int status = 0; | 326 | int status = 0; |
304 | 327 | ||
305 | down(&sp->so_sema); | ||
306 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) | 328 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) |
307 | goto out; | 329 | goto out; |
308 | if (state->state == 0) | 330 | if (state->state == 0) |
309 | goto out; | 331 | goto out; |
310 | arg.seqid = sp->so_seqid; | 332 | arg.seqid = nfs_alloc_seqid(&sp->so_seqid); |
333 | status = -ENOMEM; | ||
334 | if (arg.seqid == NULL) | ||
335 | goto out; | ||
311 | arg.open_flags = state->state; | 336 | arg.open_flags = state->state; |
312 | memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); | 337 | memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); |
313 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); | 338 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); |
314 | nfs4_increment_seqid(status, sp); | 339 | nfs_increment_open_seqid(status, arg.seqid); |
340 | if (status != 0) | ||
341 | goto out_free; | ||
342 | if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) { | ||
343 | status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode), | ||
344 | sp, &res.stateid, arg.seqid); | ||
345 | if (status != 0) | ||
346 | goto out_free; | ||
347 | } | ||
348 | nfs_confirm_seqid(&sp->so_seqid, 0); | ||
315 | if (status >= 0) { | 349 | if (status >= 0) { |
316 | memcpy(state->stateid.data, res.stateid.data, | 350 | memcpy(state->stateid.data, res.stateid.data, |
317 | sizeof(state->stateid.data)); | 351 | sizeof(state->stateid.data)); |
318 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 352 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
319 | } | 353 | } |
354 | out_free: | ||
355 | nfs_free_seqid(arg.seqid); | ||
320 | out: | 356 | out: |
321 | up(&sp->so_sema); | ||
322 | dput(parent); | 357 | dput(parent); |
323 | return status; | 358 | return status; |
324 | } | 359 | } |
@@ -345,11 +380,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) | |||
345 | return err; | 380 | return err; |
346 | } | 381 | } |
347 | 382 | ||
348 | static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) | 383 | static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) |
349 | { | 384 | { |
350 | struct nfs_open_confirmargs arg = { | 385 | struct nfs_open_confirmargs arg = { |
351 | .fh = fh, | 386 | .fh = fh, |
352 | .seqid = sp->so_seqid, | 387 | .seqid = seqid, |
353 | .stateid = *stateid, | 388 | .stateid = *stateid, |
354 | }; | 389 | }; |
355 | struct nfs_open_confirmres res; | 390 | struct nfs_open_confirmres res; |
@@ -362,7 +397,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf | |||
362 | int status; | 397 | int status; |
363 | 398 | ||
364 | status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); | 399 | status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); |
365 | nfs4_increment_seqid(status, sp); | 400 | /* Confirm the sequence as being established */ |
401 | nfs_confirm_seqid(&sp->so_seqid, status); | ||
402 | nfs_increment_open_seqid(status, seqid); | ||
366 | if (status >= 0) | 403 | if (status >= 0) |
367 | memcpy(stateid, &res.stateid, sizeof(*stateid)); | 404 | memcpy(stateid, &res.stateid, sizeof(*stateid)); |
368 | return status; | 405 | return status; |
@@ -380,21 +417,37 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru | |||
380 | int status; | 417 | int status; |
381 | 418 | ||
382 | /* Update sequence id. The caller must serialize! */ | 419 | /* Update sequence id. The caller must serialize! */ |
383 | o_arg->seqid = sp->so_seqid; | ||
384 | o_arg->id = sp->so_id; | 420 | o_arg->id = sp->so_id; |
385 | o_arg->clientid = sp->so_client->cl_clientid; | 421 | o_arg->clientid = sp->so_client->cl_clientid; |
386 | 422 | ||
387 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); | 423 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); |
388 | nfs4_increment_seqid(status, sp); | 424 | if (status == 0) { |
425 | /* OPEN on anything except a regular file is disallowed in NFSv4 */ | ||
426 | switch (o_res->f_attr->mode & S_IFMT) { | ||
427 | case S_IFREG: | ||
428 | break; | ||
429 | case S_IFLNK: | ||
430 | status = -ELOOP; | ||
431 | break; | ||
432 | case S_IFDIR: | ||
433 | status = -EISDIR; | ||
434 | break; | ||
435 | default: | ||
436 | status = -ENOTDIR; | ||
437 | } | ||
438 | } | ||
439 | |||
440 | nfs_increment_open_seqid(status, o_arg->seqid); | ||
389 | if (status != 0) | 441 | if (status != 0) |
390 | goto out; | 442 | goto out; |
391 | update_changeattr(dir, &o_res->cinfo); | 443 | update_changeattr(dir, &o_res->cinfo); |
392 | if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { | 444 | if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { |
393 | status = _nfs4_proc_open_confirm(server->client, &o_res->fh, | 445 | status = _nfs4_proc_open_confirm(server->client, &o_res->fh, |
394 | sp, &o_res->stateid); | 446 | sp, &o_res->stateid, o_arg->seqid); |
395 | if (status != 0) | 447 | if (status != 0) |
396 | goto out; | 448 | goto out; |
397 | } | 449 | } |
450 | nfs_confirm_seqid(&sp->so_seqid, 0); | ||
398 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) | 451 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) |
399 | status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); | 452 | status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); |
400 | out: | 453 | out: |
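The switch added to _nfs4_proc_open() rejects OPEN results on anything but regular files on the client side, translating the file type into the error the VFS expects. A standalone sketch of that mapping using the standard S_IFMT macros (not the kernel code itself):

#include <errno.h>
#include <sys/stat.h>

/* Mirror the new switch: only regular files may be OPENed in NFSv4. */
static int nfs4_open_mode_to_error(mode_t mode)
{
    switch (mode & S_IFMT) {
    case S_IFREG:
        return 0;          /* regular file: OK */
    case S_IFLNK:
        return -ELOOP;     /* symlink: should have been followed earlier */
    case S_IFDIR:
        return -EISDIR;    /* directories are opened via LOOKUP/READDIR */
    default:
        return -ENOTDIR;   /* devices, sockets, fifos, ... */
    }
}

int main(void)
{
    return nfs4_open_mode_to_error(S_IFREG | 0644) == 0 ? 0 : 1;
}
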
@@ -465,6 +518,10 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
465 | set_bit(NFS_DELEGATED_STATE, &state->flags); | 518 | set_bit(NFS_DELEGATED_STATE, &state->flags); |
466 | goto out; | 519 | goto out; |
467 | } | 520 | } |
521 | o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); | ||
522 | status = -ENOMEM; | ||
523 | if (o_arg.seqid == NULL) | ||
524 | goto out; | ||
468 | status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); | 525 | status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); |
469 | if (status != 0) | 526 | if (status != 0) |
470 | goto out_nodeleg; | 527 | goto out_nodeleg; |
@@ -490,6 +547,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
490 | nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); | 547 | nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); |
491 | } | 548 | } |
492 | out_nodeleg: | 549 | out_nodeleg: |
550 | nfs_free_seqid(o_arg.seqid); | ||
493 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 551 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
494 | out: | 552 | out: |
495 | dput(parent); | 553 | dput(parent); |
@@ -564,7 +622,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred | |||
564 | dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); | 622 | dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); |
565 | goto out_err; | 623 | goto out_err; |
566 | } | 624 | } |
567 | down(&sp->so_sema); | ||
568 | state = nfs4_get_open_state(inode, sp); | 625 | state = nfs4_get_open_state(inode, sp); |
569 | if (state == NULL) | 626 | if (state == NULL) |
570 | goto out_err; | 627 | goto out_err; |
@@ -589,7 +646,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred | |||
589 | set_bit(NFS_DELEGATED_STATE, &state->flags); | 646 | set_bit(NFS_DELEGATED_STATE, &state->flags); |
590 | update_open_stateid(state, &delegation->stateid, open_flags); | 647 | update_open_stateid(state, &delegation->stateid, open_flags); |
591 | out_ok: | 648 | out_ok: |
592 | up(&sp->so_sema); | ||
593 | nfs4_put_state_owner(sp); | 649 | nfs4_put_state_owner(sp); |
594 | up_read(&nfsi->rwsem); | 650 | up_read(&nfsi->rwsem); |
595 | up_read(&clp->cl_sem); | 651 | up_read(&clp->cl_sem); |
@@ -600,11 +656,12 @@ out_err: | |||
600 | if (sp != NULL) { | 656 | if (sp != NULL) { |
601 | if (state != NULL) | 657 | if (state != NULL) |
602 | nfs4_put_open_state(state); | 658 | nfs4_put_open_state(state); |
603 | up(&sp->so_sema); | ||
604 | nfs4_put_state_owner(sp); | 659 | nfs4_put_state_owner(sp); |
605 | } | 660 | } |
606 | up_read(&nfsi->rwsem); | 661 | up_read(&nfsi->rwsem); |
607 | up_read(&clp->cl_sem); | 662 | up_read(&clp->cl_sem); |
663 | if (err != -EACCES) | ||
664 | nfs_inode_return_delegation(inode); | ||
608 | return err; | 665 | return err; |
609 | } | 666 | } |
610 | 667 | ||
@@ -665,8 +722,10 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st | |||
665 | } else | 722 | } else |
666 | o_arg.u.attrs = sattr; | 723 | o_arg.u.attrs = sattr; |
667 | /* Serialization for the sequence id */ | 724 | /* Serialization for the sequence id */ |
668 | down(&sp->so_sema); | ||
669 | 725 | ||
726 | o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); | ||
727 | if (o_arg.seqid == NULL) | ||
728 | return -ENOMEM; | ||
670 | status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); | 729 | status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); |
671 | if (status != 0) | 730 | if (status != 0) |
672 | goto out_err; | 731 | goto out_err; |
@@ -681,7 +740,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st | |||
681 | update_open_stateid(state, &o_res.stateid, flags); | 740 | update_open_stateid(state, &o_res.stateid, flags); |
682 | if (o_res.delegation_type != 0) | 741 | if (o_res.delegation_type != 0) |
683 | nfs_inode_set_delegation(inode, cred, &o_res); | 742 | nfs_inode_set_delegation(inode, cred, &o_res); |
684 | up(&sp->so_sema); | 743 | nfs_free_seqid(o_arg.seqid); |
685 | nfs4_put_state_owner(sp); | 744 | nfs4_put_state_owner(sp); |
686 | up_read(&clp->cl_sem); | 745 | up_read(&clp->cl_sem); |
687 | *res = state; | 746 | *res = state; |
@@ -690,7 +749,7 @@ out_err: | |||
690 | if (sp != NULL) { | 749 | if (sp != NULL) { |
691 | if (state != NULL) | 750 | if (state != NULL) |
692 | nfs4_put_open_state(state); | 751 | nfs4_put_open_state(state); |
693 | up(&sp->so_sema); | 752 | nfs_free_seqid(o_arg.seqid); |
694 | nfs4_put_state_owner(sp); | 753 | nfs4_put_state_owner(sp); |
695 | } | 754 | } |
696 | /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ | 755 | /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ |
@@ -718,7 +777,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, | |||
718 | * It is actually a sign of a bug on the client or on the server. | 777 | * It is actually a sign of a bug on the client or on the server. |
719 | * | 778 | * |
720 | * If we receive a BAD_SEQID error in the particular case of | 779 | * If we receive a BAD_SEQID error in the particular case of |
721 | * doing an OPEN, we assume that nfs4_increment_seqid() will | 780 | * doing an OPEN, we assume that nfs_increment_open_seqid() will |
722 | * have unhashed the old state_owner for us, and that we can | 781 | * have unhashed the old state_owner for us, and that we can |
723 | * therefore safely retry using a new one. We should still warn | 782 | * therefore safely retry using a new one. We should still warn |
724 | * the user though... | 783 | * the user though... |
@@ -728,6 +787,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, | |||
728 | exception.retry = 1; | 787 | exception.retry = 1; |
729 | continue; | 788 | continue; |
730 | } | 789 | } |
790 | /* | ||
791 | * BAD_STATEID on OPEN means that the server cancelled our | ||
792 | * state before it received the OPEN_CONFIRM. | ||
793 | * Recover by retrying the request as per the discussion | ||
794 | * on Page 181 of RFC3530. | ||
795 | */ | ||
796 | if (status == -NFS4ERR_BAD_STATEID) { | ||
797 | exception.retry = 1; | ||
798 | continue; | ||
799 | } | ||
731 | res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), | 800 | res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), |
732 | status, &exception)); | 801 | status, &exception)); |
733 | } while (exception.retry); | 802 | } while (exception.retry); |
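With the BAD_STATEID case added above, the open loop now retries on two errors rather than one. A hedged sketch of the loop shape only (error values are the RFC 3530 numbers; attempt_open is a placeholder; the real code re-drives the open through the state owner and eventually gives up via the exception handler):

#include <stdio.h>

#define NFS4ERR_BAD_STATEID 10025
#define NFS4ERR_BAD_SEQID   10026

/* Placeholder for one _nfs4_do_open() attempt. */
static int attempt_open(int try) { return try < 2 ? -NFS4ERR_BAD_STATEID : 0; }

/* Retry on BAD_SEQID (the stale owner was dropped for us) and on
 * BAD_STATEID (server discarded the state before our OPEN_CONFIRM
 * arrived; see RFC 3530, p.181). */
int do_open_with_retry(void)
{
    int status, try = 0;

    do {
        status = attempt_open(try++);
    } while (status == -NFS4ERR_BAD_SEQID || status == -NFS4ERR_BAD_STATEID);
    return status;
}

int main(void)
{
    printf("final status: %d\n", do_open_with_retry());
    return 0;
}
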
@@ -789,17 +858,27 @@ struct nfs4_closedata { | |||
789 | struct nfs_closeres res; | 858 | struct nfs_closeres res; |
790 | }; | 859 | }; |
791 | 860 | ||
861 | static void nfs4_free_closedata(struct nfs4_closedata *calldata) | ||
862 | { | ||
863 | struct nfs4_state *state = calldata->state; | ||
864 | struct nfs4_state_owner *sp = state->owner; | ||
865 | |||
866 | nfs4_put_open_state(calldata->state); | ||
867 | nfs_free_seqid(calldata->arg.seqid); | ||
868 | nfs4_put_state_owner(sp); | ||
869 | kfree(calldata); | ||
870 | } | ||
871 | |||
792 | static void nfs4_close_done(struct rpc_task *task) | 872 | static void nfs4_close_done(struct rpc_task *task) |
793 | { | 873 | { |
794 | struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; | 874 | struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; |
795 | struct nfs4_state *state = calldata->state; | 875 | struct nfs4_state *state = calldata->state; |
796 | struct nfs4_state_owner *sp = state->owner; | ||
797 | struct nfs_server *server = NFS_SERVER(calldata->inode); | 876 | struct nfs_server *server = NFS_SERVER(calldata->inode); |
798 | 877 | ||
799 | /* hmm. we are done with the inode, and in the process of freeing | 878 | /* hmm. we are done with the inode, and in the process of freeing |
800 | * the state_owner. we keep this around to process errors | 879 | * the state_owner. we keep this around to process errors |
801 | */ | 880 | */ |
802 | nfs4_increment_seqid(task->tk_status, sp); | 881 | nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); |
803 | switch (task->tk_status) { | 882 | switch (task->tk_status) { |
804 | case 0: | 883 | case 0: |
805 | memcpy(&state->stateid, &calldata->res.stateid, | 884 | memcpy(&state->stateid, &calldata->res.stateid, |
@@ -817,24 +896,46 @@ static void nfs4_close_done(struct rpc_task *task) | |||
817 | } | 896 | } |
818 | } | 897 | } |
819 | state->state = calldata->arg.open_flags; | 898 | state->state = calldata->arg.open_flags; |
820 | nfs4_put_open_state(state); | 899 | nfs4_free_closedata(calldata); |
821 | up(&sp->so_sema); | ||
822 | nfs4_put_state_owner(sp); | ||
823 | up_read(&server->nfs4_state->cl_sem); | ||
824 | kfree(calldata); | ||
825 | } | 900 | } |
826 | 901 | ||
827 | static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) | 902 | static void nfs4_close_begin(struct rpc_task *task) |
828 | { | 903 | { |
904 | struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; | ||
905 | struct nfs4_state *state = calldata->state; | ||
829 | struct rpc_message msg = { | 906 | struct rpc_message msg = { |
830 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], | 907 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], |
831 | .rpc_argp = &calldata->arg, | 908 | .rpc_argp = &calldata->arg, |
832 | .rpc_resp = &calldata->res, | 909 | .rpc_resp = &calldata->res, |
833 | .rpc_cred = calldata->state->owner->so_cred, | 910 | .rpc_cred = state->owner->so_cred, |
834 | }; | 911 | }; |
835 | if (calldata->arg.open_flags != 0) | 912 | int mode = 0; |
913 | int status; | ||
914 | |||
915 | status = nfs_wait_on_sequence(calldata->arg.seqid, task); | ||
916 | if (status != 0) | ||
917 | return; | ||
918 | /* Don't reorder reads */ | ||
919 | smp_rmb(); | ||
920 | /* Recalculate the new open mode in case someone reopened the file | ||
921 | * while we were waiting in line to be scheduled. | ||
922 | */ | ||
923 | if (state->nreaders != 0) | ||
924 | mode |= FMODE_READ; | ||
925 | if (state->nwriters != 0) | ||
926 | mode |= FMODE_WRITE; | ||
927 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
928 | state->state = mode; | ||
929 | if (mode == state->state) { | ||
930 | nfs4_free_closedata(calldata); | ||
931 | task->tk_exit = NULL; | ||
932 | rpc_exit(task, 0); | ||
933 | return; | ||
934 | } | ||
935 | if (mode != 0) | ||
836 | msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; | 936 | msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; |
837 | return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); | 937 | calldata->arg.open_flags = mode; |
938 | rpc_call_setup(task, &msg, 0); | ||
838 | } | 939 | } |
839 | 940 | ||
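nfs4_close_begin() above recomputes the open mode from the reader/writer counts after the task reaches the head of the seqid queue, then either skips the RPC, sends OPEN_DOWNGRADE, or sends CLOSE (the delegated case forces the skip). A compact model of that decision; the names are illustrative:

#include <stdio.h>

#define FMODE_READ  1
#define FMODE_WRITE 2

enum close_action { SKIP_RPC, OPEN_DOWNGRADE, CLOSE };

/* Someone may have reopened the file while we waited in line, so the
 * mode is recalculated just before the RPC is set up. */
static enum close_action choose_close(int nreaders, int nwriters, int cur_state)
{
    int mode = 0;

    if (nreaders != 0)
        mode |= FMODE_READ;
    if (nwriters != 0)
        mode |= FMODE_WRITE;
    if (mode == cur_state)
        return SKIP_RPC;        /* nothing changed: no RPC needed */
    if (mode != 0)
        return OPEN_DOWNGRADE;  /* still open, but with fewer share bits */
    return CLOSE;               /* last reference: send CLOSE */
}

int main(void)
{
    printf("%d\n", choose_close(1, 0, FMODE_READ | FMODE_WRITE)); /* 1 = OPEN_DOWNGRADE */
    return 0;
}
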
840 | /* | 941 | /* |
@@ -851,39 +952,52 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata * | |||
851 | int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) | 952 | int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) |
852 | { | 953 | { |
853 | struct nfs4_closedata *calldata; | 954 | struct nfs4_closedata *calldata; |
854 | int status; | 955 | int status = -ENOMEM; |
855 | 956 | ||
856 | /* Tell caller we're done */ | 957 | calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); |
857 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { | ||
858 | state->state = mode; | ||
859 | return 0; | ||
860 | } | ||
861 | calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); | ||
862 | if (calldata == NULL) | 958 | if (calldata == NULL) |
863 | return -ENOMEM; | 959 | goto out; |
864 | calldata->inode = inode; | 960 | calldata->inode = inode; |
865 | calldata->state = state; | 961 | calldata->state = state; |
866 | calldata->arg.fh = NFS_FH(inode); | 962 | calldata->arg.fh = NFS_FH(inode); |
963 | calldata->arg.stateid = &state->stateid; | ||
867 | /* Serialization for the sequence id */ | 964 | /* Serialization for the sequence id */ |
868 | calldata->arg.seqid = state->owner->so_seqid; | 965 | calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); |
869 | calldata->arg.open_flags = mode; | 966 | if (calldata->arg.seqid == NULL) |
870 | memcpy(&calldata->arg.stateid, &state->stateid, | 967 | goto out_free_calldata; |
871 | sizeof(calldata->arg.stateid)); | 968 | |
872 | status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); | 969 | status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin, |
873 | /* | 970 | nfs4_close_done, calldata); |
874 | * Return -EINPROGRESS on success in order to indicate to the | 971 | if (status == 0) |
875 | * caller that an asynchronous RPC call has been launched, and | 972 | goto out; |
876 | * that it will release the semaphores on completion. | 973 | |
877 | */ | 974 | nfs_free_seqid(calldata->arg.seqid); |
878 | return (status == 0) ? -EINPROGRESS : status; | 975 | out_free_calldata: |
976 | kfree(calldata); | ||
977 | out: | ||
978 | return status; | ||
979 | } | ||
980 | |||
981 | static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) | ||
982 | { | ||
983 | struct file *filp; | ||
984 | |||
985 | filp = lookup_instantiate_filp(nd, dentry, NULL); | ||
986 | if (!IS_ERR(filp)) { | ||
987 | struct nfs_open_context *ctx; | ||
988 | ctx = (struct nfs_open_context *)filp->private_data; | ||
989 | ctx->state = state; | ||
990 | } else | ||
991 | nfs4_close_state(state, nd->intent.open.flags); | ||
879 | } | 992 | } |
880 | 993 | ||
881 | struct inode * | 994 | struct dentry * |
882 | nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | 995 | nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
883 | { | 996 | { |
884 | struct iattr attr; | 997 | struct iattr attr; |
885 | struct rpc_cred *cred; | 998 | struct rpc_cred *cred; |
886 | struct nfs4_state *state; | 999 | struct nfs4_state *state; |
1000 | struct dentry *res; | ||
887 | 1001 | ||
888 | if (nd->flags & LOOKUP_CREATE) { | 1002 | if (nd->flags & LOOKUP_CREATE) { |
889 | attr.ia_mode = nd->intent.open.create_mode; | 1003 | attr.ia_mode = nd->intent.open.create_mode; |
@@ -897,16 +1011,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
897 | 1011 | ||
898 | cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); | 1012 | cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); |
899 | if (IS_ERR(cred)) | 1013 | if (IS_ERR(cred)) |
900 | return (struct inode *)cred; | 1014 | return (struct dentry *)cred; |
901 | state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); | 1015 | state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); |
902 | put_rpccred(cred); | 1016 | put_rpccred(cred); |
903 | if (IS_ERR(state)) | 1017 | if (IS_ERR(state)) { |
904 | return (struct inode *)state; | 1018 | if (PTR_ERR(state) == -ENOENT) |
905 | return state->inode; | 1019 | d_add(dentry, NULL); |
1020 | return (struct dentry *)state; | ||
1021 | } | ||
1022 | res = d_add_unique(dentry, state->inode); | ||
1023 | if (res != NULL) | ||
1024 | dentry = res; | ||
1025 | nfs4_intent_set_file(nd, dentry, state); | ||
1026 | return res; | ||
906 | } | 1027 | } |
907 | 1028 | ||
908 | int | 1029 | int |
909 | nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) | 1030 | nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) |
910 | { | 1031 | { |
911 | struct rpc_cred *cred; | 1032 | struct rpc_cred *cred; |
912 | struct nfs4_state *state; | 1033 | struct nfs4_state *state; |
@@ -919,18 +1040,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) | |||
919 | if (IS_ERR(state)) | 1040 | if (IS_ERR(state)) |
920 | state = nfs4_do_open(dir, dentry, openflags, NULL, cred); | 1041 | state = nfs4_do_open(dir, dentry, openflags, NULL, cred); |
921 | put_rpccred(cred); | 1042 | put_rpccred(cred); |
922 | if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) | 1043 | if (IS_ERR(state)) { |
923 | return 1; | 1044 | switch (PTR_ERR(state)) { |
924 | if (IS_ERR(state)) | 1045 | case -EPERM: |
925 | return 0; | 1046 | case -EACCES: |
1047 | case -EDQUOT: | ||
1048 | case -ENOSPC: | ||
1049 | case -EROFS: | ||
1050 | lookup_instantiate_filp(nd, (struct dentry *)state, NULL); | ||
1051 | return 1; | ||
1052 | case -ENOENT: | ||
1053 | if (dentry->d_inode == NULL) | ||
1054 | return 1; | ||
1055 | } | ||
1056 | goto out_drop; | ||
1057 | } | ||
926 | inode = state->inode; | 1058 | inode = state->inode; |
1059 | iput(inode); | ||
927 | if (inode == dentry->d_inode) { | 1060 | if (inode == dentry->d_inode) { |
928 | iput(inode); | 1061 | nfs4_intent_set_file(nd, dentry, state); |
929 | return 1; | 1062 | return 1; |
930 | } | 1063 | } |
931 | d_drop(dentry); | ||
932 | nfs4_close_state(state, openflags); | 1064 | nfs4_close_state(state, openflags); |
933 | iput(inode); | 1065 | out_drop: |
1066 | d_drop(dentry); | ||
934 | return 0; | 1067 | return 0; |
935 | } | 1068 | } |
936 | 1069 | ||
@@ -1431,7 +1564,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata) | |||
1431 | 1564 | ||
1432 | static int | 1565 | static int |
1433 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 1566 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
1434 | int flags) | 1567 | int flags, struct nameidata *nd) |
1435 | { | 1568 | { |
1436 | struct nfs4_state *state; | 1569 | struct nfs4_state *state; |
1437 | struct rpc_cred *cred; | 1570 | struct rpc_cred *cred; |
@@ -1453,13 +1586,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
1453 | struct nfs_fattr fattr; | 1586 | struct nfs_fattr fattr; |
1454 | status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, | 1587 | status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, |
1455 | NFS_FH(state->inode), sattr, state); | 1588 | NFS_FH(state->inode), sattr, state); |
1456 | if (status == 0) { | 1589 | if (status == 0) |
1457 | nfs_setattr_update_inode(state->inode, sattr); | 1590 | nfs_setattr_update_inode(state->inode, sattr); |
1458 | goto out; | 1591 | } |
1459 | } | 1592 | if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) |
1460 | } else if (flags != 0) | 1593 | nfs4_intent_set_file(nd, dentry, state); |
1461 | goto out; | 1594 | else |
1462 | nfs4_close_state(state, flags); | 1595 | nfs4_close_state(state, flags); |
1463 | out: | 1596 | out: |
1464 | return status; | 1597 | return status; |
1465 | } | 1598 | } |
@@ -2106,65 +2239,6 @@ nfs4_proc_renew(struct nfs4_client *clp) | |||
2106 | return 0; | 2239 | return 0; |
2107 | } | 2240 | } |
2108 | 2241 | ||
2109 | /* | ||
2110 | * We will need to arrange for the VFS layer to provide an atomic open. | ||
2111 | * Until then, this open method is prone to inefficiency and race conditions | ||
2112 | * due to the lookup, potential create, and open VFS calls from sys_open() | ||
2113 | * placed on the wire. | ||
2114 | */ | ||
2115 | static int | ||
2116 | nfs4_proc_file_open(struct inode *inode, struct file *filp) | ||
2117 | { | ||
2118 | struct dentry *dentry = filp->f_dentry; | ||
2119 | struct nfs_open_context *ctx; | ||
2120 | struct nfs4_state *state = NULL; | ||
2121 | struct rpc_cred *cred; | ||
2122 | int status = -ENOMEM; | ||
2123 | |||
2124 | dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", | ||
2125 | (int)dentry->d_parent->d_name.len, | ||
2126 | dentry->d_parent->d_name.name, | ||
2127 | (int)dentry->d_name.len, dentry->d_name.name); | ||
2128 | |||
2129 | |||
2130 | /* Find our open stateid */ | ||
2131 | cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); | ||
2132 | if (IS_ERR(cred)) | ||
2133 | return PTR_ERR(cred); | ||
2134 | ctx = alloc_nfs_open_context(dentry, cred); | ||
2135 | put_rpccred(cred); | ||
2136 | if (unlikely(ctx == NULL)) | ||
2137 | return -ENOMEM; | ||
2138 | status = -EIO; /* ERACE actually */ | ||
2139 | state = nfs4_find_state(inode, cred, filp->f_mode); | ||
2140 | if (unlikely(state == NULL)) | ||
2141 | goto no_state; | ||
2142 | ctx->state = state; | ||
2143 | nfs4_close_state(state, filp->f_mode); | ||
2144 | ctx->mode = filp->f_mode; | ||
2145 | nfs_file_set_open_context(filp, ctx); | ||
2146 | put_nfs_open_context(ctx); | ||
2147 | if (filp->f_mode & FMODE_WRITE) | ||
2148 | nfs_begin_data_update(inode); | ||
2149 | return 0; | ||
2150 | no_state: | ||
2151 | printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); | ||
2152 | put_nfs_open_context(ctx); | ||
2153 | return status; | ||
2154 | } | ||
2155 | |||
2156 | /* | ||
2157 | * Release our state | ||
2158 | */ | ||
2159 | static int | ||
2160 | nfs4_proc_file_release(struct inode *inode, struct file *filp) | ||
2161 | { | ||
2162 | if (filp->f_mode & FMODE_WRITE) | ||
2163 | nfs_end_data_update(inode); | ||
2164 | nfs_file_clear_open_context(filp); | ||
2165 | return 0; | ||
2166 | } | ||
2167 | |||
2168 | static inline int nfs4_server_supports_acls(struct nfs_server *server) | 2242 | static inline int nfs4_server_supports_acls(struct nfs_server *server) |
2169 | { | 2243 | { |
2170 | return (server->caps & NFS_CAP_ACLS) | 2244 | return (server->caps & NFS_CAP_ACLS) |
@@ -2285,7 +2359,7 @@ static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size | |||
2285 | return -ENOMEM; | 2359 | return -ENOMEM; |
2286 | args.acl_pages[0] = localpage; | 2360 | args.acl_pages[0] = localpage; |
2287 | args.acl_pgbase = 0; | 2361 | args.acl_pgbase = 0; |
2288 | args.acl_len = PAGE_SIZE; | 2362 | resp_len = args.acl_len = PAGE_SIZE; |
2289 | } else { | 2363 | } else { |
2290 | resp_buf = buf; | 2364 | resp_buf = buf; |
2291 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); | 2365 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); |
@@ -2345,6 +2419,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen | |||
2345 | 2419 | ||
2346 | if (!nfs4_server_supports_acls(server)) | 2420 | if (!nfs4_server_supports_acls(server)) |
2347 | return -EOPNOTSUPP; | 2421 | return -EOPNOTSUPP; |
2422 | nfs_inode_return_delegation(inode); | ||
2348 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); | 2423 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); |
2349 | ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); | 2424 | ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); |
2350 | if (ret == 0) | 2425 | if (ret == 0) |
@@ -2353,7 +2428,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen | |||
2353 | } | 2428 | } |
2354 | 2429 | ||
2355 | static int | 2430 | static int |
2356 | nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) | 2431 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) |
2357 | { | 2432 | { |
2358 | struct nfs4_client *clp = server->nfs4_state; | 2433 | struct nfs4_client *clp = server->nfs4_state; |
2359 | 2434 | ||
@@ -2431,7 +2506,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | |||
2431 | /* This is the error handling routine for processes that are allowed | 2506 | /* This is the error handling routine for processes that are allowed |
2432 | * to sleep. | 2507 | * to sleep. |
2433 | */ | 2508 | */ |
2434 | int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) | 2509 | int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) |
2435 | { | 2510 | { |
2436 | struct nfs4_client *clp = server->nfs4_state; | 2511 | struct nfs4_client *clp = server->nfs4_state; |
2437 | int ret = errorcode; | 2512 | int ret = errorcode; |
@@ -2632,7 +2707,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock | |||
2632 | 2707 | ||
2633 | down_read(&clp->cl_sem); | 2708 | down_read(&clp->cl_sem); |
2634 | nlo.clientid = clp->cl_clientid; | 2709 | nlo.clientid = clp->cl_clientid; |
2635 | down(&state->lock_sema); | ||
2636 | status = nfs4_set_lock_state(state, request); | 2710 | status = nfs4_set_lock_state(state, request); |
2637 | if (status != 0) | 2711 | if (status != 0) |
2638 | goto out; | 2712 | goto out; |
@@ -2659,7 +2733,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock | |||
2659 | status = 0; | 2733 | status = 0; |
2660 | } | 2734 | } |
2661 | out: | 2735 | out: |
2662 | up(&state->lock_sema); | ||
2663 | up_read(&clp->cl_sem); | 2736 | up_read(&clp->cl_sem); |
2664 | return status; | 2737 | return status; |
2665 | } | 2738 | } |
@@ -2696,79 +2769,149 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) | |||
2696 | return res; | 2769 | return res; |
2697 | } | 2770 | } |
2698 | 2771 | ||
2699 | static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) | 2772 | struct nfs4_unlockdata { |
2773 | struct nfs_lockargs arg; | ||
2774 | struct nfs_locku_opargs luargs; | ||
2775 | struct nfs_lockres res; | ||
2776 | struct nfs4_lock_state *lsp; | ||
2777 | struct nfs_open_context *ctx; | ||
2778 | atomic_t refcount; | ||
2779 | struct completion completion; | ||
2780 | }; | ||
2781 | |||
2782 | static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) | ||
2700 | { | 2783 | { |
2701 | struct inode *inode = state->inode; | 2784 | if (atomic_dec_and_test(&calldata->refcount)) { |
2702 | struct nfs_server *server = NFS_SERVER(inode); | 2785 | nfs_free_seqid(calldata->luargs.seqid); |
2703 | struct nfs4_client *clp = server->nfs4_state; | 2786 | nfs4_put_lock_state(calldata->lsp); |
2704 | struct nfs_lockargs arg = { | 2787 | put_nfs_open_context(calldata->ctx); |
2705 | .fh = NFS_FH(inode), | 2788 | kfree(calldata); |
2706 | .type = nfs4_lck_type(cmd, request), | 2789 | } |
2707 | .offset = request->fl_start, | 2790 | } |
2708 | .length = nfs4_lck_length(request), | 2791 | |
2709 | }; | 2792 | static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) |
2710 | struct nfs_lockres res = { | 2793 | { |
2711 | .server = server, | 2794 | complete(&calldata->completion); |
2712 | }; | 2795 | nfs4_locku_release_calldata(calldata); |
2796 | } | ||
2797 | |||
2798 | static void nfs4_locku_done(struct rpc_task *task) | ||
2799 | { | ||
2800 | struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; | ||
2801 | |||
2802 | nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); | ||
2803 | switch (task->tk_status) { | ||
2804 | case 0: | ||
2805 | memcpy(calldata->lsp->ls_stateid.data, | ||
2806 | calldata->res.u.stateid.data, | ||
2807 | sizeof(calldata->lsp->ls_stateid.data)); | ||
2808 | break; | ||
2809 | case -NFS4ERR_STALE_STATEID: | ||
2810 | case -NFS4ERR_EXPIRED: | ||
2811 | nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); | ||
2812 | break; | ||
2813 | default: | ||
2814 | if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { | ||
2815 | rpc_restart_call(task); | ||
2816 | return; | ||
2817 | } | ||
2818 | } | ||
2819 | nfs4_locku_complete(calldata); | ||
2820 | } | ||
2821 | |||
2822 | static void nfs4_locku_begin(struct rpc_task *task) | ||
2823 | { | ||
2824 | struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; | ||
2713 | struct rpc_message msg = { | 2825 | struct rpc_message msg = { |
2714 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], | 2826 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], |
2715 | .rpc_argp = &arg, | 2827 | .rpc_argp = &calldata->arg, |
2716 | .rpc_resp = &res, | 2828 | .rpc_resp = &calldata->res, |
2717 | .rpc_cred = state->owner->so_cred, | 2829 | .rpc_cred = calldata->lsp->ls_state->owner->so_cred, |
2718 | }; | 2830 | }; |
2831 | int status; | ||
2832 | |||
2833 | status = nfs_wait_on_sequence(calldata->luargs.seqid, task); | ||
2834 | if (status != 0) | ||
2835 | return; | ||
2836 | if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { | ||
2837 | nfs4_locku_complete(calldata); | ||
2838 | task->tk_exit = NULL; | ||
2839 | rpc_exit(task, 0); | ||
2840 | return; | ||
2841 | } | ||
2842 | rpc_call_setup(task, &msg, 0); | ||
2843 | } | ||
2844 | |||
2845 | static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) | ||
2846 | { | ||
2847 | struct nfs4_unlockdata *calldata; | ||
2848 | struct inode *inode = state->inode; | ||
2849 | struct nfs_server *server = NFS_SERVER(inode); | ||
2719 | struct nfs4_lock_state *lsp; | 2850 | struct nfs4_lock_state *lsp; |
2720 | struct nfs_locku_opargs luargs; | ||
2721 | int status; | 2851 | int status; |
2722 | 2852 | ||
2723 | down_read(&clp->cl_sem); | ||
2724 | down(&state->lock_sema); | ||
2725 | status = nfs4_set_lock_state(state, request); | 2853 | status = nfs4_set_lock_state(state, request); |
2726 | if (status != 0) | 2854 | if (status != 0) |
2727 | goto out; | 2855 | return status; |
2728 | lsp = request->fl_u.nfs4_fl.owner; | 2856 | lsp = request->fl_u.nfs4_fl.owner; |
2729 | /* We might have lost the locks! */ | 2857 | /* We might have lost the locks! */ |
2730 | if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) | 2858 | if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) |
2731 | goto out; | 2859 | return 0; |
2732 | luargs.seqid = lsp->ls_seqid; | 2860 | calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); |
2733 | memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); | 2861 | if (calldata == NULL) |
2734 | arg.u.locku = &luargs; | 2862 | return -ENOMEM; |
2735 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); | 2863 | calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); |
2736 | nfs4_increment_lock_seqid(status, lsp); | 2864 | if (calldata->luargs.seqid == NULL) { |
2737 | 2865 | kfree(calldata); | |
2738 | if (status == 0) | 2866 | return -ENOMEM; |
2739 | memcpy(&lsp->ls_stateid, &res.u.stateid, | 2867 | } |
2740 | sizeof(lsp->ls_stateid)); | 2868 | calldata->luargs.stateid = &lsp->ls_stateid; |
2741 | out: | 2869 | calldata->arg.fh = NFS_FH(inode); |
2742 | up(&state->lock_sema); | 2870 | calldata->arg.type = nfs4_lck_type(cmd, request); |
2871 | calldata->arg.offset = request->fl_start; | ||
2872 | calldata->arg.length = nfs4_lck_length(request); | ||
2873 | calldata->arg.u.locku = &calldata->luargs; | ||
2874 | calldata->res.server = server; | ||
2875 | calldata->lsp = lsp; | ||
2876 | atomic_inc(&lsp->ls_count); | ||
2877 | |||
2878 | /* Ensure we don't close file until we're done freeing locks! */ | ||
2879 | calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); | ||
2880 | |||
2881 | atomic_set(&calldata->refcount, 2); | ||
2882 | init_completion(&calldata->completion); | ||
2883 | |||
2884 | status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, | ||
2885 | nfs4_locku_done, calldata); | ||
2743 | if (status == 0) | 2886 | if (status == 0) |
2744 | do_vfs_lock(request->fl_file, request); | 2887 | wait_for_completion_interruptible(&calldata->completion); |
2745 | up_read(&clp->cl_sem); | 2888 | do_vfs_lock(request->fl_file, request); |
2889 | nfs4_locku_release_calldata(calldata); | ||
2746 | return status; | 2890 | return status; |
2747 | } | 2891 | } |
2748 | 2892 | ||
2749 | static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) | ||
2750 | { | ||
2751 | struct nfs4_exception exception = { }; | ||
2752 | int err; | ||
2753 | |||
2754 | do { | ||
2755 | err = nfs4_handle_exception(NFS_SERVER(state->inode), | ||
2756 | _nfs4_proc_unlck(state, cmd, request), | ||
2757 | &exception); | ||
2758 | } while (exception.retry); | ||
2759 | return err; | ||
2760 | } | ||
2761 | |||
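The unlock path is now asynchronous: submitter and RPC callback share one calldata, a reference count decides who frees it, and a completion lets the submitter wait for the callback before applying the local VFS unlock. A user-space model with C11 atomics and a POSIX semaphore standing in for the kernel completion (illustrative only):

#include <semaphore.h>
#include <stdatomic.h>
#include <stdlib.h>

struct unlockdata {
    atomic_int refcount;   /* one ref for the submitter, one for the callback */
    sem_t done;            /* stands in for the kernel completion */
};

static void release(struct unlockdata *d)
{
    if (atomic_fetch_sub(&d->refcount, 1) == 1) {  /* last reference frees */
        sem_destroy(&d->done);
        free(d);
    }
}

/* What the RPC callback would do: record the result, signal, drop its ref. */
static void rpc_callback(struct unlockdata *d)
{
    sem_post(&d->done);
    release(d);
}

int main(void)
{
    struct unlockdata *d = malloc(sizeof(*d));

    if (d == NULL)
        return 1;
    atomic_init(&d->refcount, 2);
    sem_init(&d->done, 0, 0);
    rpc_callback(d);       /* in the kernel this runs from the rpciod thread */
    sem_wait(&d->done);    /* submitter waits, then applies the VFS unlock */
    release(d);
    return 0;
}
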
2762 | static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) | 2893 | static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) |
2763 | { | 2894 | { |
2764 | struct inode *inode = state->inode; | 2895 | struct inode *inode = state->inode; |
2765 | struct nfs_server *server = NFS_SERVER(inode); | 2896 | struct nfs_server *server = NFS_SERVER(inode); |
2766 | struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; | 2897 | struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; |
2898 | struct nfs_lock_opargs largs = { | ||
2899 | .lock_stateid = &lsp->ls_stateid, | ||
2900 | .open_stateid = &state->stateid, | ||
2901 | .lock_owner = { | ||
2902 | .clientid = server->nfs4_state->cl_clientid, | ||
2903 | .id = lsp->ls_id, | ||
2904 | }, | ||
2905 | .reclaim = reclaim, | ||
2906 | }; | ||
2767 | struct nfs_lockargs arg = { | 2907 | struct nfs_lockargs arg = { |
2768 | .fh = NFS_FH(inode), | 2908 | .fh = NFS_FH(inode), |
2769 | .type = nfs4_lck_type(cmd, request), | 2909 | .type = nfs4_lck_type(cmd, request), |
2770 | .offset = request->fl_start, | 2910 | .offset = request->fl_start, |
2771 | .length = nfs4_lck_length(request), | 2911 | .length = nfs4_lck_length(request), |
2912 | .u = { | ||
2913 | .lock = &largs, | ||
2914 | }, | ||
2772 | }; | 2915 | }; |
2773 | struct nfs_lockres res = { | 2916 | struct nfs_lockres res = { |
2774 | .server = server, | 2917 | .server = server, |
@@ -2779,53 +2922,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r | |||
2779 | .rpc_resp = &res, | 2922 | .rpc_resp = &res, |
2780 | .rpc_cred = state->owner->so_cred, | 2923 | .rpc_cred = state->owner->so_cred, |
2781 | }; | 2924 | }; |
2782 | struct nfs_lock_opargs largs = { | 2925 | int status = -ENOMEM; |
2783 | .reclaim = reclaim, | ||
2784 | .new_lock_owner = 0, | ||
2785 | }; | ||
2786 | int status; | ||
2787 | 2926 | ||
2788 | if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { | 2927 | largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); |
2928 | if (largs.lock_seqid == NULL) | ||
2929 | return -ENOMEM; | ||
2930 | if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { | ||
2789 | struct nfs4_state_owner *owner = state->owner; | 2931 | struct nfs4_state_owner *owner = state->owner; |
2790 | struct nfs_open_to_lock otl = { | 2932 | |
2791 | .lock_owner = { | 2933 | largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); |
2792 | .clientid = server->nfs4_state->cl_clientid, | 2934 | if (largs.open_seqid == NULL) |
2793 | }, | 2935 | goto out; |
2794 | }; | ||
2795 | |||
2796 | otl.lock_seqid = lsp->ls_seqid; | ||
2797 | otl.lock_owner.id = lsp->ls_id; | ||
2798 | memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); | ||
2799 | largs.u.open_lock = &otl; | ||
2800 | largs.new_lock_owner = 1; | 2936 | largs.new_lock_owner = 1; |
2801 | arg.u.lock = &largs; | ||
2802 | down(&owner->so_sema); | ||
2803 | otl.open_seqid = owner->so_seqid; | ||
2804 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); | 2937 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); |
2805 | /* increment open_owner seqid on success, and | 2938 | /* increment open seqid on success, and seqid mutating errors */ |
2806 | * seqid mutating errors */ | 2939 | if (largs.new_lock_owner != 0) { |
2807 | nfs4_increment_seqid(status, owner); | 2940 | nfs_increment_open_seqid(status, largs.open_seqid); |
2808 | up(&owner->so_sema); | 2941 | if (status == 0) |
2809 | if (status == 0) { | 2942 | nfs_confirm_seqid(&lsp->ls_seqid, 0); |
2810 | lsp->ls_flags |= NFS_LOCK_INITIALIZED; | ||
2811 | lsp->ls_seqid++; | ||
2812 | } | 2943 | } |
2813 | } else { | 2944 | nfs_free_seqid(largs.open_seqid); |
2814 | struct nfs_exist_lock el = { | 2945 | } else |
2815 | .seqid = lsp->ls_seqid, | ||
2816 | }; | ||
2817 | memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); | ||
2818 | largs.u.exist_lock = ⪙ | ||
2818 | largs.u.exist_lock = &el; ||
2819 | arg.u.lock = &largs; | ||
2820 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); | 2946 | status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); |
2821 | /* increment seqid on success, and * seqid mutating errors*/ | 2947 | /* increment lock seqid on success, and seqid mutating errors*/ |
2822 | nfs4_increment_lock_seqid(status, lsp); | 2948 | nfs_increment_lock_seqid(status, largs.lock_seqid); |
2823 | } | ||
2824 | /* save the returned stateid. */ | 2949 | /* save the returned stateid. */ |
2825 | if (status == 0) | 2950 | if (status == 0) { |
2826 | memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); | 2951 | memcpy(lsp->ls_stateid.data, res.u.stateid.data, |
2827 | else if (status == -NFS4ERR_DENIED) | 2952 | sizeof(lsp->ls_stateid.data)); |
2953 | lsp->ls_flags |= NFS_LOCK_INITIALIZED; | ||
2954 | } else if (status == -NFS4ERR_DENIED) | ||
2828 | status = -EAGAIN; | 2955 | status = -EAGAIN; |
2956 | out: | ||
2957 | nfs_free_seqid(largs.lock_seqid); | ||
2829 | return status; | 2958 | return status; |
2830 | } | 2959 | } |
2831 | 2960 | ||
@@ -2865,11 +2994,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock | |||
2865 | int status; | 2994 | int status; |
2866 | 2995 | ||
2867 | down_read(&clp->cl_sem); | 2996 | down_read(&clp->cl_sem); |
2868 | down(&state->lock_sema); | ||
2869 | status = nfs4_set_lock_state(state, request); | 2997 | status = nfs4_set_lock_state(state, request); |
2870 | if (status == 0) | 2998 | if (status == 0) |
2871 | status = _nfs4_do_setlk(state, cmd, request, 0); | 2999 | status = _nfs4_do_setlk(state, cmd, request, 0); |
2872 | up(&state->lock_sema); | ||
2873 | if (status == 0) { | 3000 | if (status == 0) { |
2874 | /* Note: we always want to sleep here! */ | 3001 | /* Note: we always want to sleep here! */ |
2875 | request->fl_flags |= FL_SLEEP; | 3002 | request->fl_flags |= FL_SLEEP; |
@@ -3024,8 +3151,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { | |||
3024 | .read_setup = nfs4_proc_read_setup, | 3151 | .read_setup = nfs4_proc_read_setup, |
3025 | .write_setup = nfs4_proc_write_setup, | 3152 | .write_setup = nfs4_proc_write_setup, |
3026 | .commit_setup = nfs4_proc_commit_setup, | 3153 | .commit_setup = nfs4_proc_commit_setup, |
3027 | .file_open = nfs4_proc_file_open, | 3154 | .file_open = nfs_open, |
3028 | .file_release = nfs4_proc_file_release, | 3155 | .file_release = nfs_release, |
3029 | .lock = nfs4_proc_lock, | 3156 | .lock = nfs4_proc_lock, |
3030 | .clear_acl_cache = nfs4_zap_acl_attr, | 3157 | .clear_acl_cache = nfs4_zap_acl_attr, |
3031 | }; | 3158 | }; |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afe587d82f1e..2d5a6a2b9dec 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void) | |||
264 | { | 264 | { |
265 | struct nfs4_state_owner *sp; | 265 | struct nfs4_state_owner *sp; |
266 | 266 | ||
267 | sp = kmalloc(sizeof(*sp),GFP_KERNEL); | 267 | sp = kzalloc(sizeof(*sp),GFP_KERNEL); |
268 | if (!sp) | 268 | if (!sp) |
269 | return NULL; | 269 | return NULL; |
270 | init_MUTEX(&sp->so_sema); | 270 | spin_lock_init(&sp->so_lock); |
271 | sp->so_seqid = 0; /* arbitrary */ | ||
272 | INIT_LIST_HEAD(&sp->so_states); | 271 | INIT_LIST_HEAD(&sp->so_states); |
273 | INIT_LIST_HEAD(&sp->so_delegations); | 272 | INIT_LIST_HEAD(&sp->so_delegations); |
273 | rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); | ||
274 | sp->so_seqid.sequence = &sp->so_sequence; | ||
275 | spin_lock_init(&sp->so_sequence.lock); | ||
276 | INIT_LIST_HEAD(&sp->so_sequence.list); | ||
274 | atomic_set(&sp->so_count, 1); | 277 | atomic_set(&sp->so_count, 1); |
275 | return sp; | 278 | return sp; |
276 | } | 279 | } |
@@ -359,7 +362,6 @@ nfs4_alloc_open_state(void) | |||
359 | memset(state->stateid.data, 0, sizeof(state->stateid.data)); | 362 | memset(state->stateid.data, 0, sizeof(state->stateid.data)); |
360 | atomic_set(&state->count, 1); | 363 | atomic_set(&state->count, 1); |
361 | INIT_LIST_HEAD(&state->lock_states); | 364 | INIT_LIST_HEAD(&state->lock_states); |
362 | init_MUTEX(&state->lock_sema); | ||
363 | spin_lock_init(&state->state_lock); | 365 | spin_lock_init(&state->state_lock); |
364 | return state; | 366 | return state; |
365 | } | 367 | } |
@@ -437,21 +439,23 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) | |||
437 | if (state) | 439 | if (state) |
438 | goto out; | 440 | goto out; |
439 | new = nfs4_alloc_open_state(); | 441 | new = nfs4_alloc_open_state(); |
442 | spin_lock(&owner->so_lock); | ||
440 | spin_lock(&inode->i_lock); | 443 | spin_lock(&inode->i_lock); |
441 | state = __nfs4_find_state_byowner(inode, owner); | 444 | state = __nfs4_find_state_byowner(inode, owner); |
442 | if (state == NULL && new != NULL) { | 445 | if (state == NULL && new != NULL) { |
443 | state = new; | 446 | state = new; |
444 | /* Caller *must* be holding owner->so_sem */ | ||
445 | /* Note: The reclaim code dictates that we add stateless | ||
446 | * and read-only stateids to the end of the list */ | ||
447 | list_add_tail(&state->open_states, &owner->so_states); | ||
448 | state->owner = owner; | 447 | state->owner = owner; |
449 | atomic_inc(&owner->so_count); | 448 | atomic_inc(&owner->so_count); |
450 | list_add(&state->inode_states, &nfsi->open_states); | 449 | list_add(&state->inode_states, &nfsi->open_states); |
451 | state->inode = igrab(inode); | 450 | state->inode = igrab(inode); |
452 | spin_unlock(&inode->i_lock); | 451 | spin_unlock(&inode->i_lock); |
452 | /* Note: The reclaim code dictates that we add stateless | ||
453 | * and read-only stateids to the end of the list */ | ||
454 | list_add_tail(&state->open_states, &owner->so_states); | ||
455 | spin_unlock(&owner->so_lock); | ||
453 | } else { | 456 | } else { |
454 | spin_unlock(&inode->i_lock); | 457 | spin_unlock(&inode->i_lock); |
458 | spin_unlock(&owner->so_lock); | ||
455 | if (new) | 459 | if (new) |
456 | nfs4_free_open_state(new); | 460 | nfs4_free_open_state(new); |
457 | } | 461 | } |
@@ -461,19 +465,21 @@ out: | |||
461 | 465 | ||
462 | /* | 466 | /* |
463 | * Beware! Caller must be holding exactly one | 467 | * Beware! Caller must be holding exactly one |
464 | * reference to clp->cl_sem and owner->so_sema! | 468 | * reference to clp->cl_sem! |
465 | */ | 469 | */ |
466 | void nfs4_put_open_state(struct nfs4_state *state) | 470 | void nfs4_put_open_state(struct nfs4_state *state) |
467 | { | 471 | { |
468 | struct inode *inode = state->inode; | 472 | struct inode *inode = state->inode; |
469 | struct nfs4_state_owner *owner = state->owner; | 473 | struct nfs4_state_owner *owner = state->owner; |
470 | 474 | ||
471 | if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) | 475 | if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) |
472 | return; | 476 | return; |
477 | spin_lock(&inode->i_lock); | ||
473 | if (!list_empty(&state->inode_states)) | 478 | if (!list_empty(&state->inode_states)) |
474 | list_del(&state->inode_states); | 479 | list_del(&state->inode_states); |
475 | spin_unlock(&inode->i_lock); | ||
476 | list_del(&state->open_states); | 480 | list_del(&state->open_states); |
481 | spin_unlock(&inode->i_lock); | ||
482 | spin_unlock(&owner->so_lock); | ||
477 | iput(inode); | 483 | iput(inode); |
478 | BUG_ON (state->state != 0); | 484 | BUG_ON (state->state != 0); |
479 | nfs4_free_open_state(state); | 485 | nfs4_free_open_state(state); |
@@ -481,20 +487,17 @@ void nfs4_put_open_state(struct nfs4_state *state) | |||
481 | } | 487 | } |
482 | 488 | ||
483 | /* | 489 | /* |
484 | * Beware! Caller must be holding no references to clp->cl_sem! | 490 | * Close the current file. |
485 | * of owner->so_sema! | ||
486 | */ | 491 | */ |
487 | void nfs4_close_state(struct nfs4_state *state, mode_t mode) | 492 | void nfs4_close_state(struct nfs4_state *state, mode_t mode) |
488 | { | 493 | { |
489 | struct inode *inode = state->inode; | 494 | struct inode *inode = state->inode; |
490 | struct nfs4_state_owner *owner = state->owner; | 495 | struct nfs4_state_owner *owner = state->owner; |
491 | struct nfs4_client *clp = owner->so_client; | ||
492 | int newstate; | 496 | int newstate; |
493 | 497 | ||
494 | atomic_inc(&owner->so_count); | 498 | atomic_inc(&owner->so_count); |
495 | down_read(&clp->cl_sem); | ||
496 | down(&owner->so_sema); | ||
497 | /* Protect against nfs4_find_state() */ | 499 | /* Protect against nfs4_find_state() */ |
500 | spin_lock(&owner->so_lock); | ||
498 | spin_lock(&inode->i_lock); | 501 | spin_lock(&inode->i_lock); |
499 | if (mode & FMODE_READ) | 502 | if (mode & FMODE_READ) |
500 | state->nreaders--; | 503 | state->nreaders--; |
@@ -507,6 +510,7 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) | |||
507 | list_move_tail(&state->open_states, &owner->so_states); | 510 | list_move_tail(&state->open_states, &owner->so_states); |
508 | } | 511 | } |
509 | spin_unlock(&inode->i_lock); | 512 | spin_unlock(&inode->i_lock); |
513 | spin_unlock(&owner->so_lock); | ||
510 | newstate = 0; | 514 | newstate = 0; |
511 | if (state->state != 0) { | 515 | if (state->state != 0) { |
512 | if (state->nreaders) | 516 | if (state->nreaders) |
@@ -515,14 +519,16 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) | |||
515 | newstate |= FMODE_WRITE; | 519 | newstate |= FMODE_WRITE; |
516 | if (state->state == newstate) | 520 | if (state->state == newstate) |
517 | goto out; | 521 | goto out; |
518 | if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) | 522 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { |
523 | state->state = newstate; | ||
524 | goto out; | ||
525 | } | ||
526 | if (nfs4_do_close(inode, state, newstate) == 0) | ||
519 | return; | 527 | return; |
520 | } | 528 | } |
521 | out: | 529 | out: |
522 | nfs4_put_open_state(state); | 530 | nfs4_put_open_state(state); |
523 | up(&owner->so_sema); | ||
524 | nfs4_put_state_owner(owner); | 531 | nfs4_put_state_owner(owner); |
525 | up_read(&clp->cl_sem); | ||
526 | } | 532 | } |
527 | 533 | ||
528 | /* | 534 | /* |
@@ -546,19 +552,16 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) | |||
546 | * Return a compatible lock_state. If no initialized lock_state structure | 552 | * Return a compatible lock_state. If no initialized lock_state structure |
547 | * exists, return an uninitialized one. | 553 | * exists, return an uninitialized one. |
548 | * | 554 | * |
549 | * The caller must be holding state->lock_sema | ||
550 | */ | 555 | */ |
551 | static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) | 556 | static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) |
552 | { | 557 | { |
553 | struct nfs4_lock_state *lsp; | 558 | struct nfs4_lock_state *lsp; |
554 | struct nfs4_client *clp = state->owner->so_client; | 559 | struct nfs4_client *clp = state->owner->so_client; |
555 | 560 | ||
556 | lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); | 561 | lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); |
557 | if (lsp == NULL) | 562 | if (lsp == NULL) |
558 | return NULL; | 563 | return NULL; |
559 | lsp->ls_flags = 0; | 564 | lsp->ls_seqid.sequence = &state->owner->so_sequence; |
560 | lsp->ls_seqid = 0; /* arbitrary */ | ||
561 | memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); | ||
562 | atomic_set(&lsp->ls_count, 1); | 565 | atomic_set(&lsp->ls_count, 1); |
563 | lsp->ls_owner = fl_owner; | 566 | lsp->ls_owner = fl_owner; |
564 | spin_lock(&clp->cl_lock); | 567 | spin_lock(&clp->cl_lock); |
@@ -572,7 +575,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
572 | * Return a compatible lock_state. If no initialized lock_state structure | 575 | * Return a compatible lock_state. If no initialized lock_state structure |
573 | * exists, return an uninitialized one. | 576 | * exists, return an uninitialized one. |
574 | * | 577 | * |
575 | * The caller must be holding state->lock_sema and clp->cl_sem | 578 | * The caller must be holding clp->cl_sem |
576 | */ | 579 | */ |
577 | static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) | 580 | static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) |
578 | { | 581 | { |
@@ -605,7 +608,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ | |||
605 | * Release reference to lock_state, and free it if we see that | 608 | * Release reference to lock_state, and free it if we see that |
606 | * it is no longer in use | 609 | * it is no longer in use |
607 | */ | 610 | */ |
608 | static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) | 611 | void nfs4_put_lock_state(struct nfs4_lock_state *lsp) |
609 | { | 612 | { |
610 | struct nfs4_state *state; | 613 | struct nfs4_state *state; |
611 | 614 | ||
@@ -673,29 +676,94 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f | |||
673 | nfs4_put_lock_state(lsp); | 676 | nfs4_put_lock_state(lsp); |
674 | } | 677 | } |
675 | 678 | ||
676 | /* | 679 | struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) |
677 | * Called with state->lock_sema and clp->cl_sem held. | ||
678 | */ | ||
679 | void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) | ||
680 | { | 680 | { |
681 | if (status == NFS_OK || seqid_mutating_err(-status)) | 681 | struct nfs_seqid *new; |
682 | lsp->ls_seqid++; | 682 | |
683 | new = kmalloc(sizeof(*new), GFP_KERNEL); | ||
684 | if (new != NULL) { | ||
685 | new->sequence = counter; | ||
686 | INIT_LIST_HEAD(&new->list); | ||
687 | } | ||
688 | return new; | ||
689 | } | ||
690 | |||
691 | void nfs_free_seqid(struct nfs_seqid *seqid) | ||
692 | { | ||
693 | struct rpc_sequence *sequence = seqid->sequence->sequence; | ||
694 | |||
695 | if (!list_empty(&seqid->list)) { | ||
696 | spin_lock(&sequence->lock); | ||
697 | list_del(&seqid->list); | ||
698 | spin_unlock(&sequence->lock); | ||
699 | } | ||
700 | rpc_wake_up_next(&sequence->wait); | ||
701 | kfree(seqid); | ||
683 | } | 702 | } |
684 | 703 | ||
685 | /* | 704 | /* |
686 | * Called with sp->so_sema and clp->cl_sem held. | 705 | * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or |
687 | * | 706 | * failed with a seqid incrementing error - |
688 | * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or | 707 | * see comments nfs_fs.h:seqid_mutating_error() |
689 | * failed with a seqid incrementing error - | 708 | */ |
690 | * see comments nfs_fs.h:seqid_mutating_error() | 709 | static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) |
691 | */ | 710 | { |
692 | void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) | 711 | switch (status) { |
693 | { | 712 | case 0: |
694 | if (status == NFS_OK || seqid_mutating_err(-status)) | 713 | break; |
695 | sp->so_seqid++; | 714 | case -NFS4ERR_BAD_SEQID: |
696 | /* If the server returns BAD_SEQID, unhash state_owner here */ | 715 | case -NFS4ERR_STALE_CLIENTID: |
697 | if (status == -NFS4ERR_BAD_SEQID) | 716 | case -NFS4ERR_STALE_STATEID: |
717 | case -NFS4ERR_BAD_STATEID: | ||
718 | case -NFS4ERR_BADXDR: | ||
719 | case -NFS4ERR_RESOURCE: | ||
720 | case -NFS4ERR_NOFILEHANDLE: | ||
721 | /* Non-seqid mutating errors */ | ||
722 | return; | ||
723 | }; | ||
724 | /* | ||
725 | * Note: no locking needed as we are guaranteed to be first | ||
726 | * on the sequence list | ||
727 | */ | ||
728 | seqid->sequence->counter++; | ||
729 | } | ||
730 | |||
731 | void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) | ||
732 | { | ||
733 | if (status == -NFS4ERR_BAD_SEQID) { | ||
734 | struct nfs4_state_owner *sp = container_of(seqid->sequence, | ||
735 | struct nfs4_state_owner, so_seqid); | ||
698 | nfs4_drop_state_owner(sp); | 736 | nfs4_drop_state_owner(sp); |
737 | } | ||
738 | return nfs_increment_seqid(status, seqid); | ||
739 | } | ||
740 | |||
741 | /* | ||
742 | * Increment the seqid if the LOCK/LOCKU succeeded, or | ||
743 | * failed with a seqid incrementing error - | ||
744 | * see comments nfs_fs.h:seqid_mutating_error() | ||
745 | */ | ||
746 | void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) | ||
747 | { | ||
748 | return nfs_increment_seqid(status, seqid); | ||
749 | } | ||
750 | |||
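nfs_increment_seqid() above centralises the RFC 3530 rule: the sequence counter advances on success and on every error except the handful the protocol treats as not consuming a seqid. A standalone sketch of that rule (the numeric error values are taken from RFC 3530):

#include <stdio.h>

#define NFS4ERR_RESOURCE         10018
#define NFS4ERR_NOFILEHANDLE     10020
#define NFS4ERR_STALE_CLIENTID   10022
#define NFS4ERR_STALE_STATEID    10023
#define NFS4ERR_BAD_STATEID      10025
#define NFS4ERR_BAD_SEQID        10026
#define NFS4ERR_BADXDR           10036

/* Advance the counter unless the error is one the server never counts
 * against the seqid stream. */
void increment_seqid(int status, unsigned int *counter)
{
    switch (status) {
    case 0:
        break;
    case -NFS4ERR_BAD_SEQID:
    case -NFS4ERR_STALE_CLIENTID:
    case -NFS4ERR_STALE_STATEID:
    case -NFS4ERR_BAD_STATEID:
    case -NFS4ERR_BADXDR:
    case -NFS4ERR_RESOURCE:
    case -NFS4ERR_NOFILEHANDLE:
        return;             /* non-mutating: leave the counter alone */
    }
    (*counter)++;           /* success and all other errors mutate */
}

int main(void)
{
    unsigned int c = 1;

    increment_seqid(-NFS4ERR_BAD_SEQID, &c);  /* no change */
    increment_seqid(0, &c);                   /* c becomes 2 */
    printf("%u\n", c);
    return 0;
}
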
751 | int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) | ||
752 | { | ||
753 | struct rpc_sequence *sequence = seqid->sequence->sequence; | ||
754 | int status = 0; | ||
755 | |||
756 | if (sequence->list.next == &seqid->list) | ||
757 | goto out; | ||
758 | spin_lock(&sequence->lock); | ||
759 | if (!list_empty(&sequence->list)) { | ||
760 | rpc_sleep_on(&sequence->wait, task, NULL, NULL); | ||
761 | status = -EAGAIN; | ||
762 | } else | ||
763 | list_add(&seqid->list, &sequence->list); | ||
764 | spin_unlock(&sequence->lock); | ||
765 | out: | ||
766 | return status; | ||
699 | } | 767 | } |
700 | 768 | ||
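nfs_wait_on_sequence() above serialises seqid-bearing operations per owner: a request proceeds if it is already at the head of the list or the list is empty, otherwise it sleeps on the wait queue until nfs_free_seqid() wakes the next waiter. A deliberately simplified, single-threaded model of that queue discipline:

#include <stdbool.h>
#include <stdio.h>

/* One in-flight seqid operation per owner; later arrivals must wait. */
struct sequence { int busy; int waiters; };

static bool seqid_try_acquire(struct sequence *seq)
{
    if (!seq->busy) {
        seq->busy = 1;
        return true;            /* we own the seqid stream now */
    }
    seq->waiters++;             /* kernel: rpc_sleep_on(&sequence->wait, ...) */
    return false;               /* caller backs off and is re-driven later */
}

static void seqid_release(struct sequence *seq)
{
    seq->busy = 0;
    if (seq->waiters > 0) {
        seq->waiters--;         /* kernel: rpc_wake_up_next(&sequence->wait) */
        seq->busy = 1;          /* the next waiter becomes the owner */
    }
}

int main(void)
{
    struct sequence seq = { 0, 0 };

    printf("%d\n", seqid_try_acquire(&seq));  /* 1: first op proceeds   */
    printf("%d\n", seqid_try_acquire(&seq));  /* 0: second op must wait */
    seqid_release(&seq);                      /* wakes the waiter       */
    return 0;
}
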
701 | static int reclaimer(void *); | 769 | static int reclaimer(void *); |
@@ -791,8 +859,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n | |||
791 | if (state->state == 0) | 859 | if (state->state == 0) |
792 | continue; | 860 | continue; |
793 | status = ops->recover_open(sp, state); | 861 | status = ops->recover_open(sp, state); |
794 | list_for_each_entry(lock, &state->lock_states, ls_locks) | ||
795 | lock->ls_flags &= ~NFS_LOCK_INITIALIZED; | ||
796 | if (status >= 0) { | 862 | if (status >= 0) { |
797 | status = nfs4_reclaim_locks(ops, state); | 863 | status = nfs4_reclaim_locks(ops, state); |
798 | if (status < 0) | 864 | if (status < 0) |
@@ -831,6 +897,28 @@ out_err: | |||
831 | return status; | 897 | return status; |
832 | } | 898 | } |
833 | 899 | ||
900 | static void nfs4_state_mark_reclaim(struct nfs4_client *clp) | ||
901 | { | ||
902 | struct nfs4_state_owner *sp; | ||
903 | struct nfs4_state *state; | ||
904 | struct nfs4_lock_state *lock; | ||
905 | |||
906 | /* Reset all sequence ids to zero */ | ||
907 | list_for_each_entry(sp, &clp->cl_state_owners, so_list) { | ||
908 | sp->so_seqid.counter = 0; | ||
909 | sp->so_seqid.flags = 0; | ||
910 | spin_lock(&sp->so_lock); | ||
911 | list_for_each_entry(state, &sp->so_states, open_states) { | ||
912 | list_for_each_entry(lock, &state->lock_states, ls_locks) { | ||
913 | lock->ls_seqid.counter = 0; | ||
914 | lock->ls_seqid.flags = 0; | ||
915 | lock->ls_flags &= ~NFS_LOCK_INITIALIZED; | ||
916 | } | ||
917 | } | ||
918 | spin_unlock(&sp->so_lock); | ||
919 | } | ||
920 | } | ||
921 | |||
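nfs4_state_mark_reclaim() above walks every state owner, open state and lock state before the client is re-initialised, zeroing each sequence counter and clearing the lock-initialised flag so locks are re-established during reclaim. A flattened model of that reset (the list walk is replaced by an array for brevity):

#include <stdio.h>

#define NFS_LOCK_INITIALIZED 1

struct seqid_counter { unsigned int counter; unsigned int flags; };
struct lock_state    { struct seqid_counter seqid; unsigned int flags; };
struct owner         { struct seqid_counter seqid; };

/* Reset every seqid stream to zero and mark locks as needing reclaim. */
static void mark_reclaim(struct owner *o, struct lock_state *locks, int nlocks)
{
    int i;

    o->seqid.counter = 0;
    o->seqid.flags = 0;
    for (i = 0; i < nlocks; i++) {
        locks[i].seqid.counter = 0;
        locks[i].seqid.flags = 0;
        locks[i].flags &= ~NFS_LOCK_INITIALIZED;
    }
}

int main(void)
{
    struct owner o = { { 5, 1 } };
    struct lock_state l[1] = { { { 3, 1 }, NFS_LOCK_INITIALIZED } };

    mark_reclaim(&o, l, 1);
    printf("%u %u\n", o.seqid.counter, l[0].flags); /* prints "0 0" */
    return 0;
}
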
834 | static int reclaimer(void *ptr) | 922 | static int reclaimer(void *ptr) |
835 | { | 923 | { |
836 | struct reclaimer_args *args = (struct reclaimer_args *)ptr; | 924 | struct reclaimer_args *args = (struct reclaimer_args *)ptr; |
@@ -864,6 +952,7 @@ restart_loop: | |||
864 | default: | 952 | default: |
865 | ops = &nfs4_network_partition_recovery_ops; | 953 | ops = &nfs4_network_partition_recovery_ops; |
866 | }; | 954 | }; |
955 | nfs4_state_mark_reclaim(clp); | ||
867 | status = __nfs4_init_client(clp); | 956 | status = __nfs4_init_client(clp); |
868 | if (status) | 957 | if (status) |
869 | goto out_error; | 958 | goto out_error; |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6c564ef9489e..cd762648fa9a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -602,10 +602,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) | |||
602 | { | 602 | { |
603 | uint32_t *p; | 603 | uint32_t *p; |
604 | 604 | ||
605 | RESERVE_SPACE(8+sizeof(arg->stateid.data)); | 605 | RESERVE_SPACE(8+sizeof(arg->stateid->data)); |
606 | WRITE32(OP_CLOSE); | 606 | WRITE32(OP_CLOSE); |
607 | WRITE32(arg->seqid); | 607 | WRITE32(arg->seqid->sequence->counter); |
608 | WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); | 608 | WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); |
609 | 609 | ||
610 | return 0; | 610 | return 0; |
611 | } | 611 | } |
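The encoders now read the seqid through arg->seqid->sequence->counter and the stateid through a pointer, but the wire format is unchanged: a 32-bit opcode, the 32-bit seqid, then the 16-byte opaque stateid, in XDR big-endian order. A hedged user-space sketch of encoding CLOSE into a flat buffer (OP_CLOSE and the stateid layout are per RFC 3530; this is not the kernel's xdr_stream machinery):

#include <arpa/inet.h>   /* htonl */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OP_CLOSE 4       /* RFC 3530 operation number */

/* XDR is a stream of big-endian 32-bit words; an NFSv4 stateid is a
 * 4-byte seqid plus 12 opaque bytes, copied verbatim. */
static size_t encode_close(uint8_t *buf, uint32_t seqid_counter,
                           const uint8_t stateid[16])
{
    uint32_t word;
    size_t off = 0;

    word = htonl(OP_CLOSE);
    memcpy(buf + off, &word, 4); off += 4;
    word = htonl(seqid_counter);
    memcpy(buf + off, &word, 4); off += 4;
    memcpy(buf + off, stateid, 16); off += 16;
    return off;
}

int main(void)
{
    uint8_t buf[24], stateid[16] = { 0 };

    printf("encoded %zu bytes\n", encode_close(buf, 7, stateid));
    return 0;
}
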
@@ -729,22 +729,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) | |||
729 | WRITE64(arg->length); | 729 | WRITE64(arg->length); |
730 | WRITE32(opargs->new_lock_owner); | 730 | WRITE32(opargs->new_lock_owner); |
731 | if (opargs->new_lock_owner){ | 731 | if (opargs->new_lock_owner){ |
732 | struct nfs_open_to_lock *ol = opargs->u.open_lock; | ||
733 | |||
734 | RESERVE_SPACE(40); | 732 | RESERVE_SPACE(40); |
735 | WRITE32(ol->open_seqid); | 733 | WRITE32(opargs->open_seqid->sequence->counter); |
736 | WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); | 734 | WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data)); |
737 | WRITE32(ol->lock_seqid); | 735 | WRITE32(opargs->lock_seqid->sequence->counter); |
738 | WRITE64(ol->lock_owner.clientid); | 736 | WRITE64(opargs->lock_owner.clientid); |
739 | WRITE32(4); | 737 | WRITE32(4); |
740 | WRITE32(ol->lock_owner.id); | 738 | WRITE32(opargs->lock_owner.id); |
741 | } | 739 | } |
742 | else { | 740 | else { |
743 | struct nfs_exist_lock *el = opargs->u.exist_lock; | ||
744 | |||
745 | RESERVE_SPACE(20); | 741 | RESERVE_SPACE(20); |
746 | WRITEMEM(&el->stateid, sizeof(el->stateid)); | 742 | WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data)); |
747 | WRITE32(el->seqid); | 743 | WRITE32(opargs->lock_seqid->sequence->counter); |
748 | } | 744 | } |
749 | 745 | ||
750 | return 0; | 746 | return 0; |
@@ -775,8 +771,8 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) | |||
775 | RESERVE_SPACE(44); | 771 | RESERVE_SPACE(44); |
776 | WRITE32(OP_LOCKU); | 772 | WRITE32(OP_LOCKU); |
777 | WRITE32(arg->type); | 773 | WRITE32(arg->type); |
778 | WRITE32(opargs->seqid); | 774 | WRITE32(opargs->seqid->sequence->counter); |
779 | WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); | 775 | WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data)); |
780 | WRITE64(arg->offset); | 776 | WRITE64(arg->offset); |
781 | WRITE64(arg->length); | 777 | WRITE64(arg->length); |
782 | 778 | ||
@@ -826,7 +822,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena | |||
826 | */ | 822 | */ |
827 | RESERVE_SPACE(8); | 823 | RESERVE_SPACE(8); |
828 | WRITE32(OP_OPEN); | 824 | WRITE32(OP_OPEN); |
829 | WRITE32(arg->seqid); | 825 | WRITE32(arg->seqid->sequence->counter); |
830 | encode_share_access(xdr, arg->open_flags); | 826 | encode_share_access(xdr, arg->open_flags); |
831 | RESERVE_SPACE(16); | 827 | RESERVE_SPACE(16); |
832 | WRITE64(arg->clientid); | 828 | WRITE64(arg->clientid); |
@@ -941,7 +937,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con | |||
941 | RESERVE_SPACE(8+sizeof(arg->stateid.data)); | 937 | RESERVE_SPACE(8+sizeof(arg->stateid.data)); |
942 | WRITE32(OP_OPEN_CONFIRM); | 938 | WRITE32(OP_OPEN_CONFIRM); |
943 | WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); | 939 | WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); |
944 | WRITE32(arg->seqid); | 940 | WRITE32(arg->seqid->sequence->counter); |
945 | 941 | ||
946 | return 0; | 942 | return 0; |
947 | } | 943 | } |
@@ -950,10 +946,10 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea | |||
950 | { | 946 | { |
951 | uint32_t *p; | 947 | uint32_t *p; |
952 | 948 | ||
953 | RESERVE_SPACE(8+sizeof(arg->stateid.data)); | 949 | RESERVE_SPACE(8+sizeof(arg->stateid->data)); |
954 | WRITE32(OP_OPEN_DOWNGRADE); | 950 | WRITE32(OP_OPEN_DOWNGRADE); |
955 | WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); | 951 | WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); |
956 | WRITE32(arg->seqid); | 952 | WRITE32(arg->seqid->sequence->counter); |
957 | encode_share_access(xdr, arg->open_flags); | 953 | encode_share_access(xdr, arg->open_flags); |
958 | return 0; | 954 | return 0; |
959 | } | 955 | } |
@@ -1437,6 +1433,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena | |||
1437 | }; | 1433 | }; |
1438 | int status; | 1434 | int status; |
1439 | 1435 | ||
1436 | status = nfs_wait_on_sequence(args->seqid, req->rq_task); | ||
1437 | if (status != 0) | ||
1438 | goto out; | ||
1440 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 1439 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
1441 | encode_compound_hdr(&xdr, &hdr); | 1440 | encode_compound_hdr(&xdr, &hdr); |
1442 | status = encode_putfh(&xdr, args->fh); | 1441 | status = encode_putfh(&xdr, args->fh); |
@@ -1464,6 +1463,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n | |||
1464 | }; | 1463 | }; |
1465 | int status; | 1464 | int status; |
1466 | 1465 | ||
1466 | status = nfs_wait_on_sequence(args->seqid, req->rq_task); | ||
1467 | if (status != 0) | ||
1468 | goto out; | ||
1467 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 1469 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
1468 | encode_compound_hdr(&xdr, &hdr); | 1470 | encode_compound_hdr(&xdr, &hdr); |
1469 | status = encode_putfh(&xdr, args->fh); | 1471 | status = encode_putfh(&xdr, args->fh); |
@@ -1485,6 +1487,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf | |||
1485 | }; | 1487 | }; |
1486 | int status; | 1488 | int status; |
1487 | 1489 | ||
1490 | status = nfs_wait_on_sequence(args->seqid, req->rq_task); | ||
1491 | if (status != 0) | ||
1492 | goto out; | ||
1488 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 1493 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
1489 | encode_compound_hdr(&xdr, &hdr); | 1494 | encode_compound_hdr(&xdr, &hdr); |
1490 | status = encode_putfh(&xdr, args->fh); | 1495 | status = encode_putfh(&xdr, args->fh); |
@@ -1525,8 +1530,15 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka | |||
1525 | struct compound_hdr hdr = { | 1530 | struct compound_hdr hdr = { |
1526 | .nops = 2, | 1531 | .nops = 2, |
1527 | }; | 1532 | }; |
1533 | struct nfs_lock_opargs *opargs = args->u.lock; | ||
1528 | int status; | 1534 | int status; |
1529 | 1535 | ||
1536 | status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task); | ||
1537 | if (status != 0) | ||
1538 | goto out; | ||
1539 | /* Do we need to do an open_to_lock_owner? */ | ||
1540 | if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) | ||
1541 | opargs->new_lock_owner = 0; | ||
1530 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 1542 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
1531 | encode_compound_hdr(&xdr, &hdr); | 1543 | encode_compound_hdr(&xdr, &hdr); |
1532 | status = encode_putfh(&xdr, args->fh); | 1544 | status = encode_putfh(&xdr, args->fh); |
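The LOCK encoder above now serializes on the lock seqid before touching the wire and decides at encode time whether the full open_to_lock_owner4 arm is still needed: once a lock-owner's stateid has been confirmed, NFS_SEQID_CONFIRMED is set on its sequence and the short exist_lock_owner4 form (lock stateid plus lock seqid) suffices. A minimal sketch of that decision, using the nfs_lock_opargs fields introduced later in this patch (the helper name is illustrative, not part of the patch):

	static inline int nfs4_needs_open_to_lock_owner(const struct nfs_lock_opargs *opargs)
	{
		/* unconfirmed lock-owner: send open seqid + open stateid + lock seqid
		 * + lock_owner so the server can create the new lock stateid */
		return !(opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED);
	}
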
@@ -2890,8 +2902,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) | |||
2890 | 2902 | ||
2891 | status = decode_op_hdr(xdr, OP_LOCK); | 2903 | status = decode_op_hdr(xdr, OP_LOCK); |
2892 | if (status == 0) { | 2904 | if (status == 0) { |
2893 | READ_BUF(sizeof(nfs4_stateid)); | 2905 | READ_BUF(sizeof(res->u.stateid.data)); |
2894 | COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); | 2906 | COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); |
2895 | } else if (status == -NFS4ERR_DENIED) | 2907 | } else if (status == -NFS4ERR_DENIED) |
2896 | return decode_lock_denied(xdr, &res->u.denied); | 2908 | return decode_lock_denied(xdr, &res->u.denied); |
2897 | return status; | 2909 | return status; |
@@ -2913,8 +2925,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) | |||
2913 | 2925 | ||
2914 | status = decode_op_hdr(xdr, OP_LOCKU); | 2926 | status = decode_op_hdr(xdr, OP_LOCKU); |
2915 | if (status == 0) { | 2927 | if (status == 0) { |
2916 | READ_BUF(sizeof(nfs4_stateid)); | 2928 | READ_BUF(sizeof(res->u.stateid.data)); |
2917 | COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); | 2929 | COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); |
2918 | } | 2930 | } |
2919 | return status; | 2931 | return status; |
2920 | } | 2932 | } |
@@ -3243,7 +3255,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
3243 | if (attrlen <= *acl_len) | 3255 | if (attrlen <= *acl_len) |
3244 | xdr_read_pages(xdr, attrlen); | 3256 | xdr_read_pages(xdr, attrlen); |
3245 | *acl_len = attrlen; | 3257 | *acl_len = attrlen; |
3246 | } | 3258 | } else |
3259 | status = -EOPNOTSUPP; | ||
3247 | 3260 | ||
3248 | out: | 3261 | out: |
3249 | return status; | 3262 | return status; |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index be23c3fb9260..8fef86523d7f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata) | |||
216 | 216 | ||
217 | static int | 217 | static int |
218 | nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 218 | nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
219 | int flags) | 219 | int flags, struct nameidata *nd) |
220 | { | 220 | { |
221 | struct nfs_fh fhandle; | 221 | struct nfs_fh fhandle; |
222 | struct nfs_fattr fattr; | 222 | struct nfs_fattr fattr; |
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
@@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) | |||
739 | } | 739 | } |
740 | 740 | ||
741 | static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | 741 | static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, |
742 | int flags, struct file *f) | 742 | int flags, struct file *f, |
743 | int (*open)(struct inode *, struct file *)) | ||
743 | { | 744 | { |
744 | struct inode *inode; | 745 | struct inode *inode; |
745 | int error; | 746 | int error; |
@@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
761 | f->f_op = fops_get(inode->i_fop); | 762 | f->f_op = fops_get(inode->i_fop); |
762 | file_move(f, &inode->i_sb->s_files); | 763 | file_move(f, &inode->i_sb->s_files); |
763 | 764 | ||
764 | if (f->f_op && f->f_op->open) { | 765 | if (!open && f->f_op) |
765 | error = f->f_op->open(inode,f); | 766 | open = f->f_op->open; |
767 | if (open) { | ||
768 | error = open(inode, f); | ||
766 | if (error) | 769 | if (error) |
767 | goto cleanup_all; | 770 | goto cleanup_all; |
768 | } | 771 | } |
772 | |||
769 | f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); | 773 | f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
770 | 774 | ||
771 | file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); | 775 | file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); |
@@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode) | |||
814 | { | 818 | { |
815 | int namei_flags, error; | 819 | int namei_flags, error; |
816 | struct nameidata nd; | 820 | struct nameidata nd; |
817 | struct file *f; | ||
818 | 821 | ||
819 | namei_flags = flags; | 822 | namei_flags = flags; |
820 | if ((namei_flags+1) & O_ACCMODE) | 823 | if ((namei_flags+1) & O_ACCMODE) |
821 | namei_flags++; | 824 | namei_flags++; |
822 | if (namei_flags & O_TRUNC) | ||
823 | namei_flags |= 2; | ||
824 | |||
825 | error = -ENFILE; | ||
826 | f = get_empty_filp(); | ||
827 | if (f == NULL) | ||
828 | return ERR_PTR(error); | ||
829 | 825 | ||
830 | error = open_namei(filename, namei_flags, mode, &nd); | 826 | error = open_namei(filename, namei_flags, mode, &nd); |
831 | if (!error) | 827 | if (!error) |
832 | return __dentry_open(nd.dentry, nd.mnt, flags, f); | 828 | return nameidata_to_filp(&nd, flags); |
833 | 829 | ||
834 | put_filp(f); | ||
835 | return ERR_PTR(error); | 830 | return ERR_PTR(error); |
836 | } | 831 | } |
837 | EXPORT_SYMBOL(filp_open); | 832 | EXPORT_SYMBOL(filp_open); |
838 | 833 | ||
834 | /** | ||
835 | * lookup_instantiate_filp - instantiates the open intent filp | ||
836 | * @nd: pointer to nameidata | ||
837 | * @dentry: pointer to dentry | ||
838 | * @open: open callback | ||
839 | * | ||
840 | * Helper for filesystems that want to use lookup open intents and pass back | ||
841 | * a fully instantiated struct file to the caller. | ||
842 | * This function is meant to be called from within a filesystem's | ||
843 | * lookup method. | ||
844 | * Note that in case of error, nd->intent.open.file is destroyed, but the | ||
845 | * path information remains valid. | ||
846 | * If the open callback is set to NULL, then the standard f_op->open() | ||
847 | * filesystem callback is substituted. | ||
848 | */ | ||
849 | struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, | ||
850 | int (*open)(struct inode *, struct file *)) | ||
851 | { | ||
852 | if (IS_ERR(nd->intent.open.file)) | ||
853 | goto out; | ||
854 | if (IS_ERR(dentry)) | ||
855 | goto out_err; | ||
856 | nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), | ||
857 | nd->intent.open.flags - 1, | ||
858 | nd->intent.open.file, | ||
859 | open); | ||
860 | out: | ||
861 | return nd->intent.open.file; | ||
862 | out_err: | ||
863 | release_open_intent(nd); | ||
864 | nd->intent.open.file = (struct file *)dentry; | ||
865 | goto out; | ||
866 | } | ||
867 | EXPORT_SYMBOL_GPL(lookup_instantiate_filp); | ||
868 | |||
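lookup_instantiate_filp() is the hook that lets a filesystem's ->lookup() hand a fully opened struct file back through the open intent instead of having the VFS call f_op->open() later. A minimal sketch of the calling pattern, assuming a hypothetical filesystem (myfs_intent_open() and myfs_handle_open_intent() are illustrative names, not part of this patch); passing NULL as the callback falls back to the inode's f_op->open():

	static int myfs_intent_open(struct inode *inode, struct file *filp)
	{
		/* filesystem-specific open work (revalidation, delegation, ...) */
		return 0;
	}

	static void myfs_handle_open_intent(struct nameidata *nd, struct dentry *dentry)
	{
		struct file *filp;

		if (!(nd->flags & LOOKUP_OPEN))
			return;
		filp = lookup_instantiate_filp(nd, dentry, myfs_intent_open);
		if (IS_ERR(filp))
			printk(KERN_DEBUG "myfs: open intent failed: %ld\n", PTR_ERR(filp));
		/* on success the VFS picks the file up via nameidata_to_filp() */
	}
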
869 | /** | ||
870 | * nameidata_to_filp - convert a nameidata to an open filp. | ||
871 | * @nd: pointer to nameidata | ||
872 | * @flags: open flags | ||
873 | * | ||
874 | * Note that this function destroys the original nameidata | ||
875 | */ | ||
876 | struct file *nameidata_to_filp(struct nameidata *nd, int flags) | ||
877 | { | ||
878 | struct file *filp; | ||
879 | |||
880 | /* Pick up the filp from the open intent */ | ||
881 | filp = nd->intent.open.file; | ||
882 | /* Has the filesystem initialised the file for us? */ | ||
883 | if (filp->f_dentry == NULL) | ||
884 | filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); | ||
885 | else | ||
886 | path_release(nd); | ||
887 | return filp; | ||
888 | } | ||
889 | |||
839 | struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) | 890 | struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) |
840 | { | 891 | { |
841 | int error; | 892 | int error; |
@@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) | |||
846 | if (f == NULL) | 897 | if (f == NULL) |
847 | return ERR_PTR(error); | 898 | return ERR_PTR(error); |
848 | 899 | ||
849 | return __dentry_open(dentry, mnt, flags, f); | 900 | return __dentry_open(dentry, mnt, flags, f, NULL); |
850 | } | 901 | } |
851 | EXPORT_SYMBOL(dentry_open); | 902 | EXPORT_SYMBOL(dentry_open); |
852 | 903 | ||
diff --git a/include/linux/namei.h b/include/linux/namei.h index 7db67b008cac..1c975d0d9e94 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h | |||
@@ -8,6 +8,7 @@ struct vfsmount; | |||
8 | struct open_intent { | 8 | struct open_intent { |
9 | int flags; | 9 | int flags; |
10 | int create_mode; | 10 | int create_mode; |
11 | struct file *file; | ||
11 | }; | 12 | }; |
12 | 13 | ||
13 | enum { MAX_NESTED_LINKS = 5 }; | 14 | enum { MAX_NESTED_LINKS = 5 }; |
@@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); | |||
65 | extern void path_release(struct nameidata *); | 66 | extern void path_release(struct nameidata *); |
66 | extern void path_release_on_umount(struct nameidata *); | 67 | extern void path_release_on_umount(struct nameidata *); |
67 | 68 | ||
69 | extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); | ||
70 | extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); | ||
71 | extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, | ||
72 | int (*open)(struct inode *, struct file *)); | ||
73 | extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); | ||
74 | extern void release_open_intent(struct nameidata *); | ||
75 | |||
68 | extern struct dentry * lookup_one_len(const char *, struct dentry *, int); | 76 | extern struct dentry * lookup_one_len(const char *, struct dentry *, int); |
69 | extern struct dentry * lookup_hash(struct qstr *, struct dentry *); | 77 | extern struct dentry * lookup_hash(struct qstr *, struct dentry *); |
70 | 78 | ||
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9a6047ff1b25..7bac2785c6e4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -41,6 +41,10 @@ | |||
41 | #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 | 41 | #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 |
42 | #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 | 42 | #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 |
43 | 43 | ||
44 | /* Default timeout values */ | ||
45 | #define NFS_MAX_UDP_TIMEOUT (60*HZ) | ||
46 | #define NFS_MAX_TCP_TIMEOUT (600*HZ) | ||
47 | |||
44 | /* | 48 | /* |
45 | * superblock magic number for NFS | 49 | * superblock magic number for NFS |
46 | */ | 50 | */ |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a2bf6914ff1b..60086dac11d5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -96,12 +96,13 @@ struct nfs4_change_info { | |||
96 | u64 after; | 96 | u64 after; |
97 | }; | 97 | }; |
98 | 98 | ||
99 | struct nfs_seqid; | ||
99 | /* | 100 | /* |
100 | * Arguments to the open call. | 101 | * Arguments to the open call. |
101 | */ | 102 | */ |
102 | struct nfs_openargs { | 103 | struct nfs_openargs { |
103 | const struct nfs_fh * fh; | 104 | const struct nfs_fh * fh; |
104 | __u32 seqid; | 105 | struct nfs_seqid * seqid; |
105 | int open_flags; | 106 | int open_flags; |
106 | __u64 clientid; | 107 | __u64 clientid; |
107 | __u32 id; | 108 | __u32 id; |
@@ -136,7 +137,7 @@ struct nfs_openres { | |||
136 | struct nfs_open_confirmargs { | 137 | struct nfs_open_confirmargs { |
137 | const struct nfs_fh * fh; | 138 | const struct nfs_fh * fh; |
138 | nfs4_stateid stateid; | 139 | nfs4_stateid stateid; |
139 | __u32 seqid; | 140 | struct nfs_seqid * seqid; |
140 | }; | 141 | }; |
141 | 142 | ||
142 | struct nfs_open_confirmres { | 143 | struct nfs_open_confirmres { |
@@ -148,8 +149,8 @@ struct nfs_open_confirmres { | |||
148 | */ | 149 | */ |
149 | struct nfs_closeargs { | 150 | struct nfs_closeargs { |
150 | struct nfs_fh * fh; | 151 | struct nfs_fh * fh; |
151 | nfs4_stateid stateid; | 152 | nfs4_stateid * stateid; |
152 | __u32 seqid; | 153 | struct nfs_seqid * seqid; |
153 | int open_flags; | 154 | int open_flags; |
154 | }; | 155 | }; |
155 | 156 | ||
@@ -164,30 +165,19 @@ struct nfs_lowner { | |||
164 | u32 id; | 165 | u32 id; |
165 | }; | 166 | }; |
166 | 167 | ||
167 | struct nfs_open_to_lock { | ||
168 | __u32 open_seqid; | ||
169 | nfs4_stateid open_stateid; | ||
170 | __u32 lock_seqid; | ||
171 | struct nfs_lowner lock_owner; | ||
172 | }; | ||
173 | |||
174 | struct nfs_exist_lock { | ||
175 | nfs4_stateid stateid; | ||
176 | __u32 seqid; | ||
177 | }; | ||
178 | |||
179 | struct nfs_lock_opargs { | 168 | struct nfs_lock_opargs { |
169 | struct nfs_seqid * lock_seqid; | ||
170 | nfs4_stateid * lock_stateid; | ||
171 | struct nfs_seqid * open_seqid; | ||
172 | nfs4_stateid * open_stateid; | ||
173 | struct nfs_lowner lock_owner; | ||
180 | __u32 reclaim; | 174 | __u32 reclaim; |
181 | __u32 new_lock_owner; | 175 | __u32 new_lock_owner; |
182 | union { | ||
183 | struct nfs_open_to_lock *open_lock; | ||
184 | struct nfs_exist_lock *exist_lock; | ||
185 | } u; | ||
186 | }; | 176 | }; |
187 | 177 | ||
188 | struct nfs_locku_opargs { | 178 | struct nfs_locku_opargs { |
189 | __u32 seqid; | 179 | struct nfs_seqid * seqid; |
190 | nfs4_stateid stateid; | 180 | nfs4_stateid * stateid; |
191 | }; | 181 | }; |
192 | 182 | ||
193 | struct nfs_lockargs { | 183 | struct nfs_lockargs { |
@@ -722,7 +712,7 @@ struct nfs_rpc_ops { | |||
722 | int (*write) (struct nfs_write_data *); | 712 | int (*write) (struct nfs_write_data *); |
723 | int (*commit) (struct nfs_write_data *); | 713 | int (*commit) (struct nfs_write_data *); |
724 | int (*create) (struct inode *, struct dentry *, | 714 | int (*create) (struct inode *, struct dentry *, |
725 | struct iattr *, int); | 715 | struct iattr *, int, struct nameidata *); |
726 | int (*remove) (struct inode *, struct qstr *); | 716 | int (*remove) (struct inode *, struct qstr *); |
727 | int (*unlink_setup) (struct rpc_message *, | 717 | int (*unlink_setup) (struct rpc_message *, |
728 | struct dentry *, struct qstr *); | 718 | struct dentry *, struct qstr *); |
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 04ebc24db348..b68c11a2d6dd 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h | |||
@@ -66,7 +66,12 @@ struct rpc_cred_cache { | |||
66 | 66 | ||
67 | struct rpc_auth { | 67 | struct rpc_auth { |
68 | unsigned int au_cslack; /* call cred size estimate */ | 68 | unsigned int au_cslack; /* call cred size estimate */ |
69 | unsigned int au_rslack; /* reply verf size guess */ | 69 | /* guess at number of u32's auth adds before |
70 | * reply data; normally the verifier size: */ | ||
71 | unsigned int au_rslack; | ||
72 | /* for gss, used to calculate au_rslack: */ | ||
73 | unsigned int au_verfsize; | ||
74 | |||
70 | unsigned int au_flags; /* various flags */ | 75 | unsigned int au_flags; /* various flags */ |
71 | struct rpc_authops * au_ops; /* operations */ | 76 | struct rpc_authops * au_ops; /* operations */ |
72 | rpc_authflavor_t au_flavor; /* pseudoflavor (note may | 77 | rpc_authflavor_t au_flavor; /* pseudoflavor (note may |
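au_rslack is the client's estimate, in 32-bit XDR words, of how much the auth flavor prepends to the reply data; with GSS the final figure depends on the service level, so gss_validate() now records only the verifier's contribution in au_verfsize and the unwrap path fills in the rest. A worked example of that arithmetic, assuming a 20-byte MIC in the reply verifier (the length is an assumption for illustration only):

	/*
	 *   XDR_QUADLEN(20)       = (20 + 3) >> 2 = 5 words of MIC data
	 *   flavor word + length  =                 2 words
	 *   au_verfsize           =                 7 words
	 *
	 * For RPC_GSS_SVC_NONE that is also au_rslack; integrity and privacy
	 * replies add their extra words when the reply body is unwrapped.
	 */
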
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index eadb31e3c198..1a42d902bc11 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h | |||
@@ -32,6 +32,7 @@ | |||
32 | #define RPCDBG_AUTH 0x0010 | 32 | #define RPCDBG_AUTH 0x0010 |
33 | #define RPCDBG_PMAP 0x0020 | 33 | #define RPCDBG_PMAP 0x0020 |
34 | #define RPCDBG_SCHED 0x0040 | 34 | #define RPCDBG_SCHED 0x0040 |
35 | #define RPCDBG_TRANS 0x0080 | ||
35 | #define RPCDBG_SVCSOCK 0x0100 | 36 | #define RPCDBG_SVCSOCK 0x0100 |
36 | #define RPCDBG_SVCDSP 0x0200 | 37 | #define RPCDBG_SVCDSP 0x0200 |
37 | #define RPCDBG_MISC 0x0400 | 38 | #define RPCDBG_MISC 0x0400 |
@@ -94,6 +95,8 @@ enum { | |||
94 | CTL_NLMDEBUG, | 95 | CTL_NLMDEBUG, |
95 | CTL_SLOTTABLE_UDP, | 96 | CTL_SLOTTABLE_UDP, |
96 | CTL_SLOTTABLE_TCP, | 97 | CTL_SLOTTABLE_TCP, |
98 | CTL_MIN_RESVPORT, | ||
99 | CTL_MAX_RESVPORT, | ||
97 | }; | 100 | }; |
98 | 101 | ||
99 | #endif /* _LINUX_SUNRPC_DEBUG_H_ */ | 102 | #endif /* _LINUX_SUNRPC_DEBUG_H_ */ |
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 689262f63059..9b8bcf125c18 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h | |||
@@ -40,14 +40,21 @@ int gss_import_sec_context( | |||
40 | struct gss_ctx **ctx_id); | 40 | struct gss_ctx **ctx_id); |
41 | u32 gss_get_mic( | 41 | u32 gss_get_mic( |
42 | struct gss_ctx *ctx_id, | 42 | struct gss_ctx *ctx_id, |
43 | u32 qop, | ||
44 | struct xdr_buf *message, | 43 | struct xdr_buf *message, |
45 | struct xdr_netobj *mic_token); | 44 | struct xdr_netobj *mic_token); |
46 | u32 gss_verify_mic( | 45 | u32 gss_verify_mic( |
47 | struct gss_ctx *ctx_id, | 46 | struct gss_ctx *ctx_id, |
48 | struct xdr_buf *message, | 47 | struct xdr_buf *message, |
49 | struct xdr_netobj *mic_token, | 48 | struct xdr_netobj *mic_token); |
50 | u32 *qstate); | 49 | u32 gss_wrap( |
50 | struct gss_ctx *ctx_id, | ||
51 | int offset, | ||
52 | struct xdr_buf *outbuf, | ||
53 | struct page **inpages); | ||
54 | u32 gss_unwrap( | ||
55 | struct gss_ctx *ctx_id, | ||
56 | int offset, | ||
57 | struct xdr_buf *inbuf); | ||
51 | u32 gss_delete_sec_context( | 58 | u32 gss_delete_sec_context( |
52 | struct gss_ctx **ctx_id); | 59 | struct gss_ctx **ctx_id); |
53 | 60 | ||
@@ -56,7 +63,6 @@ char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); | |||
56 | 63 | ||
57 | struct pf_desc { | 64 | struct pf_desc { |
58 | u32 pseudoflavor; | 65 | u32 pseudoflavor; |
59 | u32 qop; | ||
60 | u32 service; | 66 | u32 service; |
61 | char *name; | 67 | char *name; |
62 | char *auth_domain_name; | 68 | char *auth_domain_name; |
@@ -85,14 +91,21 @@ struct gss_api_ops { | |||
85 | struct gss_ctx *ctx_id); | 91 | struct gss_ctx *ctx_id); |
86 | u32 (*gss_get_mic)( | 92 | u32 (*gss_get_mic)( |
87 | struct gss_ctx *ctx_id, | 93 | struct gss_ctx *ctx_id, |
88 | u32 qop, | ||
89 | struct xdr_buf *message, | 94 | struct xdr_buf *message, |
90 | struct xdr_netobj *mic_token); | 95 | struct xdr_netobj *mic_token); |
91 | u32 (*gss_verify_mic)( | 96 | u32 (*gss_verify_mic)( |
92 | struct gss_ctx *ctx_id, | 97 | struct gss_ctx *ctx_id, |
93 | struct xdr_buf *message, | 98 | struct xdr_buf *message, |
94 | struct xdr_netobj *mic_token, | 99 | struct xdr_netobj *mic_token); |
95 | u32 *qstate); | 100 | u32 (*gss_wrap)( |
101 | struct gss_ctx *ctx_id, | ||
102 | int offset, | ||
103 | struct xdr_buf *outbuf, | ||
104 | struct page **inpages); | ||
105 | u32 (*gss_unwrap)( | ||
106 | struct gss_ctx *ctx_id, | ||
107 | int offset, | ||
108 | struct xdr_buf *buf); | ||
96 | void (*gss_delete_sec_context)( | 109 | void (*gss_delete_sec_context)( |
97 | void *internal_ctx_id); | 110 | void *internal_ctx_id); |
98 | }; | 111 | }; |
diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h index 92608a2e574c..a6807867bd21 100644 --- a/include/linux/sunrpc/gss_err.h +++ b/include/linux/sunrpc/gss_err.h | |||
@@ -66,16 +66,6 @@ typedef unsigned int OM_uint32; | |||
66 | 66 | ||
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Define the default Quality of Protection for per-message services. Note | ||
70 | * that an implementation that offers multiple levels of QOP may either reserve | ||
71 | * a value (for example zero, as assumed here) to mean "default protection", or | ||
72 | * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit | ||
73 | * QOP value. However a value of 0 should always be interpreted by a GSSAPI | ||
74 | * implementation as a request for the default protection level. | ||
75 | */ | ||
76 | #define GSS_C_QOP_DEFAULT 0 | ||
77 | |||
78 | /* | ||
79 | * Expiration time of 2^32-1 seconds means infinite lifetime for a | 69 | * Expiration time of 2^32-1 seconds means infinite lifetime for a |
80 | * credential or security context | 70 | * credential or security context |
81 | */ | 71 | */ |
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index ffe31d2eb9ec..2c3601d31045 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h | |||
@@ -116,18 +116,22 @@ enum seal_alg { | |||
116 | 116 | ||
117 | s32 | 117 | s32 |
118 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, | 118 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, |
119 | struct xdr_netobj *cksum); | 119 | int body_offset, struct xdr_netobj *cksum); |
120 | |||
121 | u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, | ||
122 | struct xdr_netobj *); | ||
123 | |||
124 | u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, | ||
125 | struct xdr_netobj *); | ||
120 | 126 | ||
121 | u32 | 127 | u32 |
122 | krb5_make_token(struct krb5_ctx *context_handle, int qop_req, | 128 | gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, |
123 | struct xdr_buf *input_message_buffer, | 129 | struct xdr_buf *outbuf, struct page **pages); |
124 | struct xdr_netobj *output_message_buffer, int toktype); | ||
125 | 130 | ||
126 | u32 | 131 | u32 |
127 | krb5_read_token(struct krb5_ctx *context_handle, | 132 | gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, |
128 | struct xdr_netobj *input_token_buffer, | 133 | struct xdr_buf *buf); |
129 | struct xdr_buf *message_buffer, | 134 | |
130 | int *qop_state, int toktype); | ||
131 | 135 | ||
132 | u32 | 136 | u32 |
133 | krb5_encrypt(struct crypto_tfm * key, | 137 | krb5_encrypt(struct crypto_tfm * key, |
@@ -137,6 +141,13 @@ u32 | |||
137 | krb5_decrypt(struct crypto_tfm * key, | 141 | krb5_decrypt(struct crypto_tfm * key, |
138 | void *iv, void *in, void *out, int length); | 142 | void *iv, void *in, void *out, int length); |
139 | 143 | ||
144 | int | ||
145 | gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, | ||
146 | struct page **pages); | ||
147 | |||
148 | int | ||
149 | gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); | ||
150 | |||
140 | s32 | 151 | s32 |
141 | krb5_make_seq_num(struct crypto_tfm * key, | 152 | krb5_make_seq_num(struct crypto_tfm * key, |
142 | int direction, | 153 | int direction, |
diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index b5c9968c3c17..0beb2cf00a84 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h | |||
@@ -41,9 +41,9 @@ struct spkm3_ctx { | |||
41 | #define SPKM_WRAP_TOK 5 | 41 | #define SPKM_WRAP_TOK 5 |
42 | #define SPKM_DEL_TOK 6 | 42 | #define SPKM_DEL_TOK 6 |
43 | 43 | ||
44 | u32 spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, struct xdr_buf * text, struct xdr_netobj * token, int toktype); | 44 | u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype); |
45 | 45 | ||
46 | u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int *qop_state, int toktype); | 46 | u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); |
47 | 47 | ||
48 | #define CKSUMTYPE_RSA_MD5 0x0007 | 48 | #define CKSUMTYPE_RSA_MD5 0x0007 |
49 | 49 | ||
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 15f115332389..f43f237360ae 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h | |||
@@ -76,5 +76,30 @@ enum rpc_auth_stat { | |||
76 | 76 | ||
77 | #define RPC_MAXNETNAMELEN 256 | 77 | #define RPC_MAXNETNAMELEN 256 |
78 | 78 | ||
79 | /* | ||
80 | * From RFC 1831: | ||
81 | * | ||
82 | * "A record is composed of one or more record fragments. A record | ||
83 | * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of | ||
84 | * fragment data. The bytes encode an unsigned binary number; as with | ||
85 | * XDR integers, the byte order is from highest to lowest. The number | ||
86 | * encodes two values -- a boolean which indicates whether the fragment | ||
87 | * is the last fragment of the record (bit value 1 implies the fragment | ||
88 | * is the last fragment) and a 31-bit unsigned binary value which is the | ||
89 | * length in bytes of the fragment's data. The boolean value is the | ||
90 | * highest-order bit of the header; the length is the 31 low-order bits. | ||
91 | * (Note that this record specification is NOT in XDR standard form!)" | ||
92 | * | ||
93 | * The Linux RPC client always sends its requests in a single record | ||
94 | * fragment, limiting the maximum payload size for stream transports to | ||
95 | * 2GB. | ||
96 | */ | ||
97 | |||
98 | typedef u32 rpc_fraghdr; | ||
99 | |||
100 | #define RPC_LAST_STREAM_FRAGMENT (1U << 31) | ||
101 | #define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) | ||
102 | #define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1) | ||
103 | |||
79 | #endif /* __KERNEL__ */ | 104 | #endif /* __KERNEL__ */ |
80 | #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ | 105 | #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ |
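The comment and macros above describe RFC 1831 record marking for stream transports: a four-byte header whose top bit flags the last fragment and whose low 31 bits carry the fragment length. A minimal sketch of building and parsing that header with the masks defined here (illustrative helpers, not code from the patch; htonl/ntohl handle the byte order):

	static rpc_fraghdr rpc_make_fraghdr(u32 len)
	{
		/* the Linux client sends a single fragment, so the last-fragment
		 * bit is always set alongside the 31-bit length */
		return htonl(RPC_LAST_STREAM_FRAGMENT | (len & RPC_FRAGMENT_SIZE_MASK));
	}

	static void rpc_parse_fraghdr(rpc_fraghdr wire, u32 *len, int *is_last)
	{
		u32 hdr = ntohl(wire);

		*is_last = (hdr & RPC_LAST_STREAM_FRAGMENT) != 0;
		*len = hdr & RPC_FRAGMENT_SIZE_MASK;
	}
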
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 23448d0fb5bc..5da968729cf8 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
@@ -161,14 +161,10 @@ typedef struct { | |||
161 | 161 | ||
162 | typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); | 162 | typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); |
163 | 163 | ||
164 | extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); | ||
164 | extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, | 165 | extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, |
165 | skb_reader_t *, skb_read_actor_t); | 166 | skb_reader_t *, skb_read_actor_t); |
166 | 167 | ||
167 | struct socket; | ||
168 | struct sockaddr; | ||
169 | extern int xdr_sendpages(struct socket *, struct sockaddr *, int, | ||
170 | struct xdr_buf *, unsigned int, int); | ||
171 | |||
172 | extern int xdr_encode_word(struct xdr_buf *, int, u32); | 168 | extern int xdr_encode_word(struct xdr_buf *, int, u32); |
173 | extern int xdr_decode_word(struct xdr_buf *, int, u32 *); | 169 | extern int xdr_decode_word(struct xdr_buf *, int, u32 *); |
174 | 170 | ||
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e618c1649814..3b8b6e823c70 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/include/linux/sunrpc/clnt_xprt.h | 2 | * linux/include/linux/sunrpc/xprt.h |
3 | * | 3 | * |
4 | * Declarations for the RPC transport interface. | 4 | * Declarations for the RPC transport interface. |
5 | * | 5 | * |
@@ -15,20 +15,6 @@ | |||
15 | #include <linux/sunrpc/sched.h> | 15 | #include <linux/sunrpc/sched.h> |
16 | #include <linux/sunrpc/xdr.h> | 16 | #include <linux/sunrpc/xdr.h> |
17 | 17 | ||
18 | /* | ||
19 | * The transport code maintains an estimate on the maximum number of out- | ||
20 | * standing RPC requests, using a smoothed version of the congestion | ||
21 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
22 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
23 | * halved; otherwise, it is incremented by 1/cwnd when | ||
24 | * | ||
25 | * - a reply is received and | ||
26 | * - a full number of requests are outstanding and | ||
27 | * - the congestion window hasn't been updated recently. | ||
28 | * | ||
29 | * Upper procedures may check whether a request would block waiting for | ||
30 | * a free RPC slot by using the RPC_CONGESTED() macro. | ||
31 | */ | ||
32 | extern unsigned int xprt_udp_slot_table_entries; | 18 | extern unsigned int xprt_udp_slot_table_entries; |
33 | extern unsigned int xprt_tcp_slot_table_entries; | 19 | extern unsigned int xprt_tcp_slot_table_entries; |
34 | 20 | ||
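The block comment removed here described the transport's Van Jacobson-style congestion avoidance: halve the window on a retransmit, otherwise grow it by roughly 1/cwnd of a slot per reply (the RPC_CWND* macros move out of this header in the same patch). A rough reconstruction of that update in RPC_CWNDSCALE fixed-point units; the real xprt_adjust_cwnd() differs in detail, this only illustrates the comment:

	static void example_adjust_cwnd(struct rpc_xprt *xprt, int result)
	{
		unsigned long cwnd = xprt->cwnd;

		if (result >= 0) {
			/* additive increase: about 1/cwnd of one slot per reply */
			cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE) / cwnd;
			if (cwnd > RPC_MAXCWND(xprt))
				cwnd = RPC_MAXCWND(xprt);
		} else if (result == -ETIMEDOUT) {
			/* multiplicative decrease: halve the window on a retransmit */
			cwnd >>= 1;
			if (cwnd < RPC_CWNDSCALE)
				cwnd = RPC_CWNDSCALE;
		}
		xprt->cwnd = cwnd;
	}
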
@@ -36,34 +22,23 @@ extern unsigned int xprt_tcp_slot_table_entries; | |||
36 | #define RPC_DEF_SLOT_TABLE (16U) | 22 | #define RPC_DEF_SLOT_TABLE (16U) |
37 | #define RPC_MAX_SLOT_TABLE (128U) | 23 | #define RPC_MAX_SLOT_TABLE (128U) |
38 | 24 | ||
39 | #define RPC_CWNDSHIFT (8U) | ||
40 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
41 | #define RPC_INITCWND RPC_CWNDSCALE | ||
42 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
43 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
44 | |||
45 | /* Default timeout values */ | ||
46 | #define RPC_MAX_UDP_TIMEOUT (60*HZ) | ||
47 | #define RPC_MAX_TCP_TIMEOUT (600*HZ) | ||
48 | |||
49 | /* | 25 | /* |
50 | * Wait duration for an RPC TCP connection to be established. Solaris | 26 | * RPC call and reply header size as number of 32bit words (verifier |
51 | * NFS over TCP uses 60 seconds, for example, which is in line with how | 27 | * size computed separately) |
52 | * long a server takes to reboot. | ||
53 | */ | 28 | */ |
54 | #define RPC_CONNECT_TIMEOUT (60*HZ) | 29 | #define RPC_CALLHDRSIZE 6 |
30 | #define RPC_REPHDRSIZE 4 | ||
55 | 31 | ||
56 | /* | 32 | /* |
57 | * Delay an arbitrary number of seconds before attempting to reconnect | 33 | * Parameters for choosing a free port |
58 | * after an error. | ||
59 | */ | 34 | */ |
60 | #define RPC_REESTABLISH_TIMEOUT (15*HZ) | 35 | extern unsigned int xprt_min_resvport; |
36 | extern unsigned int xprt_max_resvport; | ||
61 | 37 | ||
62 | /* RPC call and reply header size as number of 32bit words (verifier | 38 | #define RPC_MIN_RESVPORT (1U) |
63 | * size computed separately) | 39 | #define RPC_MAX_RESVPORT (65535U) |
64 | */ | 40 | #define RPC_DEF_MIN_RESVPORT (650U) |
65 | #define RPC_CALLHDRSIZE 6 | 41 | #define RPC_DEF_MAX_RESVPORT (1023U) |
66 | #define RPC_REPHDRSIZE 4 | ||
67 | 42 | ||
68 | /* | 43 | /* |
69 | * This describes a timeout strategy | 44 | * This describes a timeout strategy |
@@ -76,6 +51,9 @@ struct rpc_timeout { | |||
76 | unsigned char to_exponential; | 51 | unsigned char to_exponential; |
77 | }; | 52 | }; |
78 | 53 | ||
54 | struct rpc_task; | ||
55 | struct rpc_xprt; | ||
56 | |||
79 | /* | 57 | /* |
80 | * This describes a complete RPC request | 58 | * This describes a complete RPC request |
81 | */ | 59 | */ |
@@ -95,7 +73,10 @@ struct rpc_rqst { | |||
95 | int rq_cong; /* has incremented xprt->cong */ | 73 | int rq_cong; /* has incremented xprt->cong */ |
96 | int rq_received; /* receive completed */ | 74 | int rq_received; /* receive completed */ |
97 | u32 rq_seqno; /* gss seq no. used on req. */ | 75 | u32 rq_seqno; /* gss seq no. used on req. */ |
98 | 76 | int rq_enc_pages_num; | |
77 | struct page **rq_enc_pages; /* scratch pages for use by | ||
78 | gss privacy code */ | ||
79 | void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ | ||
99 | struct list_head rq_list; | 80 | struct list_head rq_list; |
100 | 81 | ||
101 | struct xdr_buf rq_private_buf; /* The receive buffer | 82 | struct xdr_buf rq_private_buf; /* The receive buffer |
@@ -121,12 +102,21 @@ struct rpc_rqst { | |||
121 | #define rq_svec rq_snd_buf.head | 102 | #define rq_svec rq_snd_buf.head |
122 | #define rq_slen rq_snd_buf.len | 103 | #define rq_slen rq_snd_buf.len |
123 | 104 | ||
124 | #define XPRT_LAST_FRAG (1 << 0) | 105 | struct rpc_xprt_ops { |
125 | #define XPRT_COPY_RECM (1 << 1) | 106 | void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); |
126 | #define XPRT_COPY_XID (1 << 2) | 107 | int (*reserve_xprt)(struct rpc_task *task); |
127 | #define XPRT_COPY_DATA (1 << 3) | 108 | void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); |
109 | void (*connect)(struct rpc_task *task); | ||
110 | int (*send_request)(struct rpc_task *task); | ||
111 | void (*set_retrans_timeout)(struct rpc_task *task); | ||
112 | void (*timer)(struct rpc_task *task); | ||
113 | void (*release_request)(struct rpc_task *task); | ||
114 | void (*close)(struct rpc_xprt *xprt); | ||
115 | void (*destroy)(struct rpc_xprt *xprt); | ||
116 | }; | ||
128 | 117 | ||
129 | struct rpc_xprt { | 118 | struct rpc_xprt { |
119 | struct rpc_xprt_ops * ops; /* transport methods */ | ||
130 | struct socket * sock; /* BSD socket layer */ | 120 | struct socket * sock; /* BSD socket layer */ |
131 | struct sock * inet; /* INET layer */ | 121 | struct sock * inet; /* INET layer */ |
132 | 122 | ||
@@ -137,11 +127,13 @@ struct rpc_xprt { | |||
137 | unsigned long cong; /* current congestion */ | 127 | unsigned long cong; /* current congestion */ |
138 | unsigned long cwnd; /* congestion window */ | 128 | unsigned long cwnd; /* congestion window */ |
139 | 129 | ||
140 | unsigned int rcvsize, /* socket receive buffer size */ | 130 | size_t rcvsize, /* transport rcv buffer size */ |
141 | sndsize; /* socket send buffer size */ | 131 | sndsize; /* transport send buffer size */ |
142 | 132 | ||
143 | size_t max_payload; /* largest RPC payload size, | 133 | size_t max_payload; /* largest RPC payload size, |
144 | in bytes */ | 134 | in bytes */ |
135 | unsigned int tsh_size; /* size of transport specific | ||
136 | header */ | ||
145 | 137 | ||
146 | struct rpc_wait_queue sending; /* requests waiting to send */ | 138 | struct rpc_wait_queue sending; /* requests waiting to send */ |
147 | struct rpc_wait_queue resend; /* requests waiting to resend */ | 139 | struct rpc_wait_queue resend; /* requests waiting to resend */ |
@@ -150,11 +142,9 @@ struct rpc_xprt { | |||
150 | struct list_head free; /* free slots */ | 142 | struct list_head free; /* free slots */ |
151 | struct rpc_rqst * slot; /* slot table storage */ | 143 | struct rpc_rqst * slot; /* slot table storage */ |
152 | unsigned int max_reqs; /* total slots */ | 144 | unsigned int max_reqs; /* total slots */ |
153 | unsigned long sockstate; /* Socket state */ | 145 | unsigned long state; /* transport state */ |
154 | unsigned char shutdown : 1, /* being shut down */ | 146 | unsigned char shutdown : 1, /* being shut down */ |
155 | nocong : 1, /* no congestion control */ | 147 | resvport : 1; /* use a reserved port */ |
156 | resvport : 1, /* use a reserved port */ | ||
157 | stream : 1; /* TCP */ | ||
158 | 148 | ||
159 | /* | 149 | /* |
160 | * XID | 150 | * XID |
@@ -171,22 +161,27 @@ struct rpc_xprt { | |||
171 | unsigned long tcp_copied, /* copied to request */ | 161 | unsigned long tcp_copied, /* copied to request */ |
172 | tcp_flags; | 162 | tcp_flags; |
173 | /* | 163 | /* |
174 | * Connection of sockets | 164 | * Connection of transports |
175 | */ | 165 | */ |
176 | struct work_struct sock_connect; | 166 | unsigned long connect_timeout, |
167 | bind_timeout, | ||
168 | reestablish_timeout; | ||
169 | struct work_struct connect_worker; | ||
177 | unsigned short port; | 170 | unsigned short port; |
171 | |||
178 | /* | 172 | /* |
179 | * Disconnection of idle sockets | 173 | * Disconnection of idle transports |
180 | */ | 174 | */ |
181 | struct work_struct task_cleanup; | 175 | struct work_struct task_cleanup; |
182 | struct timer_list timer; | 176 | struct timer_list timer; |
183 | unsigned long last_used; | 177 | unsigned long last_used, |
178 | idle_timeout; | ||
184 | 179 | ||
185 | /* | 180 | /* |
186 | * Send stuff | 181 | * Send stuff |
187 | */ | 182 | */ |
188 | spinlock_t sock_lock; /* lock socket info */ | 183 | spinlock_t transport_lock; /* lock transport info */ |
189 | spinlock_t xprt_lock; /* lock xprt info */ | 184 | spinlock_t reserve_lock; /* lock slot table */ |
190 | struct rpc_task * snd_task; /* Task blocked in send */ | 185 | struct rpc_task * snd_task; /* Task blocked in send */ |
191 | 186 | ||
192 | struct list_head recv; | 187 | struct list_head recv; |
@@ -195,37 +190,111 @@ struct rpc_xprt { | |||
195 | void (*old_data_ready)(struct sock *, int); | 190 | void (*old_data_ready)(struct sock *, int); |
196 | void (*old_state_change)(struct sock *); | 191 | void (*old_state_change)(struct sock *); |
197 | void (*old_write_space)(struct sock *); | 192 | void (*old_write_space)(struct sock *); |
198 | |||
199 | wait_queue_head_t cong_wait; | ||
200 | }; | 193 | }; |
201 | 194 | ||
195 | #define XPRT_LAST_FRAG (1 << 0) | ||
196 | #define XPRT_COPY_RECM (1 << 1) | ||
197 | #define XPRT_COPY_XID (1 << 2) | ||
198 | #define XPRT_COPY_DATA (1 << 3) | ||
199 | |||
202 | #ifdef __KERNEL__ | 200 | #ifdef __KERNEL__ |
203 | 201 | ||
204 | struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, | 202 | /* |
205 | struct rpc_timeout *toparms); | 203 | * Transport operations used by ULPs |
206 | int xprt_destroy(struct rpc_xprt *); | 204 | */ |
207 | void xprt_set_timeout(struct rpc_timeout *, unsigned int, | 205 | struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to); |
208 | unsigned long); | 206 | void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); |
209 | 207 | ||
210 | void xprt_reserve(struct rpc_task *); | 208 | /* |
211 | int xprt_prepare_transmit(struct rpc_task *); | 209 | * Generic internal transport functions |
212 | void xprt_transmit(struct rpc_task *); | 210 | */ |
213 | void xprt_receive(struct rpc_task *); | 211 | void xprt_connect(struct rpc_task *task); |
212 | void xprt_reserve(struct rpc_task *task); | ||
213 | int xprt_reserve_xprt(struct rpc_task *task); | ||
214 | int xprt_reserve_xprt_cong(struct rpc_task *task); | ||
215 | int xprt_prepare_transmit(struct rpc_task *task); | ||
216 | void xprt_transmit(struct rpc_task *task); | ||
217 | void xprt_abort_transmit(struct rpc_task *task); | ||
214 | int xprt_adjust_timeout(struct rpc_rqst *req); | 218 | int xprt_adjust_timeout(struct rpc_rqst *req); |
215 | void xprt_release(struct rpc_task *); | 219 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); |
216 | void xprt_connect(struct rpc_task *); | 220 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); |
217 | void xprt_sock_setbufsize(struct rpc_xprt *); | 221 | void xprt_release(struct rpc_task *task); |
218 | 222 | int xprt_destroy(struct rpc_xprt *xprt); | |
219 | #define XPRT_LOCKED 0 | 223 | |
220 | #define XPRT_CONNECT 1 | 224 | static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) |
221 | #define XPRT_CONNECTING 2 | 225 | { |
222 | 226 | return p + xprt->tsh_size; | |
223 | #define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) | 227 | } |
224 | #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) | 228 | |
225 | #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) | 229 | /* |
226 | #define xprt_test_and_clear_connected(xp) \ | 230 | * Transport switch helper functions |
227 | (test_and_clear_bit(XPRT_CONNECT, &(xp)->sockstate)) | 231 | */ |
228 | #define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) | 232 | void xprt_set_retrans_timeout_def(struct rpc_task *task); |
233 | void xprt_set_retrans_timeout_rtt(struct rpc_task *task); | ||
234 | void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); | ||
235 | void xprt_wait_for_buffer_space(struct rpc_task *task); | ||
236 | void xprt_write_space(struct rpc_xprt *xprt); | ||
237 | void xprt_update_rtt(struct rpc_task *task); | ||
238 | void xprt_adjust_cwnd(struct rpc_task *task, int result); | ||
239 | struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); | ||
240 | void xprt_complete_rqst(struct rpc_task *task, int copied); | ||
241 | void xprt_release_rqst_cong(struct rpc_task *task); | ||
242 | void xprt_disconnect(struct rpc_xprt *xprt); | ||
243 | |||
244 | /* | ||
245 | * Socket transport setup operations | ||
246 | */ | ||
247 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to); | ||
248 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); | ||
249 | |||
250 | /* | ||
251 | * Reserved bit positions in xprt->state | ||
252 | */ | ||
253 | #define XPRT_LOCKED (0) | ||
254 | #define XPRT_CONNECTED (1) | ||
255 | #define XPRT_CONNECTING (2) | ||
256 | |||
257 | static inline void xprt_set_connected(struct rpc_xprt *xprt) | ||
258 | { | ||
259 | set_bit(XPRT_CONNECTED, &xprt->state); | ||
260 | } | ||
261 | |||
262 | static inline void xprt_clear_connected(struct rpc_xprt *xprt) | ||
263 | { | ||
264 | clear_bit(XPRT_CONNECTED, &xprt->state); | ||
265 | } | ||
266 | |||
267 | static inline int xprt_connected(struct rpc_xprt *xprt) | ||
268 | { | ||
269 | return test_bit(XPRT_CONNECTED, &xprt->state); | ||
270 | } | ||
271 | |||
272 | static inline int xprt_test_and_set_connected(struct rpc_xprt *xprt) | ||
273 | { | ||
274 | return test_and_set_bit(XPRT_CONNECTED, &xprt->state); | ||
275 | } | ||
276 | |||
277 | static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt) | ||
278 | { | ||
279 | return test_and_clear_bit(XPRT_CONNECTED, &xprt->state); | ||
280 | } | ||
281 | |||
282 | static inline void xprt_clear_connecting(struct rpc_xprt *xprt) | ||
283 | { | ||
284 | smp_mb__before_clear_bit(); | ||
285 | clear_bit(XPRT_CONNECTING, &xprt->state); | ||
286 | smp_mb__after_clear_bit(); | ||
287 | } | ||
288 | |||
289 | static inline int xprt_connecting(struct rpc_xprt *xprt) | ||
290 | { | ||
291 | return test_bit(XPRT_CONNECTING, &xprt->state); | ||
292 | } | ||
293 | |||
294 | static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) | ||
295 | { | ||
296 | return test_and_set_bit(XPRT_CONNECTING, &xprt->state); | ||
297 | } | ||
229 | 298 | ||
230 | #endif /* __KERNEL__*/ | 299 | #endif /* __KERNEL__*/ |
231 | 300 | ||
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 46a2ce00a29b..cdcab9ca4c60 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
@@ -6,7 +6,7 @@ | |||
6 | obj-$(CONFIG_SUNRPC) += sunrpc.o | 6 | obj-$(CONFIG_SUNRPC) += sunrpc.o |
7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ | 7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ |
8 | 8 | ||
9 | sunrpc-y := clnt.o xprt.o sched.o \ | 9 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ |
10 | auth.o auth_null.o auth_unix.o \ | 10 | auth.o auth_null.o auth_unix.o \ |
11 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | 11 | svc.o svcsock.o svcauth.o svcauth_unix.o \ |
12 | pmap_clnt.o timer.o xdr.o \ | 12 | pmap_clnt.o timer.o xdr.o \ |
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 505e2d4b3d62..a415d99c394d 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
14 | #include <linux/socket.h> | ||
15 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
16 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
17 | 16 | ||
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index fe1b874084bc..f3431a7e33da 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile | |||
@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ | |||
10 | obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o | 10 | obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o |
11 | 11 | ||
12 | rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ | 12 | rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ |
13 | gss_krb5_seqnum.o | 13 | gss_krb5_seqnum.o gss_krb5_wrap.o |
14 | 14 | ||
15 | obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o | 15 | obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o |
16 | 16 | ||
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 2f7b867161d2..f44f46f1d8e0 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -42,9 +42,8 @@ | |||
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/types.h> | 43 | #include <linux/types.h> |
44 | #include <linux/slab.h> | 44 | #include <linux/slab.h> |
45 | #include <linux/socket.h> | ||
46 | #include <linux/in.h> | ||
47 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
46 | #include <linux/pagemap.h> | ||
48 | #include <linux/sunrpc/clnt.h> | 47 | #include <linux/sunrpc/clnt.h> |
49 | #include <linux/sunrpc/auth.h> | 48 | #include <linux/sunrpc/auth.h> |
50 | #include <linux/sunrpc/auth_gss.h> | 49 | #include <linux/sunrpc/auth_gss.h> |
@@ -846,10 +845,8 @@ gss_marshal(struct rpc_task *task, u32 *p) | |||
846 | 845 | ||
847 | /* We compute the checksum for the verifier over the xdr-encoded bytes | 846 | /* We compute the checksum for the verifier over the xdr-encoded bytes |
848 | * starting with the xid and ending at the end of the credential: */ | 847 | * starting with the xid and ending at the end of the credential: */ |
849 | iov.iov_base = req->rq_snd_buf.head[0].iov_base; | 848 | iov.iov_base = xprt_skip_transport_header(task->tk_xprt, |
850 | if (task->tk_client->cl_xprt->stream) | 849 | req->rq_snd_buf.head[0].iov_base); |
851 | /* See clnt.c:call_header() */ | ||
852 | iov.iov_base += 4; | ||
853 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; | 850 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; |
854 | xdr_buf_from_iov(&iov, &verf_buf); | 851 | xdr_buf_from_iov(&iov, &verf_buf); |
855 | 852 | ||
@@ -857,9 +854,7 @@ gss_marshal(struct rpc_task *task, u32 *p) | |||
857 | *p++ = htonl(RPC_AUTH_GSS); | 854 | *p++ = htonl(RPC_AUTH_GSS); |
858 | 855 | ||
859 | mic.data = (u8 *)(p + 1); | 856 | mic.data = (u8 *)(p + 1); |
860 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, | 857 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); |
861 | GSS_C_QOP_DEFAULT, | ||
862 | &verf_buf, &mic); | ||
863 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) { | 858 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) { |
864 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 859 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
865 | } else if (maj_stat != 0) { | 860 | } else if (maj_stat != 0) { |
@@ -890,10 +885,8 @@ static u32 * | |||
890 | gss_validate(struct rpc_task *task, u32 *p) | 885 | gss_validate(struct rpc_task *task, u32 *p) |
891 | { | 886 | { |
892 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | 887 | struct rpc_cred *cred = task->tk_msg.rpc_cred; |
893 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | ||
894 | gc_base); | ||
895 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | 888 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); |
896 | u32 seq, qop_state; | 889 | u32 seq; |
897 | struct kvec iov; | 890 | struct kvec iov; |
898 | struct xdr_buf verf_buf; | 891 | struct xdr_buf verf_buf; |
899 | struct xdr_netobj mic; | 892 | struct xdr_netobj mic; |
@@ -914,23 +907,14 @@ gss_validate(struct rpc_task *task, u32 *p) | |||
914 | mic.data = (u8 *)p; | 907 | mic.data = (u8 *)p; |
915 | mic.len = len; | 908 | mic.len = len; |
916 | 909 | ||
917 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); | 910 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); |
918 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 911 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
919 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 912 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
920 | if (maj_stat) | 913 | if (maj_stat) |
921 | goto out_bad; | 914 | goto out_bad; |
922 | switch (gss_cred->gc_service) { | 915 | /* We leave it to unwrap to calculate au_rslack. For now we just |
923 | case RPC_GSS_SVC_NONE: | 916 | * calculate the length of the verifier: */ |
924 | /* verifier data, flavor, length: */ | 917 | task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; |
925 | task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; | ||
926 | break; | ||
927 | case RPC_GSS_SVC_INTEGRITY: | ||
928 | /* verifier data, flavor, length, length, sequence number: */ | ||
929 | task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; | ||
930 | break; | ||
931 | case RPC_GSS_SVC_PRIVACY: | ||
932 | goto out_bad; | ||
933 | } | ||
934 | gss_put_ctx(ctx); | 918 | gss_put_ctx(ctx); |
935 | dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", | 919 | dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", |
936 | task->tk_pid); | 920 | task->tk_pid); |
@@ -975,8 +959,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
975 | p = iov->iov_base + iov->iov_len; | 959 | p = iov->iov_base + iov->iov_len; |
976 | mic.data = (u8 *)(p + 1); | 960 | mic.data = (u8 *)(p + 1); |
977 | 961 | ||
978 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, | 962 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); |
979 | GSS_C_QOP_DEFAULT, &integ_buf, &mic); | ||
980 | status = -EIO; /* XXX? */ | 963 | status = -EIO; /* XXX? */ |
981 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 964 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
982 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 965 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
@@ -990,6 +973,113 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
990 | return 0; | 973 | return 0; |
991 | } | 974 | } |
992 | 975 | ||
976 | static void | ||
977 | priv_release_snd_buf(struct rpc_rqst *rqstp) | ||
978 | { | ||
979 | int i; | ||
980 | |||
981 | for (i=0; i < rqstp->rq_enc_pages_num; i++) | ||
982 | __free_page(rqstp->rq_enc_pages[i]); | ||
983 | kfree(rqstp->rq_enc_pages); | ||
984 | } | ||
985 | |||
986 | static int | ||
987 | alloc_enc_pages(struct rpc_rqst *rqstp) | ||
988 | { | ||
989 | struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; | ||
990 | int first, last, i; | ||
991 | |||
992 | if (snd_buf->page_len == 0) { | ||
993 | rqstp->rq_enc_pages_num = 0; | ||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | first = snd_buf->page_base >> PAGE_CACHE_SHIFT; | ||
998 | last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; | ||
999 | rqstp->rq_enc_pages_num = last - first + 1 + 1; | ||
1000 | rqstp->rq_enc_pages | ||
1001 | = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), | ||
1002 | GFP_NOFS); | ||
1003 | if (!rqstp->rq_enc_pages) | ||
1004 | goto out; | ||
1005 | for (i=0; i < rqstp->rq_enc_pages_num; i++) { | ||
1006 | rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); | ||
1007 | if (rqstp->rq_enc_pages[i] == NULL) | ||
1008 | goto out_free; | ||
1009 | } | ||
1010 | rqstp->rq_release_snd_buf = priv_release_snd_buf; | ||
1011 | return 0; | ||
1012 | out_free: | ||
1013 | for (i--; i >= 0; i--) { | ||
1014 | __free_page(rqstp->rq_enc_pages[i]); | ||
1015 | } | ||
1016 | out: | ||
1017 | return -EAGAIN; | ||
1018 | } | ||
1019 | |||
1020 | static inline int | ||
1021 | gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | ||
1022 | kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) | ||
1023 | { | ||
1024 | struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; | ||
1025 | u32 offset; | ||
1026 | u32 maj_stat; | ||
1027 | int status; | ||
1028 | u32 *opaque_len; | ||
1029 | struct page **inpages; | ||
1030 | int first; | ||
1031 | int pad; | ||
1032 | struct kvec *iov; | ||
1033 | char *tmp; | ||
1034 | |||
1035 | opaque_len = p++; | ||
1036 | offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; | ||
1037 | *p++ = htonl(rqstp->rq_seqno); | ||
1038 | |||
1039 | status = encode(rqstp, p, obj); | ||
1040 | if (status) | ||
1041 | return status; | ||
1042 | |||
1043 | status = alloc_enc_pages(rqstp); | ||
1044 | if (status) | ||
1045 | return status; | ||
1046 | first = snd_buf->page_base >> PAGE_CACHE_SHIFT; | ||
1047 | inpages = snd_buf->pages + first; | ||
1048 | snd_buf->pages = rqstp->rq_enc_pages; | ||
1049 | snd_buf->page_base -= first << PAGE_CACHE_SHIFT; | ||
1050 | /* Give the tail its own page, in case we need extra space in the | ||
1051 | * head when wrapping: */ | ||
1052 | if (snd_buf->page_len || snd_buf->tail[0].iov_len) { | ||
1053 | tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); | ||
1054 | memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); | ||
1055 | snd_buf->tail[0].iov_base = tmp; | ||
1056 | } | ||
1057 | maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); | ||
1059 | /* RPC_SLACK_SPACE should prevent this from ever happening: */ | ||
1059 | BUG_ON(snd_buf->len > snd_buf->buflen); | ||
1060 | status = -EIO; | ||
1061 | /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was | ||
1062 | * done anyway, so it's safe to put the request on the wire: */ | ||
1063 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
1064 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
1065 | else if (maj_stat) | ||
1066 | return status; | ||
1067 | |||
1068 | *opaque_len = htonl(snd_buf->len - offset); | ||
1069 | /* guess whether we're in the head or the tail: */ | ||
1070 | if (snd_buf->page_len || snd_buf->tail[0].iov_len) | ||
1071 | iov = snd_buf->tail; | ||
1072 | else | ||
1073 | iov = snd_buf->head; | ||
1074 | p = iov->iov_base + iov->iov_len; | ||
1075 | pad = 3 - ((snd_buf->len - offset - 1) & 3); | ||
1076 | memset(p, 0, pad); | ||
1077 | iov->iov_len += pad; | ||
1078 | snd_buf->len += pad; | ||
1079 | |||
1080 | return 0; | ||
1081 | } | ||
1082 | |||
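After privacy wrapping, the opaque body (snd_buf->len - offset bytes) has to end on a 4-byte XDR boundary; the 3 - ((n - 1) & 3) form yields 0 to 3 pad bytes and never a needless full word. A quick standalone check of that identity:

#include <assert.h>
#include <stdio.h>

/* XDR pad needed to round n opaque bytes up to a 4-byte boundary (0..3). */
static unsigned int xdr_pad(unsigned int n)
{
        return 3 - ((n - 1) & 3);
}

int main(void)
{
        for (unsigned int n = 1; n <= 16; n++)
                assert((n + xdr_pad(n)) % 4 == 0 && xdr_pad(n) < 4);
        printf("pad(5) = %u, pad(8) = %u\n", xdr_pad(5), xdr_pad(8)); /* 3, 0 */
        return 0;
}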
993 | static int | 1083 | static int |
994 | gss_wrap_req(struct rpc_task *task, | 1084 | gss_wrap_req(struct rpc_task *task, |
995 | kxdrproc_t encode, void *rqstp, u32 *p, void *obj) | 1085 | kxdrproc_t encode, void *rqstp, u32 *p, void *obj) |
@@ -1017,6 +1107,8 @@ gss_wrap_req(struct rpc_task *task, | |||
1017 | rqstp, p, obj); | 1107 | rqstp, p, obj); |
1018 | break; | 1108 | break; |
1019 | case RPC_GSS_SVC_PRIVACY: | 1109 | case RPC_GSS_SVC_PRIVACY: |
1110 | status = gss_wrap_req_priv(cred, ctx, encode, | ||
1111 | rqstp, p, obj); | ||
1020 | break; | 1112 | break; |
1021 | } | 1113 | } |
1022 | out: | 1114 | out: |
@@ -1054,8 +1146,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
1054 | if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) | 1146 | if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) |
1055 | return status; | 1147 | return status; |
1056 | 1148 | ||
1057 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, | 1149 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); |
1058 | &mic, NULL); | ||
1059 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 1150 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
1060 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 1151 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
1061 | if (maj_stat != GSS_S_COMPLETE) | 1152 | if (maj_stat != GSS_S_COMPLETE) |
@@ -1063,6 +1154,35 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
1063 | return 0; | 1154 | return 0; |
1064 | } | 1155 | } |
1065 | 1156 | ||
1157 | static inline int | ||
1158 | gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | ||
1159 | struct rpc_rqst *rqstp, u32 **p) | ||
1160 | { | ||
1161 | struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; | ||
1162 | u32 offset; | ||
1163 | u32 opaque_len; | ||
1164 | u32 maj_stat; | ||
1165 | int status = -EIO; | ||
1166 | |||
1167 | opaque_len = ntohl(*(*p)++); | ||
1168 | offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; | ||
1169 | if (offset + opaque_len > rcv_buf->len) | ||
1170 | return status; | ||
1171 | /* remove padding: */ | ||
1172 | rcv_buf->len = offset + opaque_len; | ||
1173 | |||
1174 | maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); | ||
1175 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
1176 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
1177 | if (maj_stat != GSS_S_COMPLETE) | ||
1178 | return status; | ||
1179 | if (ntohl(*(*p)++) != rqstp->rq_seqno) | ||
1180 | return status; | ||
1181 | |||
1182 | return 0; | ||
1183 | } | ||
1184 | |||
1185 | |||
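gss_unwrap_resp_priv reads the claimed opaque length, rejects it if it would run past the received data, and trims rcv_buf->len so trailing padding is never handed to gss_unwrap. A minimal bounds-check sketch of the same idea, with hypothetical names and flat lengths in place of the xdr_buf:

#include <stdio.h>
#include <stdint.h>

/* Returns 0 if the opaque body fits, -1 otherwise; trims *buf_len to the body end. */
static int clamp_opaque(uint32_t offset, uint32_t opaque_len, uint32_t *buf_len)
{
        if (offset + opaque_len > *buf_len)
                return -1;              /* reply shorter than the claimed body */
        *buf_len = offset + opaque_len; /* drop trailing padding */
        return 0;
}

int main(void)
{
        uint32_t len = 200;

        if (clamp_opaque(16, 120, &len) == 0)
                printf("trimmed length: %u\n", len); /* 136 */
        return 0;
}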
1066 | static int | 1186 | static int |
1067 | gss_unwrap_resp(struct rpc_task *task, | 1187 | gss_unwrap_resp(struct rpc_task *task, |
1068 | kxdrproc_t decode, void *rqstp, u32 *p, void *obj) | 1188 | kxdrproc_t decode, void *rqstp, u32 *p, void *obj) |
@@ -1071,6 +1191,9 @@ gss_unwrap_resp(struct rpc_task *task, | |||
1071 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | 1191 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, |
1072 | gc_base); | 1192 | gc_base); |
1073 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | 1193 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); |
1194 | u32 *savedp = p; | ||
1195 | struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; | ||
1196 | int savedlen = head->iov_len; | ||
1074 | int status = -EIO; | 1197 | int status = -EIO; |
1075 | 1198 | ||
1076 | if (ctx->gc_proc != RPC_GSS_PROC_DATA) | 1199 | if (ctx->gc_proc != RPC_GSS_PROC_DATA) |
@@ -1084,8 +1207,14 @@ gss_unwrap_resp(struct rpc_task *task, | |||
1084 | goto out; | 1207 | goto out; |
1085 | break; | 1208 | break; |
1086 | case RPC_GSS_SVC_PRIVACY: | 1209 | case RPC_GSS_SVC_PRIVACY: |
1210 | status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); | ||
1211 | if (status) | ||
1212 | goto out; | ||
1087 | break; | 1213 | break; |
1088 | } | 1214 | } |
1215 | /* take into account extra slack for integrity and privacy cases: */ | ||
1216 | task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) | ||
1217 | + (savedlen - head->iov_len); | ||
1089 | out_decode: | 1218 | out_decode: |
1090 | status = decode(rqstp, p, obj); | 1219 | status = decode(rqstp, p, obj); |
1091 | out: | 1220 | out: |
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index ee6ae74cd1b2..3f3d5437f02d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c | |||
@@ -139,17 +139,91 @@ buf_to_sg(struct scatterlist *sg, char *ptr, int len) { | |||
139 | sg->length = len; | 139 | sg->length = len; |
140 | } | 140 | } |
141 | 141 | ||
142 | static int | ||
143 | process_xdr_buf(struct xdr_buf *buf, int offset, int len, | ||
144 | int (*actor)(struct scatterlist *, void *), void *data) | ||
145 | { | ||
146 | int i, page_len, thislen, page_offset, ret = 0; | ||
147 | struct scatterlist sg[1]; | ||
148 | |||
149 | if (offset >= buf->head[0].iov_len) { | ||
150 | offset -= buf->head[0].iov_len; | ||
151 | } else { | ||
152 | thislen = buf->head[0].iov_len - offset; | ||
153 | if (thislen > len) | ||
154 | thislen = len; | ||
155 | buf_to_sg(sg, buf->head[0].iov_base + offset, thislen); | ||
156 | ret = actor(sg, data); | ||
157 | if (ret) | ||
158 | goto out; | ||
159 | offset = 0; | ||
160 | len -= thislen; | ||
161 | } | ||
162 | if (len == 0) | ||
163 | goto out; | ||
164 | |||
165 | if (offset >= buf->page_len) { | ||
166 | offset -= buf->page_len; | ||
167 | } else { | ||
168 | page_len = buf->page_len - offset; | ||
169 | if (page_len > len) | ||
170 | page_len = len; | ||
171 | len -= page_len; | ||
172 | page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); | ||
173 | i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; | ||
174 | thislen = PAGE_CACHE_SIZE - page_offset; | ||
175 | do { | ||
176 | if (thislen > page_len) | ||
177 | thislen = page_len; | ||
178 | sg->page = buf->pages[i]; | ||
179 | sg->offset = page_offset; | ||
180 | sg->length = thislen; | ||
181 | ret = actor(sg, data); | ||
182 | if (ret) | ||
183 | goto out; | ||
184 | page_len -= thislen; | ||
185 | i++; | ||
186 | page_offset = 0; | ||
187 | thislen = PAGE_CACHE_SIZE; | ||
188 | } while (page_len != 0); | ||
189 | offset = 0; | ||
190 | } | ||
191 | if (len == 0) | ||
192 | goto out; | ||
193 | |||
194 | if (offset < buf->tail[0].iov_len) { | ||
195 | thislen = buf->tail[0].iov_len - offset; | ||
196 | if (thislen > len) | ||
197 | thislen = len; | ||
198 | buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen); | ||
199 | ret = actor(sg, data); | ||
200 | len -= thislen; | ||
201 | } | ||
202 | if (len != 0) | ||
203 | ret = -EINVAL; | ||
204 | out: | ||
205 | return ret; | ||
206 | } | ||
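process_xdr_buf walks the head, the page array, and the tail in turn, handing each contiguous run to the caller's actor; the only subtle step is converting a byte position in the page section into a page index and in-page offset. A standalone sketch of that conversion, assuming 4096-byte pages for illustration:

#include <stdio.h>

#define PAGE_SHIFT_ASSUMED 12
#define PAGE_SIZE_ASSUMED  (1U << PAGE_SHIFT_ASSUMED)

int main(void)
{
        unsigned int page_base = 300;   /* where the data starts in pages[0] */
        unsigned int offset = 5000;     /* byte position within the page section */

        unsigned int i = (offset + page_base) >> PAGE_SHIFT_ASSUMED;
        unsigned int page_offset = (offset + page_base) & (PAGE_SIZE_ASSUMED - 1);
        unsigned int thislen = PAGE_SIZE_ASSUMED - page_offset;

        /* byte 5000 of the section lives 1204 bytes into pages[1] */
        printf("page %u, offset %u, %u bytes left on this page\n",
               i, page_offset, thislen);
        return 0;
}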
207 | |||
208 | static int | ||
209 | checksummer(struct scatterlist *sg, void *data) | ||
210 | { | ||
211 | struct crypto_tfm *tfm = (struct crypto_tfm *)data; | ||
212 | |||
213 | crypto_digest_update(tfm, sg, 1); | ||
214 | |||
215 | return 0; | ||
216 | } | ||
217 | |||
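checksummer is the simplest possible actor: it feeds each fragment into the running digest, and the same callback shape lets make_checksum, the encryptor and the decryptor all share process_xdr_buf. A toy sketch of the actor pattern, with a made-up additive checksum standing in for the kernel crypto API:

#include <stdio.h>
#include <stddef.h>

/* toy fragment: a pointer/length pair standing in for a scatterlist entry */
struct frag { const unsigned char *data; size_t len; };

/* walk a list of fragments, handing each one to the caller's actor */
static int for_each_frag(const struct frag *frags, int n,
                         int (*actor)(const struct frag *, void *), void *data)
{
        for (int i = 0; i < n; i++) {
                int ret = actor(&frags[i], data);
                if (ret)
                        return ret;
        }
        return 0;
}

/* toy "checksummer": sums bytes into *(unsigned int *)data */
static int sum_actor(const struct frag *f, void *data)
{
        unsigned int *sum = data;

        for (size_t i = 0; i < f->len; i++)
                *sum += f->data[i];
        return 0;
}

int main(void)
{
        struct frag frags[] = {
                { (const unsigned char *)"head", 4 },
                { (const unsigned char *)"page", 4 },
                { (const unsigned char *)"tail", 4 },
        };
        unsigned int sum = 0;

        for_each_frag(frags, 3, sum_actor, &sum);
        printf("checksum %u\n", sum);
        return 0;
}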
142 | /* checksum the plaintext data and hdrlen bytes of the token header */ | 218 | /* checksum the plaintext data and hdrlen bytes of the token header */ |
143 | s32 | 219 | s32 |
144 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, | 220 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, |
145 | struct xdr_netobj *cksum) | 221 | int body_offset, struct xdr_netobj *cksum) |
146 | { | 222 | { |
147 | char *cksumname; | 223 | char *cksumname; |
148 | struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ | 224 | struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ |
149 | struct scatterlist sg[1]; | 225 | struct scatterlist sg[1]; |
150 | u32 code = GSS_S_FAILURE; | 226 | u32 code = GSS_S_FAILURE; |
151 | int len, thislen, offset; | ||
152 | int i; | ||
153 | 227 | ||
154 | switch (cksumtype) { | 228 | switch (cksumtype) { |
155 | case CKSUMTYPE_RSA_MD5: | 229 | case CKSUMTYPE_RSA_MD5: |
@@ -169,33 +243,8 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, | |||
169 | crypto_digest_init(tfm); | 243 | crypto_digest_init(tfm); |
170 | buf_to_sg(sg, header, hdrlen); | 244 | buf_to_sg(sg, header, hdrlen); |
171 | crypto_digest_update(tfm, sg, 1); | 245 | crypto_digest_update(tfm, sg, 1); |
172 | if (body->head[0].iov_len) { | 246 | process_xdr_buf(body, body_offset, body->len - body_offset, |
173 | buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); | 247 | checksummer, tfm); |
174 | crypto_digest_update(tfm, sg, 1); | ||
175 | } | ||
176 | |||
177 | len = body->page_len; | ||
178 | if (len != 0) { | ||
179 | offset = body->page_base & (PAGE_CACHE_SIZE - 1); | ||
180 | i = body->page_base >> PAGE_CACHE_SHIFT; | ||
181 | thislen = PAGE_CACHE_SIZE - offset; | ||
182 | do { | ||
183 | if (thislen > len) | ||
184 | thislen = len; | ||
185 | sg->page = body->pages[i]; | ||
186 | sg->offset = offset; | ||
187 | sg->length = thislen; | ||
188 | crypto_digest_update(tfm, sg, 1); | ||
189 | len -= thislen; | ||
190 | i++; | ||
191 | offset = 0; | ||
192 | thislen = PAGE_CACHE_SIZE; | ||
193 | } while(len != 0); | ||
194 | } | ||
195 | if (body->tail[0].iov_len) { | ||
196 | buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); | ||
197 | crypto_digest_update(tfm, sg, 1); | ||
198 | } | ||
199 | crypto_digest_final(tfm, cksum->data); | 248 | crypto_digest_final(tfm, cksum->data); |
200 | code = 0; | 249 | code = 0; |
201 | out: | 250 | out: |
@@ -204,3 +253,154 @@ out: | |||
204 | } | 253 | } |
205 | 254 | ||
206 | EXPORT_SYMBOL(make_checksum); | 255 | EXPORT_SYMBOL(make_checksum); |
256 | |||
257 | struct encryptor_desc { | ||
258 | u8 iv[8]; /* XXX hard-coded blocksize */ | ||
259 | struct crypto_tfm *tfm; | ||
260 | int pos; | ||
261 | struct xdr_buf *outbuf; | ||
262 | struct page **pages; | ||
263 | struct scatterlist infrags[4]; | ||
264 | struct scatterlist outfrags[4]; | ||
265 | int fragno; | ||
266 | int fraglen; | ||
267 | }; | ||
268 | |||
269 | static int | ||
270 | encryptor(struct scatterlist *sg, void *data) | ||
271 | { | ||
272 | struct encryptor_desc *desc = data; | ||
273 | struct xdr_buf *outbuf = desc->outbuf; | ||
274 | struct page *in_page; | ||
275 | int thislen = desc->fraglen + sg->length; | ||
276 | int fraglen, ret; | ||
277 | int page_pos; | ||
278 | |||
279 | /* Worst case is 4 fragments: head, end of page 1, start | ||
280 | * of page 2, tail. Anything more is a bug. */ | ||
281 | BUG_ON(desc->fragno > 3); | ||
282 | desc->infrags[desc->fragno] = *sg; | ||
283 | desc->outfrags[desc->fragno] = *sg; | ||
284 | |||
285 | page_pos = desc->pos - outbuf->head[0].iov_len; | ||
286 | if (page_pos >= 0 && page_pos < outbuf->page_len) { | ||
287 | /* pages are not in place: */ | ||
288 | int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; | ||
289 | in_page = desc->pages[i]; | ||
290 | } else { | ||
291 | in_page = sg->page; | ||
292 | } | ||
293 | desc->infrags[desc->fragno].page = in_page; | ||
294 | desc->fragno++; | ||
295 | desc->fraglen += sg->length; | ||
296 | desc->pos += sg->length; | ||
297 | |||
298 | fraglen = thislen & 7; /* XXX hardcoded blocksize */ | ||
299 | thislen -= fraglen; | ||
300 | |||
301 | if (thislen == 0) | ||
302 | return 0; | ||
303 | |||
304 | ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, | ||
305 | thislen, desc->iv); | ||
306 | if (ret) | ||
307 | return ret; | ||
308 | if (fraglen) { | ||
309 | desc->outfrags[0].page = sg->page; | ||
310 | desc->outfrags[0].offset = sg->offset + sg->length - fraglen; | ||
311 | desc->outfrags[0].length = fraglen; | ||
312 | desc->infrags[0] = desc->outfrags[0]; | ||
313 | desc->infrags[0].page = in_page; | ||
314 | desc->fragno = 1; | ||
315 | desc->fraglen = fraglen; | ||
316 | } else { | ||
317 | desc->fragno = 0; | ||
318 | desc->fraglen = 0; | ||
319 | } | ||
320 | return 0; | ||
321 | } | ||
322 | |||
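encryptor batches scatterlist fragments until it holds a whole number of cipher blocks, encrypts that much, and carries the sub-block remainder into the next call; 8 is the assumed DES block size. The bookkeeping can be checked in isolation:

#include <stdio.h>

int main(void)
{
        unsigned int lens[] = { 20, 5, 11 };    /* hypothetical fragment lengths */
        unsigned int fraglen = 0;               /* bytes carried from earlier fragments */

        for (unsigned int i = 0; i < 3; i++) {
                unsigned int thislen = fraglen + lens[i];
                unsigned int rem = thislen & 7; /* bytes that do not fill a block */

                printf("fragment %u: encrypt %u bytes now, carry %u\n",
                       i, thislen - rem, rem);
                fraglen = rem;
        }
        return 0;
}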
323 | int | ||
324 | gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, | ||
325 | struct page **pages) | ||
326 | { | ||
327 | int ret; | ||
328 | struct encryptor_desc desc; | ||
329 | |||
330 | BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); | ||
331 | |||
332 | memset(desc.iv, 0, sizeof(desc.iv)); | ||
333 | desc.tfm = tfm; | ||
334 | desc.pos = offset; | ||
335 | desc.outbuf = buf; | ||
336 | desc.pages = pages; | ||
337 | desc.fragno = 0; | ||
338 | desc.fraglen = 0; | ||
339 | |||
340 | ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); | ||
341 | return ret; | ||
342 | } | ||
343 | |||
344 | EXPORT_SYMBOL(gss_encrypt_xdr_buf); | ||
345 | |||
346 | struct decryptor_desc { | ||
347 | u8 iv[8]; /* XXX hard-coded blocksize */ | ||
348 | struct crypto_tfm *tfm; | ||
349 | struct scatterlist frags[4]; | ||
350 | int fragno; | ||
351 | int fraglen; | ||
352 | }; | ||
353 | |||
354 | static int | ||
355 | decryptor(struct scatterlist *sg, void *data) | ||
356 | { | ||
357 | struct decryptor_desc *desc = data; | ||
358 | int thislen = desc->fraglen + sg->length; | ||
359 | int fraglen, ret; | ||
360 | |||
361 | /* Worst case is 4 fragments: head, end of page 1, start | ||
362 | * of page 2, tail. Anything more is a bug. */ | ||
363 | BUG_ON(desc->fragno > 3); | ||
364 | desc->frags[desc->fragno] = *sg; | ||
365 | desc->fragno++; | ||
366 | desc->fraglen += sg->length; | ||
367 | |||
368 | fraglen = thislen & 7; /* XXX hardcoded blocksize */ | ||
369 | thislen -= fraglen; | ||
370 | |||
371 | if (thislen == 0) | ||
372 | return 0; | ||
373 | |||
374 | ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, | ||
375 | thislen, desc->iv); | ||
376 | if (ret) | ||
377 | return ret; | ||
378 | if (fraglen) { | ||
379 | desc->frags[0].page = sg->page; | ||
380 | desc->frags[0].offset = sg->offset + sg->length - fraglen; | ||
381 | desc->frags[0].length = fraglen; | ||
382 | desc->fragno = 1; | ||
383 | desc->fraglen = fraglen; | ||
384 | } else { | ||
385 | desc->fragno = 0; | ||
386 | desc->fraglen = 0; | ||
387 | } | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | int | ||
392 | gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) | ||
393 | { | ||
394 | struct decryptor_desc desc; | ||
395 | |||
396 | /* XXXJBF: */ | ||
397 | BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); | ||
398 | |||
399 | memset(desc.iv, 0, sizeof(desc.iv)); | ||
400 | desc.tfm = tfm; | ||
401 | desc.fragno = 0; | ||
402 | desc.fraglen = 0; | ||
403 | return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); | ||
404 | } | ||
405 | |||
406 | EXPORT_SYMBOL(gss_decrypt_xdr_buf); | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 606a8a82cafb..5f1f806a0b11 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <linux/types.h> | 39 | #include <linux/types.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/sunrpc/auth.h> | 41 | #include <linux/sunrpc/auth.h> |
42 | #include <linux/in.h> | ||
43 | #include <linux/sunrpc/gss_krb5.h> | 42 | #include <linux/sunrpc/gss_krb5.h> |
44 | #include <linux/sunrpc/xdr.h> | 43 | #include <linux/sunrpc/xdr.h> |
45 | #include <linux/crypto.h> | 44 | #include <linux/crypto.h> |
@@ -191,43 +190,12 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { | |||
191 | kfree(kctx); | 190 | kfree(kctx); |
192 | } | 191 | } |
193 | 192 | ||
194 | static u32 | ||
195 | gss_verify_mic_kerberos(struct gss_ctx *ctx, | ||
196 | struct xdr_buf *message, | ||
197 | struct xdr_netobj *mic_token, | ||
198 | u32 *qstate) { | ||
199 | u32 maj_stat = 0; | ||
200 | int qop_state; | ||
201 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
202 | |||
203 | maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, | ||
204 | KG_TOK_MIC_MSG); | ||
205 | if (!maj_stat && qop_state) | ||
206 | *qstate = qop_state; | ||
207 | |||
208 | dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); | ||
209 | return maj_stat; | ||
210 | } | ||
211 | |||
212 | static u32 | ||
213 | gss_get_mic_kerberos(struct gss_ctx *ctx, | ||
214 | u32 qop, | ||
215 | struct xdr_buf *message, | ||
216 | struct xdr_netobj *mic_token) { | ||
217 | u32 err = 0; | ||
218 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
219 | |||
220 | err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); | ||
221 | |||
222 | dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); | ||
223 | |||
224 | return err; | ||
225 | } | ||
226 | |||
227 | static struct gss_api_ops gss_kerberos_ops = { | 193 | static struct gss_api_ops gss_kerberos_ops = { |
228 | .gss_import_sec_context = gss_import_sec_context_kerberos, | 194 | .gss_import_sec_context = gss_import_sec_context_kerberos, |
229 | .gss_get_mic = gss_get_mic_kerberos, | 195 | .gss_get_mic = gss_get_mic_kerberos, |
230 | .gss_verify_mic = gss_verify_mic_kerberos, | 196 | .gss_verify_mic = gss_verify_mic_kerberos, |
197 | .gss_wrap = gss_wrap_kerberos, | ||
198 | .gss_unwrap = gss_unwrap_kerberos, | ||
231 | .gss_delete_sec_context = gss_delete_sec_context_kerberos, | 199 | .gss_delete_sec_context = gss_delete_sec_context_kerberos, |
232 | }; | 200 | }; |
233 | 201 | ||
@@ -242,6 +210,11 @@ static struct pf_desc gss_kerberos_pfs[] = { | |||
242 | .service = RPC_GSS_SVC_INTEGRITY, | 210 | .service = RPC_GSS_SVC_INTEGRITY, |
243 | .name = "krb5i", | 211 | .name = "krb5i", |
244 | }, | 212 | }, |
213 | [2] = { | ||
214 | .pseudoflavor = RPC_AUTH_GSS_KRB5P, | ||
215 | .service = RPC_GSS_SVC_PRIVACY, | ||
216 | .name = "krb5p", | ||
217 | }, | ||
245 | }; | 218 | }; |
246 | 219 | ||
247 | static struct gss_api_mech gss_kerberos_mech = { | 220 | static struct gss_api_mech gss_kerberos_mech = { |
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index afeeb8715a77..13f8ae979454 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c | |||
@@ -70,22 +70,13 @@ | |||
70 | # define RPCDBG_FACILITY RPCDBG_AUTH | 70 | # define RPCDBG_FACILITY RPCDBG_AUTH |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | static inline int | ||
74 | gss_krb5_padding(int blocksize, int length) { | ||
75 | /* Most of the code is block-size independent but in practice we | ||
76 | * use only 8: */ | ||
77 | BUG_ON(blocksize != 8); | ||
78 | return 8 - (length & 7); | ||
79 | } | ||
80 | |||
81 | u32 | 73 | u32 |
82 | krb5_make_token(struct krb5_ctx *ctx, int qop_req, | 74 | gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, |
83 | struct xdr_buf *text, struct xdr_netobj *token, | 75 | struct xdr_netobj *token) |
84 | int toktype) | ||
85 | { | 76 | { |
77 | struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; | ||
86 | s32 checksum_type; | 78 | s32 checksum_type; |
87 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | 79 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; |
88 | int blocksize = 0, tmsglen; | ||
89 | unsigned char *ptr, *krb5_hdr, *msg_start; | 80 | unsigned char *ptr, *krb5_hdr, *msg_start; |
90 | s32 now; | 81 | s32 now; |
91 | 82 | ||
@@ -93,9 +84,6 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, | |||
93 | 84 | ||
94 | now = get_seconds(); | 85 | now = get_seconds(); |
95 | 86 | ||
96 | if (qop_req != 0) | ||
97 | goto out_err; | ||
98 | |||
99 | switch (ctx->signalg) { | 87 | switch (ctx->signalg) { |
100 | case SGN_ALG_DES_MAC_MD5: | 88 | case SGN_ALG_DES_MAC_MD5: |
101 | checksum_type = CKSUMTYPE_RSA_MD5; | 89 | checksum_type = CKSUMTYPE_RSA_MD5; |
@@ -111,21 +99,13 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, | |||
111 | goto out_err; | 99 | goto out_err; |
112 | } | 100 | } |
113 | 101 | ||
114 | if (toktype == KG_TOK_WRAP_MSG) { | 102 | token->len = g_token_size(&ctx->mech_used, 22); |
115 | blocksize = crypto_tfm_alg_blocksize(ctx->enc); | ||
116 | tmsglen = blocksize + text->len | ||
117 | + gss_krb5_padding(blocksize, blocksize + text->len); | ||
118 | } else { | ||
119 | tmsglen = 0; | ||
120 | } | ||
121 | |||
122 | token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); | ||
123 | 103 | ||
124 | ptr = token->data; | 104 | ptr = token->data; |
125 | g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); | 105 | g_make_token_header(&ctx->mech_used, 22, &ptr); |
126 | 106 | ||
127 | *ptr++ = (unsigned char) ((toktype>>8)&0xff); | 107 | *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); |
128 | *ptr++ = (unsigned char) (toktype&0xff); | 108 | *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); |
129 | 109 | ||
130 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ | 110 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ |
131 | krb5_hdr = ptr - 2; | 111 | krb5_hdr = ptr - 2; |
@@ -133,17 +113,9 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, | |||
133 | 113 | ||
134 | *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); | 114 | *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); |
135 | memset(krb5_hdr + 4, 0xff, 4); | 115 | memset(krb5_hdr + 4, 0xff, 4); |
136 | if (toktype == KG_TOK_WRAP_MSG) | ||
137 | *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); | ||
138 | 116 | ||
139 | if (toktype == KG_TOK_WRAP_MSG) { | 117 | if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) |
140 | /* XXX removing support for now */ | ||
141 | goto out_err; | ||
142 | } else { /* Sign only. */ | ||
143 | if (make_checksum(checksum_type, krb5_hdr, 8, text, | ||
144 | &md5cksum)) | ||
145 | goto out_err; | 118 | goto out_err; |
146 | } | ||
147 | 119 | ||
148 | switch (ctx->signalg) { | 120 | switch (ctx->signalg) { |
149 | case SGN_ALG_DES_MAC_MD5: | 121 | case SGN_ALG_DES_MAC_MD5: |
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 8767fc53183d..2030475d98ed 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c | |||
@@ -68,21 +68,14 @@ | |||
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | 70 | ||
71 | /* message_buffer is an input if toktype is MIC and an output if it is WRAP: | 71 | /* read_token is a mic token, and message_buffer is the data that the mic was |
72 | * If toktype is MIC: read_token is a mic token, and message_buffer is the | 72 | * supposedly taken over. */ |
73 | * data that the mic was supposedly taken over. | ||
74 | * If toktype is WRAP: read_token is a wrap token, and message_buffer is used | ||
75 | * to return the decrypted data. | ||
76 | */ | ||
77 | 73 | ||
78 | /* XXX will need to change prototype and/or just split into a separate function | ||
79 | * when we add privacy (because read_token will be in pages too). */ | ||
80 | u32 | 74 | u32 |
81 | krb5_read_token(struct krb5_ctx *ctx, | 75 | gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, |
82 | struct xdr_netobj *read_token, | 76 | struct xdr_buf *message_buffer, struct xdr_netobj *read_token) |
83 | struct xdr_buf *message_buffer, | ||
84 | int *qop_state, int toktype) | ||
85 | { | 77 | { |
78 | struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; | ||
86 | int signalg; | 79 | int signalg; |
87 | int sealalg; | 80 | int sealalg; |
88 | s32 checksum_type; | 81 | s32 checksum_type; |
@@ -100,16 +93,12 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
100 | read_token->len)) | 93 | read_token->len)) |
101 | goto out; | 94 | goto out; |
102 | 95 | ||
103 | if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) | 96 | if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || |
97 | (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) | ||
104 | goto out; | 98 | goto out; |
105 | 99 | ||
106 | /* XXX sanity-check bodysize?? */ | 100 | /* XXX sanity-check bodysize?? */ |
107 | 101 | ||
108 | if (toktype == KG_TOK_WRAP_MSG) { | ||
109 | /* XXX gone */ | ||
110 | goto out; | ||
111 | } | ||
112 | |||
113 | /* get the sign and seal algorithms */ | 102 | /* get the sign and seal algorithms */ |
114 | 103 | ||
115 | signalg = ptr[0] + (ptr[1] << 8); | 104 | signalg = ptr[0] + (ptr[1] << 8); |
@@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
120 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) | 109 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) |
121 | goto out; | 110 | goto out; |
122 | 111 | ||
123 | if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || | 112 | if (sealalg != 0xffff) |
124 | ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) | ||
125 | goto out; | ||
126 | |||
127 | /* in the current spec, there is only one valid seal algorithm per | ||
128 | key type, so a simple comparison is ok */ | ||
129 | |||
130 | if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) | ||
131 | goto out; | 113 | goto out; |
132 | 114 | ||
133 | /* there are several mappings of seal algorithms to sign algorithms, | 115 | /* there are several mappings of seal algorithms to sign algorithms, |
@@ -154,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
154 | switch (signalg) { | 136 | switch (signalg) { |
155 | case SGN_ALG_DES_MAC_MD5: | 137 | case SGN_ALG_DES_MAC_MD5: |
156 | ret = make_checksum(checksum_type, ptr - 2, 8, | 138 | ret = make_checksum(checksum_type, ptr - 2, 8, |
157 | message_buffer, &md5cksum); | 139 | message_buffer, 0, &md5cksum); |
158 | if (ret) | 140 | if (ret) |
159 | goto out; | 141 | goto out; |
160 | 142 | ||
@@ -175,9 +157,6 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
175 | 157 | ||
176 | /* it got through unscathed. Make sure the context is unexpired */ | 158 | /* it got through unscathed. Make sure the context is unexpired */ |
177 | 159 | ||
178 | if (qop_state) | ||
179 | *qop_state = GSS_C_QOP_DEFAULT; | ||
180 | |||
181 | now = get_seconds(); | 160 | now = get_seconds(); |
182 | 161 | ||
183 | ret = GSS_S_CONTEXT_EXPIRED; | 162 | ret = GSS_S_CONTEXT_EXPIRED; |
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c new file mode 100644 index 000000000000..af777cf9f251 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c | |||
@@ -0,0 +1,363 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/jiffies.h> | ||
4 | #include <linux/sunrpc/gss_krb5.h> | ||
5 | #include <linux/random.h> | ||
6 | #include <linux/pagemap.h> | ||
7 | #include <asm/scatterlist.h> | ||
8 | #include <linux/crypto.h> | ||
9 | |||
10 | #ifdef RPC_DEBUG | ||
11 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
12 | #endif | ||
13 | |||
14 | static inline int | ||
15 | gss_krb5_padding(int blocksize, int length) | ||
16 | { | ||
17 | /* Most of the code is block-size independent but currently we | ||
18 | * use only 8: */ | ||
19 | BUG_ON(blocksize != 8); | ||
20 | return 8 - (length & 7); | ||
21 | } | ||
22 | |||
23 | static inline void | ||
24 | gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) | ||
25 | { | ||
26 | int padding = gss_krb5_padding(blocksize, buf->len - offset); | ||
27 | char *p; | ||
28 | struct kvec *iov; | ||
29 | |||
30 | if (buf->page_len || buf->tail[0].iov_len) | ||
31 | iov = &buf->tail[0]; | ||
32 | else | ||
33 | iov = &buf->head[0]; | ||
34 | p = iov->iov_base + iov->iov_len; | ||
35 | iov->iov_len += padding; | ||
36 | buf->len += padding; | ||
37 | memset(p, padding, padding); | ||
38 | } | ||
39 | |||
40 | static inline int | ||
41 | gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) | ||
42 | { | ||
43 | u8 *ptr; | ||
44 | u8 pad; | ||
45 | int len = buf->len; | ||
46 | |||
47 | if (len <= buf->head[0].iov_len) { | ||
48 | pad = *(u8 *)(buf->head[0].iov_base + len - 1); | ||
49 | if (pad > buf->head[0].iov_len) | ||
50 | return -EINVAL; | ||
51 | buf->head[0].iov_len -= pad; | ||
52 | goto out; | ||
53 | } else | ||
54 | len -= buf->head[0].iov_len; | ||
55 | if (len <= buf->page_len) { | ||
56 | int last = (buf->page_base + len - 1) | ||
57 | >>PAGE_CACHE_SHIFT; | ||
58 | int offset = (buf->page_base + len - 1) | ||
59 | & (PAGE_CACHE_SIZE - 1); | ||
60 | ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); | ||
61 | pad = *(ptr + offset); | ||
62 | kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); | ||
63 | goto out; | ||
64 | } else | ||
65 | len -= buf->page_len; | ||
66 | BUG_ON(len > buf->tail[0].iov_len); | ||
67 | pad = *(u8 *)(buf->tail[0].iov_base + len - 1); | ||
68 | out: | ||
69 | /* XXX: NOTE: we do not adjust the page lengths--they represent | ||
70 | * a range of data in the real filesystem page cache, and we need | ||
71 | * to know that range so the xdr code can properly place read data. | ||
72 | * However adjusting the head length, as we do above, is harmless. | ||
73 | * In the case of a request that fits into a single page, the server | ||
74 | * also uses length and head length together to determine the original | ||
75 | * start of the request to copy the request for deferral; so it's | ||
76 | * easier on the server if we adjust head and tail length in tandem. | ||
77 | * It's not really a problem that we don't fool with the page and | ||
78 | * tail lengths, though--at worst badly formed xdr might lead the | ||
79 | * server to attempt to parse the padding. | ||
80 | * XXX: Document all these weird requirements for gss mechanism | ||
81 | * wrap/unwrap functions. */ | ||
82 | if (pad > blocksize) | ||
83 | return -EINVAL; | ||
84 | if (buf->len > pad) | ||
85 | buf->len -= pad; | ||
86 | else | ||
87 | return -EINVAL; | ||
88 | return 0; | ||
89 | } | ||
90 | |||
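The kerberos wrap padding always adds 1 to 8 bytes, each holding the pad count, so gss_krb5_remove_padding can recover the count from the final byte alone. A round-trip sketch of that convention over a flat buffer (blocksize fixed at 8, no xdr_buf segmentation):

#include <stdio.h>
#include <string.h>

static unsigned int add_pad(unsigned char *buf, unsigned int len)
{
        unsigned int pad = 8 - (len & 7);       /* 1..8, never 0 */

        memset(buf + len, pad, pad);            /* every pad byte holds the count */
        return len + pad;
}

static unsigned int remove_pad(const unsigned char *buf, unsigned int len)
{
        unsigned char pad = buf[len - 1];

        return (pad >= 1 && pad <= 8 && pad <= len) ? len - pad : len;
}

int main(void)
{
        unsigned char buf[32] = "krb5 payload";         /* 12 bytes of data */
        unsigned int padded = add_pad(buf, 12);         /* 16: four pad bytes of value 4 */

        printf("padded %u, recovered %u\n", padded, remove_pad(buf, padded));
        return 0;
}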
91 | static inline void | ||
92 | make_confounder(char *p, int blocksize) | ||
93 | { | ||
94 | static u64 i = 0; | ||
95 | u64 *q = (u64 *)p; | ||
96 | |||
97 | /* rfc1964 claims this should be "random". But all that's really | ||
98 | * necessary is that it be unique. And not even that is necessary in | ||
99 | * our case since our "gssapi" implementation exists only to support | ||
100 | * rpcsec_gss, so we know that the only buffers we will ever encrypt | ||
101 | * already begin with a unique sequence number. Just to hedge my bets | ||
102 | * I'll make a half-hearted attempt at something unique, but ensuring | ||
103 | * uniqueness would mean worrying about atomicity and rollover, and I | ||
104 | * don't care enough. */ | ||
105 | |||
106 | BUG_ON(blocksize != 8); | ||
107 | *q = i++; | ||
108 | } | ||
109 | |||
110 | /* Assumptions: the head and tail of inbuf are ours to play with. | ||
111 | * The pages, however, may be real pages in the page cache and we replace | ||
112 | * them with scratch pages from **pages before writing to them. */ | ||
113 | /* XXX: obviously the above should be documentation of wrap interface, | ||
114 | * and shouldn't be in this kerberos-specific file. */ | ||
115 | |||
116 | /* XXX factor out common code with seal/unseal. */ | ||
117 | |||
118 | u32 | ||
119 | gss_wrap_kerberos(struct gss_ctx *ctx, int offset, | ||
120 | struct xdr_buf *buf, struct page **pages) | ||
121 | { | ||
122 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
123 | s32 checksum_type; | ||
124 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
125 | int blocksize = 0, plainlen; | ||
126 | unsigned char *ptr, *krb5_hdr, *msg_start; | ||
127 | s32 now; | ||
128 | int headlen; | ||
129 | struct page **tmp_pages; | ||
130 | |||
131 | dprintk("RPC: gss_wrap_kerberos\n"); | ||
132 | |||
133 | now = get_seconds(); | ||
134 | |||
135 | switch (kctx->signalg) { | ||
136 | case SGN_ALG_DES_MAC_MD5: | ||
137 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
138 | break; | ||
139 | default: | ||
140 | dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" | ||
141 | " supported\n", kctx->signalg); | ||
142 | goto out_err; | ||
143 | } | ||
144 | if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { | ||
145 | dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", | ||
146 | kctx->sealalg); | ||
147 | goto out_err; | ||
148 | } | ||
149 | |||
150 | blocksize = crypto_tfm_alg_blocksize(kctx->enc); | ||
151 | gss_krb5_add_padding(buf, offset, blocksize); | ||
152 | BUG_ON((buf->len - offset) % blocksize); | ||
153 | plainlen = blocksize + buf->len - offset; | ||
154 | |||
155 | headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - | ||
156 | (buf->len - offset); | ||
157 | |||
158 | ptr = buf->head[0].iov_base + offset; | ||
159 | /* shift data to make room for header. */ | ||
160 | /* XXX Would be cleverer to encrypt while copying. */ | ||
161 | /* XXX bounds checking, slack, etc. */ | ||
162 | memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); | ||
163 | buf->head[0].iov_len += headlen; | ||
164 | buf->len += headlen; | ||
165 | BUG_ON((buf->len - offset - headlen) % blocksize); | ||
166 | |||
167 | g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); | ||
168 | |||
169 | |||
170 | *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); | ||
171 | *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); | ||
172 | |||
173 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ | ||
174 | krb5_hdr = ptr - 2; | ||
175 | msg_start = krb5_hdr + 24; | ||
176 | /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); | ||
177 | |||
178 | *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); | ||
179 | memset(krb5_hdr + 4, 0xff, 4); | ||
180 | *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); | ||
181 | |||
182 | make_confounder(msg_start, blocksize); | ||
183 | |||
184 | /* XXXJBF: UGH!: */ | ||
185 | tmp_pages = buf->pages; | ||
186 | buf->pages = pages; | ||
187 | if (make_checksum(checksum_type, krb5_hdr, 8, buf, | ||
188 | offset + headlen - blocksize, &md5cksum)) | ||
189 | goto out_err; | ||
190 | buf->pages = tmp_pages; | ||
191 | |||
192 | switch (kctx->signalg) { | ||
193 | case SGN_ALG_DES_MAC_MD5: | ||
194 | if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, | ||
195 | md5cksum.data, md5cksum.len)) | ||
196 | goto out_err; | ||
197 | memcpy(krb5_hdr + 16, | ||
198 | md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, | ||
199 | KRB5_CKSUM_LENGTH); | ||
200 | |||
201 | dprintk("RPC: make_seal_token: cksum data: \n"); | ||
202 | print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); | ||
203 | break; | ||
204 | default: | ||
205 | BUG(); | ||
206 | } | ||
207 | |||
208 | kfree(md5cksum.data); | ||
209 | |||
210 | /* XXX would probably be more efficient to compute checksum | ||
211 | * and encrypt at the same time: */ | ||
212 | if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, | ||
213 | kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) | ||
214 | goto out_err; | ||
215 | |||
216 | if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, | ||
217 | pages)) | ||
218 | goto out_err; | ||
219 | |||
220 | kctx->seq_send++; | ||
221 | |||
222 | return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); | ||
223 | out_err: | ||
224 | if (md5cksum.data) kfree(md5cksum.data); | ||
225 | return GSS_S_FAILURE; | ||
226 | } | ||
227 | |||
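Before encrypting, gss_wrap_kerberos shifts the payload already sitting in the head down by headlen bytes to open room for the token header, the krb5 header and the confounder; the exact amount comes from g_token_size(). A flat-buffer sketch of that shift, with an arbitrary headlen standing in for the real token size:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char head[64] = "RPCHDR..payload bytes";  /* RPC header then payload */
        unsigned int offset = 8;                  /* payload starts after the RPC header */
        unsigned int used = strlen(head);         /* bytes currently in the head */
        unsigned int headlen = 10;                /* assumed token+krb5+confounder room */

        /* shift the payload down, leaving [offset, offset+headlen) for the token */
        memmove(head + offset + headlen, head + offset, used - offset);
        memset(head + offset, '?', headlen);      /* stand-in for the real token header */

        printf("%.*s\n", (int)(used + headlen), head);
        return 0;
}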
228 | u32 | ||
229 | gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) | ||
230 | { | ||
231 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
232 | int signalg; | ||
233 | int sealalg; | ||
234 | s32 checksum_type; | ||
235 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
236 | s32 now; | ||
237 | int direction; | ||
238 | s32 seqnum; | ||
239 | unsigned char *ptr; | ||
240 | int bodysize; | ||
241 | u32 ret = GSS_S_DEFECTIVE_TOKEN; | ||
242 | void *data_start, *orig_start; | ||
243 | int data_len; | ||
244 | int blocksize; | ||
245 | |||
246 | dprintk("RPC: gss_unwrap_kerberos\n"); | ||
247 | |||
248 | ptr = (u8 *)buf->head[0].iov_base + offset; | ||
249 | if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, | ||
250 | buf->len - offset)) | ||
251 | goto out; | ||
252 | |||
253 | if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || | ||
254 | (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) | ||
255 | goto out; | ||
256 | |||
257 | /* XXX sanity-check bodysize?? */ | ||
258 | |||
259 | /* get the sign and seal algorithms */ | ||
260 | |||
261 | signalg = ptr[0] + (ptr[1] << 8); | ||
262 | sealalg = ptr[2] + (ptr[3] << 8); | ||
263 | |||
264 | /* Sanity checks */ | ||
265 | |||
266 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) | ||
267 | goto out; | ||
268 | |||
269 | if (sealalg == 0xffff) | ||
270 | goto out; | ||
271 | |||
272 | /* in the current spec, there is only one valid seal algorithm per | ||
273 | key type, so a simple comparison is ok */ | ||
274 | |||
275 | if (sealalg != kctx->sealalg) | ||
276 | goto out; | ||
277 | |||
278 | /* there are several mappings of seal algorithms to sign algorithms, | ||
279 | but few enough that we can try them all. */ | ||
280 | |||
281 | if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || | ||
282 | (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || | ||
283 | (kctx->sealalg == SEAL_ALG_DES3KD && | ||
284 | signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) | ||
285 | goto out; | ||
286 | |||
287 | if (gss_decrypt_xdr_buf(kctx->enc, buf, | ||
288 | ptr + 22 - (unsigned char *)buf->head[0].iov_base)) | ||
289 | goto out; | ||
290 | |||
291 | /* compute the checksum of the message */ | ||
292 | |||
293 | /* initialize the cksum */ | ||
294 | switch (signalg) { | ||
295 | case SGN_ALG_DES_MAC_MD5: | ||
296 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
297 | break; | ||
298 | default: | ||
299 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
300 | goto out; | ||
301 | } | ||
302 | |||
303 | switch (signalg) { | ||
304 | case SGN_ALG_DES_MAC_MD5: | ||
305 | ret = make_checksum(checksum_type, ptr - 2, 8, buf, | ||
306 | ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); | ||
307 | if (ret) | ||
308 | goto out; | ||
309 | |||
310 | ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, | ||
311 | md5cksum.data, md5cksum.len); | ||
312 | if (ret) | ||
313 | goto out; | ||
314 | |||
315 | if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { | ||
316 | ret = GSS_S_BAD_SIG; | ||
317 | goto out; | ||
318 | } | ||
319 | break; | ||
320 | default: | ||
321 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
322 | goto out; | ||
323 | } | ||
324 | |||
325 | /* it got through unscathed. Make sure the context is unexpired */ | ||
326 | |||
327 | now = get_seconds(); | ||
328 | |||
329 | ret = GSS_S_CONTEXT_EXPIRED; | ||
330 | if (now > kctx->endtime) | ||
331 | goto out; | ||
332 | |||
333 | /* do sequencing checks */ | ||
334 | |||
335 | ret = GSS_S_BAD_SIG; | ||
336 | if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, | ||
337 | &seqnum))) | ||
338 | goto out; | ||
339 | |||
340 | if ((kctx->initiate && direction != 0xff) || | ||
341 | (!kctx->initiate && direction != 0)) | ||
342 | goto out; | ||
343 | |||
344 | /* Copy the data back to the right position. XXX: Would probably be | ||
345 | * better to copy and encrypt at the same time. */ | ||
346 | |||
347 | blocksize = crypto_tfm_alg_blocksize(kctx->enc); | ||
348 | data_start = ptr + 22 + blocksize; | ||
349 | orig_start = buf->head[0].iov_base + offset; | ||
350 | data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; | ||
351 | memmove(orig_start, data_start, data_len); | ||
352 | buf->head[0].iov_len -= (data_start - orig_start); | ||
353 | buf->len -= (data_start - orig_start); | ||
354 | |||
355 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
356 | if (gss_krb5_remove_padding(buf, blocksize)) | ||
357 | goto out; | ||
358 | |||
359 | ret = GSS_S_COMPLETE; | ||
360 | out: | ||
361 | if (md5cksum.data) kfree(md5cksum.data); | ||
362 | return ret; | ||
363 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 9dfb68377d69..b048bf672da2 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
@@ -35,7 +35,6 @@ | |||
35 | 35 | ||
36 | #include <linux/types.h> | 36 | #include <linux/types.h> |
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/socket.h> | ||
39 | #include <linux/module.h> | 38 | #include <linux/module.h> |
40 | #include <linux/sunrpc/msg_prot.h> | 39 | #include <linux/sunrpc/msg_prot.h> |
41 | #include <linux/sunrpc/gss_asn1.h> | 40 | #include <linux/sunrpc/gss_asn1.h> |
@@ -251,13 +250,11 @@ gss_import_sec_context(const void *input_token, size_t bufsize, | |||
251 | 250 | ||
252 | u32 | 251 | u32 |
253 | gss_get_mic(struct gss_ctx *context_handle, | 252 | gss_get_mic(struct gss_ctx *context_handle, |
254 | u32 qop, | ||
255 | struct xdr_buf *message, | 253 | struct xdr_buf *message, |
256 | struct xdr_netobj *mic_token) | 254 | struct xdr_netobj *mic_token) |
257 | { | 255 | { |
258 | return context_handle->mech_type->gm_ops | 256 | return context_handle->mech_type->gm_ops |
259 | ->gss_get_mic(context_handle, | 257 | ->gss_get_mic(context_handle, |
260 | qop, | ||
261 | message, | 258 | message, |
262 | mic_token); | 259 | mic_token); |
263 | } | 260 | } |
@@ -267,16 +264,34 @@ gss_get_mic(struct gss_ctx *context_handle, | |||
267 | u32 | 264 | u32 |
268 | gss_verify_mic(struct gss_ctx *context_handle, | 265 | gss_verify_mic(struct gss_ctx *context_handle, |
269 | struct xdr_buf *message, | 266 | struct xdr_buf *message, |
270 | struct xdr_netobj *mic_token, | 267 | struct xdr_netobj *mic_token) |
271 | u32 *qstate) | ||
272 | { | 268 | { |
273 | return context_handle->mech_type->gm_ops | 269 | return context_handle->mech_type->gm_ops |
274 | ->gss_verify_mic(context_handle, | 270 | ->gss_verify_mic(context_handle, |
275 | message, | 271 | message, |
276 | mic_token, | 272 | mic_token); |
277 | qstate); | ||
278 | } | 273 | } |
279 | 274 | ||
275 | u32 | ||
276 | gss_wrap(struct gss_ctx *ctx_id, | ||
277 | int offset, | ||
278 | struct xdr_buf *buf, | ||
279 | struct page **inpages) | ||
280 | { | ||
281 | return ctx_id->mech_type->gm_ops | ||
282 | ->gss_wrap(ctx_id, offset, buf, inpages); | ||
283 | } | ||
284 | |||
285 | u32 | ||
286 | gss_unwrap(struct gss_ctx *ctx_id, | ||
287 | int offset, | ||
288 | struct xdr_buf *buf) | ||
289 | { | ||
290 | return ctx_id->mech_type->gm_ops | ||
291 | ->gss_unwrap(ctx_id, offset, buf); | ||
292 | } | ||
293 | |||
294 | |||
280 | /* gss_delete_sec_context: free all resources associated with context_handle. | 295 | /* gss_delete_sec_context: free all resources associated with context_handle. |
281 | * Note this differs from the RFC 2744-specified prototype in that we don't | 296 | * Note this differs from the RFC 2744-specified prototype in that we don't |
282 | * bother returning an output token, since it would never be used anyway. */ | 297 | * bother returning an output token, since it would never be used anyway. */ |
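The switch layer only forwards to per-mechanism function pointers, so a mechanism gains privacy support simply by filling in the two new slots. A generic ops-table sketch of that dispatch pattern, with deliberately simplified types rather than the kernel's gss_api_ops:

#include <stdio.h>

struct buf { unsigned int len; };

/* simplified stand-ins for the per-mechanism entry points */
struct mech_ops {
        unsigned int (*wrap)(struct buf *b, int offset);
        unsigned int (*unwrap)(struct buf *b, int offset);
};

static unsigned int demo_wrap(struct buf *b, int offset)
{
        (void)offset;
        b->len += 32;           /* pretend token/header overhead */
        return 0;
}

static unsigned int demo_unwrap(struct buf *b, int offset)
{
        (void)offset;
        b->len -= 32;
        return 0;
}

static const struct mech_ops demo_mech = {
        .wrap   = demo_wrap,
        .unwrap = demo_unwrap,
};

int main(void)
{
        struct buf b = { .len = 100 };

        demo_mech.wrap(&b, 0);          /* switch layer: ops->gss_wrap(ctx, offset, buf, pages) */
        printf("after wrap: %u\n", b.len);
        demo_mech.unwrap(&b, 0);
        printf("after unwrap: %u\n", b.len);
        return 0;
}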
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 6c97d61baa9b..39b3edc14694 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c | |||
@@ -224,18 +224,13 @@ gss_delete_sec_context_spkm3(void *internal_ctx) { | |||
224 | static u32 | 224 | static u32 |
225 | gss_verify_mic_spkm3(struct gss_ctx *ctx, | 225 | gss_verify_mic_spkm3(struct gss_ctx *ctx, |
226 | struct xdr_buf *signbuf, | 226 | struct xdr_buf *signbuf, |
227 | struct xdr_netobj *checksum, | 227 | struct xdr_netobj *checksum) |
228 | u32 *qstate) { | 228 | { |
229 | u32 maj_stat = 0; | 229 | u32 maj_stat = 0; |
230 | int qop_state = 0; | ||
231 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | 230 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; |
232 | 231 | ||
233 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); | 232 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); |
234 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, | 233 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); |
235 | SPKM_MIC_TOK); | ||
236 | |||
237 | if (!maj_stat && qop_state) | ||
238 | *qstate = qop_state; | ||
239 | 234 | ||
240 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); | 235 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); |
241 | return maj_stat; | 236 | return maj_stat; |
@@ -243,15 +238,15 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, | |||
243 | 238 | ||
244 | static u32 | 239 | static u32 |
245 | gss_get_mic_spkm3(struct gss_ctx *ctx, | 240 | gss_get_mic_spkm3(struct gss_ctx *ctx, |
246 | u32 qop, | ||
247 | struct xdr_buf *message_buffer, | 241 | struct xdr_buf *message_buffer, |
248 | struct xdr_netobj *message_token) { | 242 | struct xdr_netobj *message_token) |
243 | { | ||
249 | u32 err = 0; | 244 | u32 err = 0; |
250 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | 245 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; |
251 | 246 | ||
252 | dprintk("RPC: gss_get_mic_spkm3\n"); | 247 | dprintk("RPC: gss_get_mic_spkm3\n"); |
253 | 248 | ||
254 | err = spkm3_make_token(sctx, qop, message_buffer, | 249 | err = spkm3_make_token(sctx, message_buffer, |
255 | message_token, SPKM_MIC_TOK); | 250 | message_token, SPKM_MIC_TOK); |
256 | return err; | 251 | return err; |
257 | } | 252 | } |
@@ -264,8 +259,8 @@ static struct gss_api_ops gss_spkm3_ops = { | |||
264 | }; | 259 | }; |
265 | 260 | ||
266 | static struct pf_desc gss_spkm3_pfs[] = { | 261 | static struct pf_desc gss_spkm3_pfs[] = { |
267 | {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, | 262 | {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, |
268 | {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, | 263 | {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, |
269 | }; | 264 | }; |
270 | 265 | ||
271 | static struct gss_api_mech gss_spkm3_mech = { | 266 | static struct gss_api_mech gss_spkm3_mech = { |
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 25339868d462..148201e929d0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c | |||
@@ -51,7 +51,7 @@ | |||
51 | */ | 51 | */ |
52 | 52 | ||
53 | u32 | 53 | u32 |
54 | spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | 54 | spkm3_make_token(struct spkm3_ctx *ctx, |
55 | struct xdr_buf * text, struct xdr_netobj * token, | 55 | struct xdr_buf * text, struct xdr_netobj * token, |
56 | int toktype) | 56 | int toktype) |
57 | { | 57 | { |
@@ -68,8 +68,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | |||
68 | dprintk("RPC: spkm3_make_token\n"); | 68 | dprintk("RPC: spkm3_make_token\n"); |
69 | 69 | ||
70 | now = jiffies; | 70 | now = jiffies; |
71 | if (qop_req != 0) | ||
72 | goto out_err; | ||
73 | 71 | ||
74 | if (ctx->ctx_id.len != 16) { | 72 | if (ctx->ctx_id.len != 16) { |
75 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", | 73 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", |
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 65ce81bf0bc4..c3c0d9586103 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c | |||
@@ -52,7 +52,7 @@ u32 | |||
52 | spkm3_read_token(struct spkm3_ctx *ctx, | 52 | spkm3_read_token(struct spkm3_ctx *ctx, |
53 | struct xdr_netobj *read_token, /* checksum */ | 53 | struct xdr_netobj *read_token, /* checksum */ |
54 | struct xdr_buf *message_buffer, /* signbuf */ | 54 | struct xdr_buf *message_buffer, /* signbuf */ |
55 | int *qop_state, int toktype) | 55 | int toktype) |
56 | { | 56 | { |
57 | s32 code; | 57 | s32 code; |
58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; | 58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e3308195374e..e4ada15ed856 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -566,8 +566,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, | |||
566 | 566 | ||
567 | if (rqstp->rq_deferred) /* skip verification of revisited request */ | 567 | if (rqstp->rq_deferred) /* skip verification of revisited request */ |
568 | return SVC_OK; | 568 | return SVC_OK; |
569 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) | 569 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { |
570 | != GSS_S_COMPLETE) { | ||
571 | *authp = rpcsec_gsserr_credproblem; | 570 | *authp = rpcsec_gsserr_credproblem; |
572 | return SVC_DENIED; | 571 | return SVC_DENIED; |
573 | } | 572 | } |
@@ -604,7 +603,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) | |||
604 | xdr_buf_from_iov(&iov, &verf_data); | 603 | xdr_buf_from_iov(&iov, &verf_data); |
605 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; | 604 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; |
606 | mic.data = (u8 *)(p + 1); | 605 | mic.data = (u8 *)(p + 1); |
607 | maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); | 606 | maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); |
608 | if (maj_stat != GSS_S_COMPLETE) | 607 | if (maj_stat != GSS_S_COMPLETE) |
609 | return -1; | 608 | return -1; |
610 | *p++ = htonl(mic.len); | 609 | *p++ = htonl(mic.len); |
@@ -710,7 +709,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) | |||
710 | goto out; | 709 | goto out; |
711 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) | 710 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) |
712 | goto out; | 711 | goto out; |
713 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); | 712 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); |
714 | if (maj_stat != GSS_S_COMPLETE) | 713 | if (maj_stat != GSS_S_COMPLETE) |
715 | goto out; | 714 | goto out; |
716 | if (ntohl(svc_getu32(&buf->head[0])) != seq) | 715 | if (ntohl(svc_getu32(&buf->head[0])) != seq) |
@@ -1012,7 +1011,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) | |||
1012 | resv = &resbuf->tail[0]; | 1011 | resv = &resbuf->tail[0]; |
1013 | } | 1012 | } |
1014 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; | 1013 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; |
1015 | if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) | 1014 | if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) |
1016 | goto out_err; | 1015 | goto out_err; |
1017 | svc_putu32(resv, htonl(mic.len)); | 1016 | svc_putu32(resv, htonl(mic.len)); |
1018 | memset(mic.data + mic.len, 0, | 1017 | memset(mic.data + mic.len, 0, |
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 9b72d3abf823..f56767aaa927 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c | |||
@@ -7,9 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/socket.h> | ||
11 | #include <linux/module.h> | 10 | #include <linux/module.h> |
12 | #include <linux/in.h> | ||
13 | #include <linux/utsname.h> | 11 | #include <linux/utsname.h> |
14 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4ff297a9b15b..890fb5ea0dcb 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
@@ -9,8 +9,6 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/socket.h> | ||
13 | #include <linux/in.h> | ||
14 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/auth.h> | 13 | #include <linux/sunrpc/auth.h> |
16 | 14 | ||
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f17e6153b688..702ede309b06 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/net/sunrpc/rpcclnt.c | 2 | * linux/net/sunrpc/clnt.c |
3 | * | 3 | * |
4 | * This file contains the high-level RPC interface. | 4 | * This file contains the high-level RPC interface. |
5 | * It is modeled as a finite state machine to support both synchronous | 5 | * It is modeled as a finite state machine to support both synchronous |
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/in.h> | ||
31 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
32 | 31 | ||
33 | #include <linux/sunrpc/clnt.h> | 32 | #include <linux/sunrpc/clnt.h> |
@@ -53,6 +52,7 @@ static void call_allocate(struct rpc_task *task); | |||
53 | static void call_encode(struct rpc_task *task); | 52 | static void call_encode(struct rpc_task *task); |
54 | static void call_decode(struct rpc_task *task); | 53 | static void call_decode(struct rpc_task *task); |
55 | static void call_bind(struct rpc_task *task); | 54 | static void call_bind(struct rpc_task *task); |
55 | static void call_bind_status(struct rpc_task *task); | ||
56 | static void call_transmit(struct rpc_task *task); | 56 | static void call_transmit(struct rpc_task *task); |
57 | static void call_status(struct rpc_task *task); | 57 | static void call_status(struct rpc_task *task); |
58 | static void call_refresh(struct rpc_task *task); | 58 | static void call_refresh(struct rpc_task *task); |
@@ -517,15 +517,8 @@ void | |||
517 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) | 517 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) |
518 | { | 518 | { |
519 | struct rpc_xprt *xprt = clnt->cl_xprt; | 519 | struct rpc_xprt *xprt = clnt->cl_xprt; |
520 | 520 | if (xprt->ops->set_buffer_size) | |
521 | xprt->sndsize = 0; | 521 | xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); |
522 | if (sndsize) | ||
523 | xprt->sndsize = sndsize + RPC_SLACK_SPACE; | ||
524 | xprt->rcvsize = 0; | ||
525 | if (rcvsize) | ||
526 | xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; | ||
527 | if (xprt_connected(xprt)) | ||
528 | xprt_sock_setbufsize(xprt); | ||
529 | } | 522 | } |
530 | 523 | ||
531 | /* | 524 | /* |
@@ -685,13 +678,11 @@ call_allocate(struct rpc_task *task) | |||
685 | static void | 678 | static void |
686 | call_encode(struct rpc_task *task) | 679 | call_encode(struct rpc_task *task) |
687 | { | 680 | { |
688 | struct rpc_clnt *clnt = task->tk_client; | ||
689 | struct rpc_rqst *req = task->tk_rqstp; | 681 | struct rpc_rqst *req = task->tk_rqstp; |
690 | struct xdr_buf *sndbuf = &req->rq_snd_buf; | 682 | struct xdr_buf *sndbuf = &req->rq_snd_buf; |
691 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 683 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
692 | unsigned int bufsiz; | 684 | unsigned int bufsiz; |
693 | kxdrproc_t encode; | 685 | kxdrproc_t encode; |
694 | int status; | ||
695 | u32 *p; | 686 | u32 *p; |
696 | 687 | ||
697 | dprintk("RPC: %4d call_encode (status %d)\n", | 688 | dprintk("RPC: %4d call_encode (status %d)\n", |
@@ -719,11 +710,15 @@ call_encode(struct rpc_task *task) | |||
719 | rpc_exit(task, -EIO); | 710 | rpc_exit(task, -EIO); |
720 | return; | 711 | return; |
721 | } | 712 | } |
722 | if (encode && (status = rpcauth_wrap_req(task, encode, req, p, | 713 | if (encode == NULL) |
723 | task->tk_msg.rpc_argp)) < 0) { | 714 | return; |
724 | printk(KERN_WARNING "%s: can't encode arguments: %d\n", | 715 | |
725 | clnt->cl_protname, -status); | 716 | task->tk_status = rpcauth_wrap_req(task, encode, req, p, |
726 | rpc_exit(task, status); | 717 | task->tk_msg.rpc_argp); |
718 | if (task->tk_status == -ENOMEM) { | ||
719 | /* XXX: Is this sane? */ | ||
720 | rpc_delay(task, 3*HZ); | ||
721 | task->tk_status = -EAGAIN; | ||
727 | } | 722 | } |
728 | } | 723 | } |
729 | 724 | ||
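Note on the call_encode hunk above: an -ENOMEM from rpcauth_wrap_req is now treated as transient, so the task is delayed for three seconds and its status turned into -EAGAIN instead of the call being aborted with a warning. A rough user-space analogue of that back-off-and-retry behaviour, with an invented flaky_encode helper standing in for the wrap call:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* invented stand-in for rpcauth_wrap_req(): fails once with -ENOMEM, then succeeds */
static int flaky_encode(int *attempts)
{
	return (*attempts)++ < 1 ? -ENOMEM : 0;
}

int main(void)
{
	int attempts = 0, status;

	for (;;) {
		status = flaky_encode(&attempts);
		if (status != -ENOMEM)
			break;
		/* transient allocation failure: back off briefly, then try again */
		sleep(3);
	}
	printf("encode finished with status %d after %d attempt(s)\n", status, attempts);
	return 0;
}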
@@ -734,43 +729,95 @@ static void | |||
734 | call_bind(struct rpc_task *task) | 729 | call_bind(struct rpc_task *task) |
735 | { | 730 | { |
736 | struct rpc_clnt *clnt = task->tk_client; | 731 | struct rpc_clnt *clnt = task->tk_client; |
737 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
738 | |||
739 | dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, | ||
740 | xprt, (xprt_connected(xprt) ? "is" : "is not")); | ||
741 | 732 | ||
742 | task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect; | 733 | dprintk("RPC: %4d call_bind (status %d)\n", |
734 | task->tk_pid, task->tk_status); | ||
743 | 735 | ||
736 | task->tk_action = call_connect; | ||
744 | if (!clnt->cl_port) { | 737 | if (!clnt->cl_port) { |
745 | task->tk_action = call_connect; | 738 | task->tk_action = call_bind_status; |
746 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | 739 | task->tk_timeout = task->tk_xprt->bind_timeout; |
747 | rpc_getport(task, clnt); | 740 | rpc_getport(task, clnt); |
748 | } | 741 | } |
749 | } | 742 | } |
750 | 743 | ||
751 | /* | 744 | /* |
752 | * 4a. Connect to the RPC server (TCP case) | 745 | * 4a. Sort out bind result |
746 | */ | ||
747 | static void | ||
748 | call_bind_status(struct rpc_task *task) | ||
749 | { | ||
750 | int status = -EACCES; | ||
751 | |||
752 | if (task->tk_status >= 0) { | ||
753 | dprintk("RPC: %4d call_bind_status (status %d)\n", | ||
754 | task->tk_pid, task->tk_status); | ||
755 | task->tk_status = 0; | ||
756 | task->tk_action = call_connect; | ||
757 | return; | ||
758 | } | ||
759 | |||
760 | switch (task->tk_status) { | ||
761 | case -EACCES: | ||
762 | dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", | ||
763 | task->tk_pid); | ||
764 | rpc_delay(task, 3*HZ); | ||
765 | goto retry_bind; | ||
766 | case -ETIMEDOUT: | ||
767 | dprintk("RPC: %4d rpcbind request timed out\n", | ||
768 | task->tk_pid); | ||
769 | if (RPC_IS_SOFT(task)) { | ||
770 | status = -EIO; | ||
771 | break; | ||
772 | } | ||
773 | goto retry_bind; | ||
774 | case -EPFNOSUPPORT: | ||
775 | dprintk("RPC: %4d remote rpcbind service unavailable\n", | ||
776 | task->tk_pid); | ||
777 | break; | ||
778 | case -EPROTONOSUPPORT: | ||
779 | dprintk("RPC: %4d remote rpcbind version 2 unavailable\n", | ||
780 | task->tk_pid); | ||
781 | break; | ||
782 | default: | ||
783 | dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", | ||
784 | task->tk_pid, -task->tk_status); | ||
785 | status = -EIO; | ||
786 | break; | ||
787 | } | ||
788 | |||
789 | rpc_exit(task, status); | ||
790 | return; | ||
791 | |||
792 | retry_bind: | ||
793 | task->tk_status = 0; | ||
794 | task->tk_action = call_bind; | ||
795 | return; | ||
796 | } | ||
797 | |||
798 | /* | ||
799 | * 4b. Connect to the RPC server | ||
753 | */ | 800 | */ |
754 | static void | 801 | static void |
755 | call_connect(struct rpc_task *task) | 802 | call_connect(struct rpc_task *task) |
756 | { | 803 | { |
757 | struct rpc_clnt *clnt = task->tk_client; | 804 | struct rpc_xprt *xprt = task->tk_xprt; |
758 | 805 | ||
759 | dprintk("RPC: %4d call_connect status %d\n", | 806 | dprintk("RPC: %4d call_connect xprt %p %s connected\n", |
760 | task->tk_pid, task->tk_status); | 807 | task->tk_pid, xprt, |
808 | (xprt_connected(xprt) ? "is" : "is not")); | ||
761 | 809 | ||
762 | if (xprt_connected(clnt->cl_xprt)) { | 810 | task->tk_action = call_transmit; |
763 | task->tk_action = call_transmit; | 811 | if (!xprt_connected(xprt)) { |
764 | return; | 812 | task->tk_action = call_connect_status; |
813 | if (task->tk_status < 0) | ||
814 | return; | ||
815 | xprt_connect(task); | ||
765 | } | 816 | } |
766 | task->tk_action = call_connect_status; | ||
767 | if (task->tk_status < 0) | ||
768 | return; | ||
769 | xprt_connect(task); | ||
770 | } | 817 | } |
771 | 818 | ||
772 | /* | 819 | /* |
773 | * 4b. Sort out connect result | 820 | * 4c. Sort out connect result |
774 | */ | 821 | */ |
775 | static void | 822 | static void |
776 | call_connect_status(struct rpc_task *task) | 823 | call_connect_status(struct rpc_task *task) |
@@ -778,6 +825,9 @@ call_connect_status(struct rpc_task *task) | |||
778 | struct rpc_clnt *clnt = task->tk_client; | 825 | struct rpc_clnt *clnt = task->tk_client; |
779 | int status = task->tk_status; | 826 | int status = task->tk_status; |
780 | 827 | ||
828 | dprintk("RPC: %5u call_connect_status (status %d)\n", | ||
829 | task->tk_pid, task->tk_status); | ||
830 | |||
781 | task->tk_status = 0; | 831 | task->tk_status = 0; |
782 | if (status >= 0) { | 832 | if (status >= 0) { |
783 | clnt->cl_stats->netreconn++; | 833 | clnt->cl_stats->netreconn++; |
@@ -785,17 +835,19 @@ call_connect_status(struct rpc_task *task) | |||
785 | return; | 835 | return; |
786 | } | 836 | } |
787 | 837 | ||
788 | /* Something failed: we may have to rebind */ | 838 | /* Something failed: remote service port may have changed */ |
789 | if (clnt->cl_autobind) | 839 | if (clnt->cl_autobind) |
790 | clnt->cl_port = 0; | 840 | clnt->cl_port = 0; |
841 | |||
791 | switch (status) { | 842 | switch (status) { |
792 | case -ENOTCONN: | 843 | case -ENOTCONN: |
793 | case -ETIMEDOUT: | 844 | case -ETIMEDOUT: |
794 | case -EAGAIN: | 845 | case -EAGAIN: |
795 | task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect; | 846 | task->tk_action = call_bind; |
796 | break; | 847 | break; |
797 | default: | 848 | default: |
798 | rpc_exit(task, -EIO); | 849 | rpc_exit(task, -EIO); |
850 | break; | ||
799 | } | 851 | } |
800 | } | 852 | } |
801 | 853 | ||
@@ -815,10 +867,12 @@ call_transmit(struct rpc_task *task) | |||
815 | if (task->tk_status != 0) | 867 | if (task->tk_status != 0) |
816 | return; | 868 | return; |
817 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | 869 | /* Encode here so that rpcsec_gss can use correct sequence number. */ |
818 | if (!task->tk_rqstp->rq_bytes_sent) | 870 | if (task->tk_rqstp->rq_bytes_sent == 0) { |
819 | call_encode(task); | 871 | call_encode(task); |
820 | if (task->tk_status < 0) | 872 | /* Did the encode result in an error condition? */ |
821 | return; | 873 | if (task->tk_status != 0) |
874 | goto out_nosend; | ||
875 | } | ||
822 | xprt_transmit(task); | 876 | xprt_transmit(task); |
823 | if (task->tk_status < 0) | 877 | if (task->tk_status < 0) |
824 | return; | 878 | return; |
@@ -826,6 +880,10 @@ call_transmit(struct rpc_task *task) | |||
826 | task->tk_action = NULL; | 880 | task->tk_action = NULL; |
827 | rpc_wake_up_task(task); | 881 | rpc_wake_up_task(task); |
828 | } | 882 | } |
883 | return; | ||
884 | out_nosend: | ||
885 | /* release socket write lock before attempting to handle error */ | ||
886 | xprt_abort_transmit(task); | ||
829 | } | 887 | } |
830 | 888 | ||
831 | /* | 889 | /* |
@@ -1020,13 +1078,12 @@ static u32 * | |||
1020 | call_header(struct rpc_task *task) | 1078 | call_header(struct rpc_task *task) |
1021 | { | 1079 | { |
1022 | struct rpc_clnt *clnt = task->tk_client; | 1080 | struct rpc_clnt *clnt = task->tk_client; |
1023 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
1024 | struct rpc_rqst *req = task->tk_rqstp; | 1081 | struct rpc_rqst *req = task->tk_rqstp; |
1025 | u32 *p = req->rq_svec[0].iov_base; | 1082 | u32 *p = req->rq_svec[0].iov_base; |
1026 | 1083 | ||
1027 | /* FIXME: check buffer size? */ | 1084 | /* FIXME: check buffer size? */ |
1028 | if (xprt->stream) | 1085 | |
1029 | *p++ = 0; /* fill in later */ | 1086 | p = xprt_skip_transport_header(task->tk_xprt, p); |
1030 | *p++ = req->rq_xid; /* XID */ | 1087 | *p++ = req->rq_xid; /* XID */ |
1031 | *p++ = htonl(RPC_CALL); /* CALL */ | 1088 | *p++ = htonl(RPC_CALL); /* CALL */ |
1032 | *p++ = htonl(RPC_VERSION); /* RPC version */ | 1089 | *p++ = htonl(RPC_VERSION); /* RPC version */ |
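Note on the clnt.c changes above: binding is now a two-step part of the state machine, with call_bind issuing the rpcbind query and the new call_bind_status sorting its outcome, retrying recoverable failures and exiting on permanent ones. The sketch below condenses that decision table into a standalone C function; the enum and the soft flag are inventions for the example, while the errno cases mirror the hunk.

#include <errno.h>
#include <stdio.h>

enum next_step { BIND_RETRY, BIND_CONNECT, BIND_FAIL };

/* sketch of the call_bind_status dispatch ("soft" means fail rather than retry forever) */
static enum next_step bind_status(int status, int soft)
{
	if (status >= 0)
		return BIND_CONNECT;
	switch (status) {
	case -EACCES:		/* program/version not registered yet: wait and ask again */
		return BIND_RETRY;
	case -ETIMEDOUT:	/* rpcbind itself did not answer */
		return soft ? BIND_FAIL : BIND_RETRY;
	case -EPFNOSUPPORT:	/* no rpcbind service at all */
	case -EPROTONOSUPPORT:	/* rpcbind version 2 missing */
	default:
		return BIND_FAIL;
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       bind_status(0, 0),		/* -> BIND_CONNECT */
	       bind_status(-EACCES, 0),		/* -> BIND_RETRY   */
	       bind_status(-ETIMEDOUT, 1));	/* -> BIND_FAIL    */
	return 0;
}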
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 4e81f2766923..a398575f94b8 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #define PMAP_GETPORT 3 | 26 | #define PMAP_GETPORT 3 |
27 | 27 | ||
28 | static struct rpc_procinfo pmap_procedures[]; | 28 | static struct rpc_procinfo pmap_procedures[]; |
29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); | 29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); |
30 | static void pmap_getport_done(struct rpc_task *); | 30 | static void pmap_getport_done(struct rpc_task *); |
31 | static struct rpc_program pmap_program; | 31 | static struct rpc_program pmap_program; |
32 | static DEFINE_SPINLOCK(pmap_lock); | 32 | static DEFINE_SPINLOCK(pmap_lock); |
@@ -65,7 +65,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) | |||
65 | map->pm_binding = 1; | 65 | map->pm_binding = 1; |
66 | spin_unlock(&pmap_lock); | 66 | spin_unlock(&pmap_lock); |
67 | 67 | ||
68 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot); | 68 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); |
69 | if (IS_ERR(pmap_clnt)) { | 69 | if (IS_ERR(pmap_clnt)) { |
70 | task->tk_status = PTR_ERR(pmap_clnt); | 70 | task->tk_status = PTR_ERR(pmap_clnt); |
71 | goto bailout; | 71 | goto bailout; |
@@ -112,7 +112,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | |||
112 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); | 112 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); |
113 | 113 | ||
114 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); | 114 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); |
115 | pmap_clnt = pmap_create(hostname, sin, prot); | 115 | pmap_clnt = pmap_create(hostname, sin, prot, 0); |
116 | if (IS_ERR(pmap_clnt)) | 116 | if (IS_ERR(pmap_clnt)) |
117 | return PTR_ERR(pmap_clnt); | 117 | return PTR_ERR(pmap_clnt); |
118 | 118 | ||
@@ -171,7 +171,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
171 | 171 | ||
172 | sin.sin_family = AF_INET; | 172 | sin.sin_family = AF_INET; |
173 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); | 173 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); |
174 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP); | 174 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); |
175 | if (IS_ERR(pmap_clnt)) { | 175 | if (IS_ERR(pmap_clnt)) { |
176 | error = PTR_ERR(pmap_clnt); | 176 | error = PTR_ERR(pmap_clnt); |
177 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); | 177 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); |
@@ -198,7 +198,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
198 | } | 198 | } |
199 | 199 | ||
200 | static struct rpc_clnt * | 200 | static struct rpc_clnt * |
201 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | 201 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) |
202 | { | 202 | { |
203 | struct rpc_xprt *xprt; | 203 | struct rpc_xprt *xprt; |
204 | struct rpc_clnt *clnt; | 204 | struct rpc_clnt *clnt; |
@@ -208,6 +208,8 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | |||
208 | if (IS_ERR(xprt)) | 208 | if (IS_ERR(xprt)) |
209 | return (struct rpc_clnt *)xprt; | 209 | return (struct rpc_clnt *)xprt; |
210 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); | 210 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); |
211 | if (!privileged) | ||
212 | xprt->resvport = 0; | ||
211 | 213 | ||
212 | /* printk("pmap: create clnt\n"); */ | 214 | /* printk("pmap: create clnt\n"); */ |
213 | clnt = rpc_new_client(xprt, hostname, | 215 | clnt = rpc_new_client(xprt, hostname, |
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c new file mode 100644 index 000000000000..8f97e90f36c8 --- /dev/null +++ b/net/sunrpc/socklib.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/socklib.c | ||
3 | * | ||
4 | * Common socket helper routines for RPC client and server | ||
5 | * | ||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/pagemap.h> | ||
11 | #include <linux/udp.h> | ||
12 | #include <linux/sunrpc/xdr.h> | ||
13 | |||
14 | |||
15 | /** | ||
16 | * skb_read_bits - copy some data bits from skb to internal buffer | ||
17 | * @desc: sk_buff copy helper | ||
18 | * @to: copy destination | ||
19 | * @len: number of bytes to copy | ||
20 | * | ||
21 | * Possibly called several times to iterate over an sk_buff and copy | ||
22 | * data out of it. | ||
23 | */ | ||
24 | static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
25 | { | ||
26 | if (len > desc->count) | ||
27 | len = desc->count; | ||
28 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
29 | return 0; | ||
30 | desc->count -= len; | ||
31 | desc->offset += len; | ||
32 | return len; | ||
33 | } | ||
34 | |||
35 | /** | ||
36 | * skb_read_and_csum_bits - copy and checksum from skb to buffer | ||
37 | * @desc: sk_buff copy helper | ||
38 | * @to: copy destination | ||
39 | * @len: number of bytes to copy | ||
40 | * | ||
41 | * Same as skb_read_bits, but calculate a checksum at the same time. | ||
42 | */ | ||
43 | static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
44 | { | ||
45 | unsigned int csum2, pos; | ||
46 | |||
47 | if (len > desc->count) | ||
48 | len = desc->count; | ||
49 | pos = desc->offset; | ||
50 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
51 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
52 | desc->count -= len; | ||
53 | desc->offset += len; | ||
54 | return len; | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * xdr_partial_copy_from_skb - copy data out of an skb | ||
59 | * @xdr: target XDR buffer | ||
60 | * @base: starting offset | ||
61 | * @desc: sk_buff copy helper | ||
62 | * @copy_actor: virtual method for copying data | ||
63 | * | ||
64 | */ | ||
65 | ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) | ||
66 | { | ||
67 | struct page **ppage = xdr->pages; | ||
68 | unsigned int len, pglen = xdr->page_len; | ||
69 | ssize_t copied = 0; | ||
70 | int ret; | ||
71 | |||
72 | len = xdr->head[0].iov_len; | ||
73 | if (base < len) { | ||
74 | len -= base; | ||
75 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
76 | copied += ret; | ||
77 | if (ret != len || !desc->count) | ||
78 | goto out; | ||
79 | base = 0; | ||
80 | } else | ||
81 | base -= len; | ||
82 | |||
83 | if (unlikely(pglen == 0)) | ||
84 | goto copy_tail; | ||
85 | if (unlikely(base >= pglen)) { | ||
86 | base -= pglen; | ||
87 | goto copy_tail; | ||
88 | } | ||
89 | if (base || xdr->page_base) { | ||
90 | pglen -= base; | ||
91 | base += xdr->page_base; | ||
92 | ppage += base >> PAGE_CACHE_SHIFT; | ||
93 | base &= ~PAGE_CACHE_MASK; | ||
94 | } | ||
95 | do { | ||
96 | char *kaddr; | ||
97 | |||
98 | /* ACL likes to be lazy in allocating pages - ACLs | ||
99 | * are small by default but can get huge. */ | ||
100 | if (unlikely(*ppage == NULL)) { | ||
101 | *ppage = alloc_page(GFP_ATOMIC); | ||
102 | if (unlikely(*ppage == NULL)) { | ||
103 | if (copied == 0) | ||
104 | copied = -ENOMEM; | ||
105 | goto out; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | len = PAGE_CACHE_SIZE; | ||
110 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
111 | if (base) { | ||
112 | len -= base; | ||
113 | if (pglen < len) | ||
114 | len = pglen; | ||
115 | ret = copy_actor(desc, kaddr + base, len); | ||
116 | base = 0; | ||
117 | } else { | ||
118 | if (pglen < len) | ||
119 | len = pglen; | ||
120 | ret = copy_actor(desc, kaddr, len); | ||
121 | } | ||
122 | flush_dcache_page(*ppage); | ||
123 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
124 | copied += ret; | ||
125 | if (ret != len || !desc->count) | ||
126 | goto out; | ||
127 | ppage++; | ||
128 | } while ((pglen -= len) != 0); | ||
129 | copy_tail: | ||
130 | len = xdr->tail[0].iov_len; | ||
131 | if (base < len) | ||
132 | copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
133 | out: | ||
134 | return copied; | ||
135 | } | ||
136 | |||
137 | /** | ||
138 | * csum_partial_copy_to_xdr - checksum and copy data | ||
139 | * @xdr: target XDR buffer | ||
140 | * @skb: source skb | ||
141 | * | ||
142 | * We have set things up such that we perform the checksum of the UDP | ||
143 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
144 | */ | ||
145 | int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
146 | { | ||
147 | skb_reader_t desc; | ||
148 | |||
149 | desc.skb = skb; | ||
150 | desc.offset = sizeof(struct udphdr); | ||
151 | desc.count = skb->len - desc.offset; | ||
152 | |||
153 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
154 | goto no_checksum; | ||
155 | |||
156 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
157 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) | ||
158 | return -1; | ||
159 | if (desc.offset != skb->len) { | ||
160 | unsigned int csum2; | ||
161 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
162 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
163 | } | ||
164 | if (desc.count) | ||
165 | return -1; | ||
166 | if ((unsigned short)csum_fold(desc.csum)) | ||
167 | return -1; | ||
168 | return 0; | ||
169 | no_checksum: | ||
170 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | ||
171 | return -1; | ||
172 | if (desc.count) | ||
173 | return -1; | ||
174 | return 0; | ||
175 | } | ||
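Note on the new socklib.c above: both receive paths share one iterator that walks an xdr_buf (head, then pages, then tail) and hands each region to a pluggable copy_actor, which is how the plain copy and the copy-plus-checksum variants reuse the same loop. A much-simplified user-space rendering of that shape, using flat buffers instead of sk_buff fragments and a toy checksum, might look like this (all names are illustrative):

#include <stdio.h>
#include <string.h>

/* sketch: a reader descriptor plus pluggable copy actor, mirroring skb_reader_t */
struct reader {
	const char *data;
	size_t offset, count;
	unsigned int csum;
};

typedef size_t (*copy_actor_t)(struct reader *d, void *to, size_t len);

static size_t read_bits(struct reader *d, void *to, size_t len)
{
	if (len > d->count)
		len = d->count;
	memcpy(to, d->data + d->offset, len);
	d->offset += len;
	d->count -= len;
	return len;
}

static size_t read_and_sum_bits(struct reader *d, void *to, size_t len)
{
	size_t i, n = read_bits(d, to, len);
	for (i = 0; i < n; i++)		/* toy checksum in place of csum_block_add() */
		d->csum += ((unsigned char *)to)[i];
	return n;
}

/* copy into a head region, then a tail region, via whichever actor the caller picked */
static size_t fill(char *head, size_t hlen, char *tail, size_t tlen,
		   struct reader *d, copy_actor_t actor)
{
	size_t copied = actor(d, head, hlen);
	if (copied == hlen && d->count)
		copied += actor(d, tail, tlen);
	return copied;
}

int main(void)
{
	char head[4], tail[8];
	struct reader d = { .data = "abcdefgh", .offset = 0, .count = 8 };

	size_t n = fill(head, sizeof(head), tail, sizeof(tail), &d, read_and_sum_bits);
	printf("copied %zu bytes, toy csum %u\n", n, d.csum);
	return 0;
}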
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ed48ff022d35..2387e7b823ff 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | 11 | ||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/socket.h> | ||
14 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
15 | #include <linux/uio.h> | 14 | #include <linux/uio.h> |
16 | #include <linux/unistd.h> | 15 | #include <linux/unistd.h> |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 691dea4a58e7..f16e7cdd6150 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -548,9 +548,6 @@ svc_write_space(struct sock *sk) | |||
548 | /* | 548 | /* |
549 | * Receive a datagram from a UDP socket. | 549 | * Receive a datagram from a UDP socket. |
550 | */ | 550 | */ |
551 | extern int | ||
552 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); | ||
553 | |||
554 | static int | 551 | static int |
555 | svc_udp_recvfrom(struct svc_rqst *rqstp) | 552 | svc_udp_recvfrom(struct svc_rqst *rqstp) |
556 | { | 553 | { |
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1b9616a12e24..d0c9f460e411 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
@@ -119,8 +119,18 @@ done: | |||
119 | return 0; | 119 | return 0; |
120 | } | 120 | } |
121 | 121 | ||
122 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
123 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
124 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | ||
125 | EXPORT_SYMBOL(xprt_min_resvport); | ||
126 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | ||
127 | EXPORT_SYMBOL(xprt_max_resvport); | ||
128 | |||
129 | |||
122 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; | 130 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; |
123 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; | 131 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; |
132 | static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; | ||
133 | static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; | ||
124 | 134 | ||
125 | static ctl_table debug_table[] = { | 135 | static ctl_table debug_table[] = { |
126 | { | 136 | { |
@@ -177,6 +187,28 @@ static ctl_table debug_table[] = { | |||
177 | .extra1 = &min_slot_table_size, | 187 | .extra1 = &min_slot_table_size, |
178 | .extra2 = &max_slot_table_size | 188 | .extra2 = &max_slot_table_size |
179 | }, | 189 | }, |
190 | { | ||
191 | .ctl_name = CTL_MIN_RESVPORT, | ||
192 | .procname = "min_resvport", | ||
193 | .data = &xprt_min_resvport, | ||
194 | .maxlen = sizeof(unsigned int), | ||
195 | .mode = 0644, | ||
196 | .proc_handler = &proc_dointvec_minmax, | ||
197 | .strategy = &sysctl_intvec, | ||
198 | .extra1 = &xprt_min_resvport_limit, | ||
199 | .extra2 = &xprt_max_resvport_limit | ||
200 | }, | ||
201 | { | ||
202 | .ctl_name = CTL_MAX_RESVPORT, | ||
203 | .procname = "max_resvport", | ||
204 | .data = &xprt_max_resvport, | ||
205 | .maxlen = sizeof(unsigned int), | ||
206 | .mode = 0644, | ||
207 | .proc_handler = &proc_dointvec_minmax, | ||
208 | .strategy = &sysctl_intvec, | ||
209 | .extra1 = &xprt_min_resvport_limit, | ||
210 | .extra2 = &xprt_max_resvport_limit | ||
211 | }, | ||
180 | { .ctl_name = 0 } | 212 | { .ctl_name = 0 } |
181 | }; | 213 | }; |
182 | 214 | ||
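Note on the sysctl.c hunk above: the two new entries expose the client's reserved-port range as tunables, clamped between RPC_MIN_RESVPORT and RPC_MAX_RESVPORT by proc_dointvec_minmax. Assuming the table keeps being registered under the usual sunrpc directory (the registration itself is not part of this hunk), the values would appear as /proc/sys/sunrpc/min_resvport and /proc/sys/sunrpc/max_resvport, readable with something as small as:

#include <stdio.h>

/* print the current reserved-port window; the proc paths are an assumption here */
static long read_sysctl(const char *path)
{
	long val = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	printf("min_resvport = %ld\n", read_sysctl("/proc/sys/sunrpc/min_resvport"));
	printf("max_resvport = %ld\n", read_sysctl("/proc/sys/sunrpc/max_resvport"));
	return 0;
}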
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fde16f40a581..32df43372ee9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
@@ -6,15 +6,12 @@ | |||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/module.h> | ||
9 | #include <linux/types.h> | 10 | #include <linux/types.h> |
10 | #include <linux/socket.h> | ||
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
15 | #include <linux/in.h> | ||
16 | #include <linux/net.h> | ||
17 | #include <net/sock.h> | ||
18 | #include <linux/sunrpc/xdr.h> | 15 | #include <linux/sunrpc/xdr.h> |
19 | #include <linux/sunrpc/msg_prot.h> | 16 | #include <linux/sunrpc/msg_prot.h> |
20 | 17 | ||
@@ -176,178 +173,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, | |||
176 | xdr->buflen += len; | 173 | xdr->buflen += len; |
177 | } | 174 | } |
178 | 175 | ||
179 | ssize_t | ||
180 | xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, | ||
181 | skb_reader_t *desc, | ||
182 | skb_read_actor_t copy_actor) | ||
183 | { | ||
184 | struct page **ppage = xdr->pages; | ||
185 | unsigned int len, pglen = xdr->page_len; | ||
186 | ssize_t copied = 0; | ||
187 | int ret; | ||
188 | |||
189 | len = xdr->head[0].iov_len; | ||
190 | if (base < len) { | ||
191 | len -= base; | ||
192 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
193 | copied += ret; | ||
194 | if (ret != len || !desc->count) | ||
195 | goto out; | ||
196 | base = 0; | ||
197 | } else | ||
198 | base -= len; | ||
199 | |||
200 | if (pglen == 0) | ||
201 | goto copy_tail; | ||
202 | if (base >= pglen) { | ||
203 | base -= pglen; | ||
204 | goto copy_tail; | ||
205 | } | ||
206 | if (base || xdr->page_base) { | ||
207 | pglen -= base; | ||
208 | base += xdr->page_base; | ||
209 | ppage += base >> PAGE_CACHE_SHIFT; | ||
210 | base &= ~PAGE_CACHE_MASK; | ||
211 | } | ||
212 | do { | ||
213 | char *kaddr; | ||
214 | |||
215 | /* ACL likes to be lazy in allocating pages - ACLs | ||
216 | * are small by default but can get huge. */ | ||
217 | if (unlikely(*ppage == NULL)) { | ||
218 | *ppage = alloc_page(GFP_ATOMIC); | ||
219 | if (unlikely(*ppage == NULL)) { | ||
220 | if (copied == 0) | ||
221 | copied = -ENOMEM; | ||
222 | goto out; | ||
223 | } | ||
224 | } | ||
225 | |||
226 | len = PAGE_CACHE_SIZE; | ||
227 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
228 | if (base) { | ||
229 | len -= base; | ||
230 | if (pglen < len) | ||
231 | len = pglen; | ||
232 | ret = copy_actor(desc, kaddr + base, len); | ||
233 | base = 0; | ||
234 | } else { | ||
235 | if (pglen < len) | ||
236 | len = pglen; | ||
237 | ret = copy_actor(desc, kaddr, len); | ||
238 | } | ||
239 | flush_dcache_page(*ppage); | ||
240 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
241 | copied += ret; | ||
242 | if (ret != len || !desc->count) | ||
243 | goto out; | ||
244 | ppage++; | ||
245 | } while ((pglen -= len) != 0); | ||
246 | copy_tail: | ||
247 | len = xdr->tail[0].iov_len; | ||
248 | if (base < len) | ||
249 | copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
250 | out: | ||
251 | return copied; | ||
252 | } | ||
253 | |||
254 | |||
255 | int | ||
256 | xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | ||
257 | struct xdr_buf *xdr, unsigned int base, int msgflags) | ||
258 | { | ||
259 | struct page **ppage = xdr->pages; | ||
260 | unsigned int len, pglen = xdr->page_len; | ||
261 | int err, ret = 0; | ||
262 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
263 | |||
264 | len = xdr->head[0].iov_len; | ||
265 | if (base < len || (addr != NULL && base == 0)) { | ||
266 | struct kvec iov = { | ||
267 | .iov_base = xdr->head[0].iov_base + base, | ||
268 | .iov_len = len - base, | ||
269 | }; | ||
270 | struct msghdr msg = { | ||
271 | .msg_name = addr, | ||
272 | .msg_namelen = addrlen, | ||
273 | .msg_flags = msgflags, | ||
274 | }; | ||
275 | if (xdr->len > len) | ||
276 | msg.msg_flags |= MSG_MORE; | ||
277 | |||
278 | if (iov.iov_len != 0) | ||
279 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
280 | else | ||
281 | err = kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
282 | if (ret == 0) | ||
283 | ret = err; | ||
284 | else if (err > 0) | ||
285 | ret += err; | ||
286 | if (err != iov.iov_len) | ||
287 | goto out; | ||
288 | base = 0; | ||
289 | } else | ||
290 | base -= len; | ||
291 | |||
292 | if (pglen == 0) | ||
293 | goto copy_tail; | ||
294 | if (base >= pglen) { | ||
295 | base -= pglen; | ||
296 | goto copy_tail; | ||
297 | } | ||
298 | if (base || xdr->page_base) { | ||
299 | pglen -= base; | ||
300 | base += xdr->page_base; | ||
301 | ppage += base >> PAGE_CACHE_SHIFT; | ||
302 | base &= ~PAGE_CACHE_MASK; | ||
303 | } | ||
304 | |||
305 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
306 | do { | ||
307 | int flags = msgflags; | ||
308 | |||
309 | len = PAGE_CACHE_SIZE; | ||
310 | if (base) | ||
311 | len -= base; | ||
312 | if (pglen < len) | ||
313 | len = pglen; | ||
314 | |||
315 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
316 | flags |= MSG_MORE; | ||
317 | |||
318 | /* Hmm... We might be dealing with highmem pages */ | ||
319 | if (PageHighMem(*ppage)) | ||
320 | sendpage = sock_no_sendpage; | ||
321 | err = sendpage(sock, *ppage, base, len, flags); | ||
322 | if (ret == 0) | ||
323 | ret = err; | ||
324 | else if (err > 0) | ||
325 | ret += err; | ||
326 | if (err != len) | ||
327 | goto out; | ||
328 | base = 0; | ||
329 | ppage++; | ||
330 | } while ((pglen -= len) != 0); | ||
331 | copy_tail: | ||
332 | len = xdr->tail[0].iov_len; | ||
333 | if (base < len) { | ||
334 | struct kvec iov = { | ||
335 | .iov_base = xdr->tail[0].iov_base + base, | ||
336 | .iov_len = len - base, | ||
337 | }; | ||
338 | struct msghdr msg = { | ||
339 | .msg_flags = msgflags, | ||
340 | }; | ||
341 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
342 | if (ret == 0) | ||
343 | ret = err; | ||
344 | else if (err > 0) | ||
345 | ret += err; | ||
346 | } | ||
347 | out: | ||
348 | return ret; | ||
349 | } | ||
350 | |||
351 | 176 | ||
352 | /* | 177 | /* |
353 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf | 178 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3c654e06b084..6dda3860351f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -10,12 +10,12 @@ | |||
10 | * one is available. Otherwise, it sleeps on the backlog queue | 10 | * one is available. Otherwise, it sleeps on the backlog queue |
11 | * (xprt_reserve). | 11 | * (xprt_reserve). |
12 | * - Next, the caller puts together the RPC message, stuffs it into | 12 | * - Next, the caller puts together the RPC message, stuffs it into |
13 | * the request struct, and calls xprt_call(). | 13 | * the request struct, and calls xprt_transmit(). |
14 | * - xprt_call transmits the message and installs the caller on the | 14 | * - xprt_transmit sends the message and installs the caller on the |
15 | * socket's wait list. At the same time, it installs a timer that | 15 | * transport's wait list. At the same time, it installs a timer that |
16 | * is run after the packet's timeout has expired. | 16 | * is run after the packet's timeout has expired. |
17 | * - When a packet arrives, the data_ready handler walks the list of | 17 | * - When a packet arrives, the data_ready handler walks the list of |
18 | * pending requests for that socket. If a matching XID is found, the | 18 | * pending requests for that transport. If a matching XID is found, the |
19 | * caller is woken up, and the timer removed. | 19 | * caller is woken up, and the timer removed. |
20 | * - When no reply arrives within the timeout interval, the timer is | 20 | * - When no reply arrives within the timeout interval, the timer is |
21 | * fired by the kernel and runs xprt_timer(). It either adjusts the | 21 | * fired by the kernel and runs xprt_timer(). It either adjusts the |
@@ -33,36 +33,17 @@ | |||
33 | * | 33 | * |
34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> | 34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> |
35 | * | 35 | * |
36 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | 36 | * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com> |
37 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
38 | * TCP NFS related read + write fixes | ||
39 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
40 | * | ||
41 | * Rewrite of larges part of the code in order to stabilize TCP stuff. | ||
42 | * Fix behaviour when socket buffer is full. | ||
43 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
44 | */ | 37 | */ |
45 | 38 | ||
39 | #include <linux/module.h> | ||
40 | |||
46 | #include <linux/types.h> | 41 | #include <linux/types.h> |
47 | #include <linux/slab.h> | 42 | #include <linux/interrupt.h> |
48 | #include <linux/capability.h> | ||
49 | #include <linux/sched.h> | ||
50 | #include <linux/errno.h> | ||
51 | #include <linux/socket.h> | ||
52 | #include <linux/in.h> | ||
53 | #include <linux/net.h> | ||
54 | #include <linux/mm.h> | ||
55 | #include <linux/udp.h> | ||
56 | #include <linux/tcp.h> | ||
57 | #include <linux/sunrpc/clnt.h> | ||
58 | #include <linux/file.h> | ||
59 | #include <linux/workqueue.h> | 43 | #include <linux/workqueue.h> |
60 | #include <linux/random.h> | 44 | #include <linux/random.h> |
61 | 45 | ||
62 | #include <net/sock.h> | 46 | #include <linux/sunrpc/clnt.h> |
63 | #include <net/checksum.h> | ||
64 | #include <net/udp.h> | ||
65 | #include <net/tcp.h> | ||
66 | 47 | ||
67 | /* | 48 | /* |
68 | * Local variables | 49 | * Local variables |
@@ -73,81 +54,90 @@ | |||
73 | # define RPCDBG_FACILITY RPCDBG_XPRT | 54 | # define RPCDBG_FACILITY RPCDBG_XPRT |
74 | #endif | 55 | #endif |
75 | 56 | ||
76 | #define XPRT_MAX_BACKOFF (8) | ||
77 | #define XPRT_IDLE_TIMEOUT (5*60*HZ) | ||
78 | #define XPRT_MAX_RESVPORT (800) | ||
79 | |||
80 | /* | 57 | /* |
81 | * Local functions | 58 | * Local functions |
82 | */ | 59 | */ |
83 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); | 60 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); |
84 | static inline void do_xprt_reserve(struct rpc_task *); | 61 | static inline void do_xprt_reserve(struct rpc_task *); |
85 | static void xprt_disconnect(struct rpc_xprt *); | ||
86 | static void xprt_connect_status(struct rpc_task *task); | 62 | static void xprt_connect_status(struct rpc_task *task); |
87 | static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, | ||
88 | struct rpc_timeout *to); | ||
89 | static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); | ||
90 | static void xprt_bind_socket(struct rpc_xprt *, struct socket *); | ||
91 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | 63 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); |
92 | 64 | ||
93 | static int xprt_clear_backlog(struct rpc_xprt *xprt); | ||
94 | |||
95 | #ifdef RPC_DEBUG_DATA | ||
96 | /* | 65 | /* |
97 | * Print the buffer contents (first 128 bytes only--just enough for | 66 | * The transport code maintains an estimate on the maximum number of out- |
98 | * diropres return). | 67 | * standing RPC requests, using a smoothed version of the congestion |
68 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
69 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
70 | * halved; otherwise, it is incremented by 1/cwnd when | ||
71 | * | ||
72 | * - a reply is received and | ||
73 | * - a full number of requests are outstanding and | ||
74 | * - the congestion window hasn't been updated recently. | ||
99 | */ | 75 | */ |
100 | static void | 76 | #define RPC_CWNDSHIFT (8U) |
101 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | 77 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) |
102 | { | 78 | #define RPC_INITCWND RPC_CWNDSCALE |
103 | u8 *buf = (u8 *) packet; | 79 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) |
104 | int j; | ||
105 | |||
106 | dprintk("RPC: %s\n", msg); | ||
107 | for (j = 0; j < count && j < 128; j += 4) { | ||
108 | if (!(j & 31)) { | ||
109 | if (j) | ||
110 | dprintk("\n"); | ||
111 | dprintk("0x%04x ", j); | ||
112 | } | ||
113 | dprintk("%02x%02x%02x%02x ", | ||
114 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
115 | } | ||
116 | dprintk("\n"); | ||
117 | } | ||
118 | #else | ||
119 | static inline void | ||
120 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | ||
121 | { | ||
122 | /* NOP */ | ||
123 | } | ||
124 | #endif | ||
125 | 80 | ||
126 | /* | 81 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) |
127 | * Look up RPC transport given an INET socket | 82 | |
83 | /** | ||
84 | * xprt_reserve_xprt - serialize write access to transports | ||
85 | * @task: task that is requesting access to the transport | ||
86 | * | ||
87 | * This prevents mixing the payload of separate requests, and prevents | ||
88 | * transport connects from colliding with writes. No congestion control | ||
89 | * is provided. | ||
128 | */ | 90 | */ |
129 | static inline struct rpc_xprt * | 91 | int xprt_reserve_xprt(struct rpc_task *task) |
130 | xprt_from_sock(struct sock *sk) | ||
131 | { | 92 | { |
132 | return (struct rpc_xprt *) sk->sk_user_data; | 93 | struct rpc_xprt *xprt = task->tk_xprt; |
94 | struct rpc_rqst *req = task->tk_rqstp; | ||
95 | |||
96 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { | ||
97 | if (task == xprt->snd_task) | ||
98 | return 1; | ||
99 | if (task == NULL) | ||
100 | return 0; | ||
101 | goto out_sleep; | ||
102 | } | ||
103 | xprt->snd_task = task; | ||
104 | if (req) { | ||
105 | req->rq_bytes_sent = 0; | ||
106 | req->rq_ntrans++; | ||
107 | } | ||
108 | return 1; | ||
109 | |||
110 | out_sleep: | ||
111 | dprintk("RPC: %4d failed to lock transport %p\n", | ||
112 | task->tk_pid, xprt); | ||
113 | task->tk_timeout = 0; | ||
114 | task->tk_status = -EAGAIN; | ||
115 | if (req && req->rq_ntrans) | ||
116 | rpc_sleep_on(&xprt->resend, task, NULL, NULL); | ||
117 | else | ||
118 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | ||
119 | return 0; | ||
133 | } | 120 | } |
134 | 121 | ||
135 | /* | 122 | /* |
136 | * Serialize write access to sockets, in order to prevent different | 123 | * xprt_reserve_xprt_cong - serialize write access to transports |
137 | * requests from interfering with each other. | 124 | * @task: task that is requesting access to the transport |
138 | * Also prevents TCP socket connects from colliding with writes. | 125 | * |
126 | * Same as xprt_reserve_xprt, but Van Jacobson congestion control is | ||
127 | * integrated into the decision of whether a request is allowed to be | ||
128 | * woken up and given access to the transport. | ||
139 | */ | 129 | */ |
140 | static int | 130 | int xprt_reserve_xprt_cong(struct rpc_task *task) |
141 | __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
142 | { | 131 | { |
132 | struct rpc_xprt *xprt = task->tk_xprt; | ||
143 | struct rpc_rqst *req = task->tk_rqstp; | 133 | struct rpc_rqst *req = task->tk_rqstp; |
144 | 134 | ||
145 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { | 135 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { |
146 | if (task == xprt->snd_task) | 136 | if (task == xprt->snd_task) |
147 | return 1; | 137 | return 1; |
148 | goto out_sleep; | 138 | goto out_sleep; |
149 | } | 139 | } |
150 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 140 | if (__xprt_get_cong(xprt, task)) { |
151 | xprt->snd_task = task; | 141 | xprt->snd_task = task; |
152 | if (req) { | 142 | if (req) { |
153 | req->rq_bytes_sent = 0; | 143 | req->rq_bytes_sent = 0; |
@@ -156,10 +146,10 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | |||
156 | return 1; | 146 | return 1; |
157 | } | 147 | } |
158 | smp_mb__before_clear_bit(); | 148 | smp_mb__before_clear_bit(); |
159 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 149 | clear_bit(XPRT_LOCKED, &xprt->state); |
160 | smp_mb__after_clear_bit(); | 150 | smp_mb__after_clear_bit(); |
161 | out_sleep: | 151 | out_sleep: |
162 | dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); | 152 | dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); |
163 | task->tk_timeout = 0; | 153 | task->tk_timeout = 0; |
164 | task->tk_status = -EAGAIN; | 154 | task->tk_status = -EAGAIN; |
165 | if (req && req->rq_ntrans) | 155 | if (req && req->rq_ntrans) |
@@ -169,26 +159,52 @@ out_sleep: | |||
169 | return 0; | 159 | return 0; |
170 | } | 160 | } |
171 | 161 | ||
172 | static inline int | 162 | static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) |
173 | xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
174 | { | 163 | { |
175 | int retval; | 164 | int retval; |
176 | 165 | ||
177 | spin_lock_bh(&xprt->sock_lock); | 166 | spin_lock_bh(&xprt->transport_lock); |
178 | retval = __xprt_lock_write(xprt, task); | 167 | retval = xprt->ops->reserve_xprt(task); |
179 | spin_unlock_bh(&xprt->sock_lock); | 168 | spin_unlock_bh(&xprt->transport_lock); |
180 | return retval; | 169 | return retval; |
181 | } | 170 | } |
182 | 171 | ||
172 | static void __xprt_lock_write_next(struct rpc_xprt *xprt) | ||
173 | { | ||
174 | struct rpc_task *task; | ||
175 | struct rpc_rqst *req; | ||
183 | 176 | ||
184 | static void | 177 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
185 | __xprt_lock_write_next(struct rpc_xprt *xprt) | 178 | return; |
179 | |||
180 | task = rpc_wake_up_next(&xprt->resend); | ||
181 | if (!task) { | ||
182 | task = rpc_wake_up_next(&xprt->sending); | ||
183 | if (!task) | ||
184 | goto out_unlock; | ||
185 | } | ||
186 | |||
187 | req = task->tk_rqstp; | ||
188 | xprt->snd_task = task; | ||
189 | if (req) { | ||
190 | req->rq_bytes_sent = 0; | ||
191 | req->rq_ntrans++; | ||
192 | } | ||
193 | return; | ||
194 | |||
195 | out_unlock: | ||
196 | smp_mb__before_clear_bit(); | ||
197 | clear_bit(XPRT_LOCKED, &xprt->state); | ||
198 | smp_mb__after_clear_bit(); | ||
199 | } | ||
200 | |||
201 | static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) | ||
186 | { | 202 | { |
187 | struct rpc_task *task; | 203 | struct rpc_task *task; |
188 | 204 | ||
189 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | 205 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
190 | return; | 206 | return; |
191 | if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) | 207 | if (RPCXPRT_CONGESTED(xprt)) |
192 | goto out_unlock; | 208 | goto out_unlock; |
193 | task = rpc_wake_up_next(&xprt->resend); | 209 | task = rpc_wake_up_next(&xprt->resend); |
194 | if (!task) { | 210 | if (!task) { |
@@ -196,7 +212,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) | |||
196 | if (!task) | 212 | if (!task) |
197 | goto out_unlock; | 213 | goto out_unlock; |
198 | } | 214 | } |
199 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 215 | if (__xprt_get_cong(xprt, task)) { |
200 | struct rpc_rqst *req = task->tk_rqstp; | 216 | struct rpc_rqst *req = task->tk_rqstp; |
201 | xprt->snd_task = task; | 217 | xprt->snd_task = task; |
202 | if (req) { | 218 | if (req) { |
@@ -207,87 +223,52 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) | |||
207 | } | 223 | } |
208 | out_unlock: | 224 | out_unlock: |
209 | smp_mb__before_clear_bit(); | 225 | smp_mb__before_clear_bit(); |
210 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 226 | clear_bit(XPRT_LOCKED, &xprt->state); |
211 | smp_mb__after_clear_bit(); | 227 | smp_mb__after_clear_bit(); |
212 | } | 228 | } |
213 | 229 | ||
214 | /* | 230 | /** |
215 | * Releases the socket for use by other requests. | 231 | * xprt_release_xprt - allow other requests to use a transport |
232 | * @xprt: transport with other tasks potentially waiting | ||
233 | * @task: task that is releasing access to the transport | ||
234 | * | ||
235 | * Note that "task" can be NULL. No congestion control is provided. | ||
216 | */ | 236 | */ |
217 | static void | 237 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) |
218 | __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
219 | { | 238 | { |
220 | if (xprt->snd_task == task) { | 239 | if (xprt->snd_task == task) { |
221 | xprt->snd_task = NULL; | 240 | xprt->snd_task = NULL; |
222 | smp_mb__before_clear_bit(); | 241 | smp_mb__before_clear_bit(); |
223 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 242 | clear_bit(XPRT_LOCKED, &xprt->state); |
224 | smp_mb__after_clear_bit(); | 243 | smp_mb__after_clear_bit(); |
225 | __xprt_lock_write_next(xprt); | 244 | __xprt_lock_write_next(xprt); |
226 | } | 245 | } |
227 | } | 246 | } |
228 | 247 | ||
229 | static inline void | 248 | /** |
230 | xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | 249 | * xprt_release_xprt_cong - allow other requests to use a transport |
231 | { | 250 | * @xprt: transport with other tasks potentially waiting |
232 | spin_lock_bh(&xprt->sock_lock); | 251 | * @task: task that is releasing access to the transport |
233 | __xprt_release_write(xprt, task); | 252 | * |
234 | spin_unlock_bh(&xprt->sock_lock); | 253 | * Note that "task" can be NULL. Another task is awoken to use the |
235 | } | 254 | * transport if the transport's congestion window allows it. |
236 | |||
237 | /* | ||
238 | * Write data to socket. | ||
239 | */ | 255 | */ |
240 | static inline int | 256 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) |
241 | xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) | ||
242 | { | 257 | { |
243 | struct socket *sock = xprt->sock; | 258 | if (xprt->snd_task == task) { |
244 | struct xdr_buf *xdr = &req->rq_snd_buf; | 259 | xprt->snd_task = NULL; |
245 | struct sockaddr *addr = NULL; | 260 | smp_mb__before_clear_bit(); |
246 | int addrlen = 0; | 261 | clear_bit(XPRT_LOCKED, &xprt->state); |
247 | unsigned int skip; | 262 | smp_mb__after_clear_bit(); |
248 | int result; | 263 | __xprt_lock_write_next_cong(xprt); |
249 | |||
250 | if (!sock) | ||
251 | return -ENOTCONN; | ||
252 | |||
253 | xprt_pktdump("packet data:", | ||
254 | req->rq_svec->iov_base, | ||
255 | req->rq_svec->iov_len); | ||
256 | |||
257 | /* For UDP, we need to provide an address */ | ||
258 | if (!xprt->stream) { | ||
259 | addr = (struct sockaddr *) &xprt->addr; | ||
260 | addrlen = sizeof(xprt->addr); | ||
261 | } | 264 | } |
262 | /* Dont repeat bytes */ | 265 | } |
263 | skip = req->rq_bytes_sent; | ||
264 | |||
265 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
266 | result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); | ||
267 | |||
268 | dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); | ||
269 | |||
270 | if (result >= 0) | ||
271 | return result; | ||
272 | 266 | ||
273 | switch (result) { | 267 | static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) |
274 | case -ECONNREFUSED: | 268 | { |
275 | /* When the server has died, an ICMP port unreachable message | 269 | spin_lock_bh(&xprt->transport_lock); |
276 | * prompts ECONNREFUSED. | 270 | xprt->ops->release_xprt(xprt, task); |
277 | */ | 271 | spin_unlock_bh(&xprt->transport_lock); |
278 | case -EAGAIN: | ||
279 | break; | ||
280 | case -ECONNRESET: | ||
281 | case -ENOTCONN: | ||
282 | case -EPIPE: | ||
283 | /* connection broken */ | ||
284 | if (xprt->stream) | ||
285 | result = -ENOTCONN; | ||
286 | break; | ||
287 | default: | ||
288 | printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); | ||
289 | } | ||
290 | return result; | ||
291 | } | 272 | } |
292 | 273 | ||
293 | /* | 274 | /* |
@@ -321,26 +302,40 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) | |||
321 | return; | 302 | return; |
322 | req->rq_cong = 0; | 303 | req->rq_cong = 0; |
323 | xprt->cong -= RPC_CWNDSCALE; | 304 | xprt->cong -= RPC_CWNDSCALE; |
324 | __xprt_lock_write_next(xprt); | 305 | __xprt_lock_write_next_cong(xprt); |
325 | } | 306 | } |
326 | 307 | ||
327 | /* | 308 | /** |
328 | * Adjust RPC congestion window | 309 | * xprt_release_rqst_cong - housekeeping when request is complete |
310 | * @task: RPC request that recently completed | ||
311 | * | ||
312 | * Useful for transports that require congestion control. | ||
313 | */ | ||
314 | void xprt_release_rqst_cong(struct rpc_task *task) | ||
315 | { | ||
316 | __xprt_put_cong(task->tk_xprt, task->tk_rqstp); | ||
317 | } | ||
318 | |||
319 | /** | ||
320 | * xprt_adjust_cwnd - adjust transport congestion window | ||
321 | * @task: recently completed RPC request used to adjust window | ||
322 | * @result: result code of completed RPC request | ||
323 | * | ||
329 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | 324 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. |
330 | */ | 325 | */ |
331 | static void | 326 | void xprt_adjust_cwnd(struct rpc_task *task, int result) |
332 | xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | ||
333 | { | 327 | { |
334 | unsigned long cwnd; | 328 | struct rpc_rqst *req = task->tk_rqstp; |
329 | struct rpc_xprt *xprt = task->tk_xprt; | ||
330 | unsigned long cwnd = xprt->cwnd; | ||
335 | 331 | ||
336 | cwnd = xprt->cwnd; | ||
337 | if (result >= 0 && cwnd <= xprt->cong) { | 332 | if (result >= 0 && cwnd <= xprt->cong) { |
338 | /* The (cwnd >> 1) term makes sure | 333 | /* The (cwnd >> 1) term makes sure |
339 | * the result gets rounded properly. */ | 334 | * the result gets rounded properly. */ |
340 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; | 335 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; |
341 | if (cwnd > RPC_MAXCWND(xprt)) | 336 | if (cwnd > RPC_MAXCWND(xprt)) |
342 | cwnd = RPC_MAXCWND(xprt); | 337 | cwnd = RPC_MAXCWND(xprt); |
343 | __xprt_lock_write_next(xprt); | 338 | __xprt_lock_write_next_cong(xprt); |
344 | } else if (result == -ETIMEDOUT) { | 339 | } else if (result == -ETIMEDOUT) { |
345 | cwnd >>= 1; | 340 | cwnd >>= 1; |
346 | if (cwnd < RPC_CWNDSCALE) | 341 | if (cwnd < RPC_CWNDSCALE) |
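Note on the congestion-window code above: the window is kept in fixed-point units of RPC_CWNDSCALE (one request slot), grows by roughly one slot per window's worth of replies, and is halved on a timeout, which is the 44BSD-style behaviour the comment describes. A standalone sketch of just that arithmetic follows; the real xprt_adjust_cwnd additionally grows the window only while it is actually full (cwnd <= cong), and the slot count and loop below are invented numbers.

#include <errno.h>
#include <stdio.h>

/* fixed-point congestion window: one request slot == RPC_CWNDSCALE units */
#define RPC_CWNDSHIFT		8U
#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
#define RPC_MAXCWND(max_reqs)	((max_reqs) << RPC_CWNDSHIFT)

static unsigned long adjust(unsigned long cwnd, unsigned long max_reqs, int result)
{
	if (result >= 0) {
		/* additive increase: roughly 1/cwnd of a slot per good reply */
		cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
		if (cwnd > RPC_MAXCWND(max_reqs))
			cwnd = RPC_MAXCWND(max_reqs);
	} else {
		/* multiplicative decrease on timeout: halve, never below one slot */
		cwnd >>= 1;
		if (cwnd < RPC_CWNDSCALE)
			cwnd = RPC_CWNDSCALE;
	}
	return cwnd;
}

int main(void)
{
	unsigned long cwnd = RPC_CWNDSCALE;	/* RPC_INITCWND: start at one slot */
	int i;

	for (i = 0; i < 256; i++)		/* 256 successful replies */
		cwnd = adjust(cwnd, 16, 0);
	printf("after 256 replies: %lu units (~%lu slots)\n", cwnd, cwnd / RPC_CWNDSCALE);

	cwnd = adjust(cwnd, 16, -ETIMEDOUT);
	printf("after a timeout:   %lu units (~%lu slots)\n", cwnd, cwnd / RPC_CWNDSCALE);
	return 0;
}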
@@ -349,11 +344,89 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | |||
349 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", | 344 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", |
350 | xprt->cong, xprt->cwnd, cwnd); | 345 | xprt->cong, xprt->cwnd, cwnd); |
351 | xprt->cwnd = cwnd; | 346 | xprt->cwnd = cwnd; |
347 | __xprt_put_cong(xprt, req); | ||
348 | } | ||
349 | |||
350 | /** | ||
351 | * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue | ||
352 | * @xprt: transport with waiting tasks | ||
353 | * @status: result code to plant in each task before waking it | ||
354 | * | ||
355 | */ | ||
356 | void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) | ||
357 | { | ||
358 | if (status < 0) | ||
359 | rpc_wake_up_status(&xprt->pending, status); | ||
360 | else | ||
361 | rpc_wake_up(&xprt->pending); | ||
362 | } | ||
363 | |||
364 | /** | ||
365 | * xprt_wait_for_buffer_space - wait for transport output buffer to clear | ||
366 | * @task: task to be put to sleep | ||
367 | * | ||
368 | */ | ||
369 | void xprt_wait_for_buffer_space(struct rpc_task *task) | ||
370 | { | ||
371 | struct rpc_rqst *req = task->tk_rqstp; | ||
372 | struct rpc_xprt *xprt = req->rq_xprt; | ||
373 | |||
374 | task->tk_timeout = req->rq_timeout; | ||
375 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
376 | } | ||
377 | |||
378 | /** | ||
379 | * xprt_write_space - wake the task waiting for transport output buffer space | ||
380 | * @xprt: transport with waiting tasks | ||
381 | * | ||
382 | * Can be called in a soft IRQ context, so xprt_write_space never sleeps. | ||
383 | */ | ||
384 | void xprt_write_space(struct rpc_xprt *xprt) | ||
385 | { | ||
386 | if (unlikely(xprt->shutdown)) | ||
387 | return; | ||
388 | |||
389 | spin_lock_bh(&xprt->transport_lock); | ||
390 | if (xprt->snd_task) { | ||
391 | dprintk("RPC: write space: waking waiting task on xprt %p\n", | ||
392 | xprt); | ||
393 | rpc_wake_up_task(xprt->snd_task); | ||
394 | } | ||
395 | spin_unlock_bh(&xprt->transport_lock); | ||
396 | } | ||
397 | |||
398 | /** | ||
399 | * xprt_set_retrans_timeout_def - set a request's retransmit timeout | ||
400 | * @task: task whose timeout is to be set | ||
401 | * | ||
402 | * Set a request's retransmit timeout based on the transport's | ||
403 | * default timeout parameters. Used by transports that don't adjust | ||
404 | * the retransmit timeout based on round-trip time estimation. | ||
405 | */ | ||
406 | void xprt_set_retrans_timeout_def(struct rpc_task *task) | ||
407 | { | ||
408 | task->tk_timeout = task->tk_rqstp->rq_timeout; | ||
352 | } | 409 | } |
353 | 410 | ||
354 | /* | 411 | /* |
355 | * Reset the major timeout value | 412 | * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout |
413 | * @task: task whose timeout is to be set | ||
414 | * | ||
415 | * Set a request's retransmit timeout using the RTT estimator. | ||
356 | */ | 416 | */ |
417 | void xprt_set_retrans_timeout_rtt(struct rpc_task *task) | ||
418 | { | ||
419 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
420 | struct rpc_rtt *rtt = task->tk_client->cl_rtt; | ||
421 | struct rpc_rqst *req = task->tk_rqstp; | ||
422 | unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; | ||
423 | |||
424 | task->tk_timeout = rpc_calc_rto(rtt, timer); | ||
425 | task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; | ||
426 | if (task->tk_timeout > max_timeout || task->tk_timeout == 0) | ||
427 | task->tk_timeout = max_timeout; | ||
428 | } | ||
429 | |||
357 | static void xprt_reset_majortimeo(struct rpc_rqst *req) | 430 | static void xprt_reset_majortimeo(struct rpc_rqst *req) |
358 | { | 431 | { |
359 | struct rpc_timeout *to = &req->rq_xprt->timeout; | 432 | struct rpc_timeout *to = &req->rq_xprt->timeout; |
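Note on the retransmit-timeout helpers above: xprt_set_retrans_timeout_rtt starts from the RTT estimator's RTO and doubles it once for every timeout already recorded for this procedure and for every retransmission of this request, then clamps the result to the transport's to_maxval. A small numeric sketch of that scaling, with made-up tick values (rpc_calc_rto and rpc_ntimeo are the kernel hooks this stands in for):

#include <stdio.h>

/* estimated RTO, doubled per recorded timeout and per retransmission, then clamped */
static unsigned long retrans_timeout(unsigned long rto,	/* smoothed estimate, ticks */
				     unsigned int ntimeo,	/* timeouts seen for this proc */
				     unsigned int retries,	/* retransmissions so far */
				     unsigned long maxval)	/* transport's to_maxval */
{
	unsigned long t = rto << (ntimeo + retries);

	if (t > maxval || t == 0)
		t = maxval;
	return t;
}

int main(void)
{
	unsigned long rto = 25, maxval = 6000;	/* invented tick values */
	unsigned int r;

	for (r = 0; r < 10; r++)
		printf("retry %u -> timeout %lu ticks\n", r, retrans_timeout(rto, 2, r, maxval));
	return 0;
}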
@@ -368,8 +441,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) | |||
368 | req->rq_majortimeo += jiffies; | 441 | req->rq_majortimeo += jiffies; |
369 | } | 442 | } |
370 | 443 | ||
371 | /* | 444 | /** |
372 | * Adjust timeout values etc for next retransmit | 445 | * xprt_adjust_timeout - adjust timeout values for next retransmit |
446 | * @req: RPC request containing parameters to use for the adjustment | ||
447 | * | ||
373 | */ | 448 | */ |
374 | int xprt_adjust_timeout(struct rpc_rqst *req) | 449 | int xprt_adjust_timeout(struct rpc_rqst *req) |
375 | { | 450 | { |
@@ -391,9 +466,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req) | |||
391 | req->rq_retries = 0; | 466 | req->rq_retries = 0; |
392 | xprt_reset_majortimeo(req); | 467 | xprt_reset_majortimeo(req); |
393 | /* Reset the RTT counters == "slow start" */ | 468 | /* Reset the RTT counters == "slow start" */ |
394 | spin_lock_bh(&xprt->sock_lock); | 469 | spin_lock_bh(&xprt->transport_lock); |
395 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); | 470 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); |
396 | spin_unlock_bh(&xprt->sock_lock); | 471 | spin_unlock_bh(&xprt->transport_lock); |
397 | pprintk("RPC: %lu timeout\n", jiffies); | 472 | pprintk("RPC: %lu timeout\n", jiffies); |
398 | status = -ETIMEDOUT; | 473 | status = -ETIMEDOUT; |
399 | } | 474 | } |
@@ -405,133 +480,52 @@ int xprt_adjust_timeout(struct rpc_rqst *req) | |||
405 | return status; | 480 | return status; |
406 | } | 481 | } |
407 | 482 | ||
408 | /* | 483 | static void xprt_autoclose(void *args) |
409 | * Close down a transport socket | ||
410 | */ | ||
411 | static void | ||
412 | xprt_close(struct rpc_xprt *xprt) | ||
413 | { | ||
414 | struct socket *sock = xprt->sock; | ||
415 | struct sock *sk = xprt->inet; | ||
416 | |||
417 | if (!sk) | ||
418 | return; | ||
419 | |||
420 | write_lock_bh(&sk->sk_callback_lock); | ||
421 | xprt->inet = NULL; | ||
422 | xprt->sock = NULL; | ||
423 | |||
424 | sk->sk_user_data = NULL; | ||
425 | sk->sk_data_ready = xprt->old_data_ready; | ||
426 | sk->sk_state_change = xprt->old_state_change; | ||
427 | sk->sk_write_space = xprt->old_write_space; | ||
428 | write_unlock_bh(&sk->sk_callback_lock); | ||
429 | |||
430 | sk->sk_no_check = 0; | ||
431 | |||
432 | sock_release(sock); | ||
433 | } | ||
434 | |||
435 | static void | ||
436 | xprt_socket_autoclose(void *args) | ||
437 | { | 484 | { |
438 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | 485 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; |
439 | 486 | ||
440 | xprt_disconnect(xprt); | 487 | xprt_disconnect(xprt); |
441 | xprt_close(xprt); | 488 | xprt->ops->close(xprt); |
442 | xprt_release_write(xprt, NULL); | 489 | xprt_release_write(xprt, NULL); |
443 | } | 490 | } |
444 | 491 | ||
445 | /* | 492 | /** |
446 | * Mark a transport as disconnected | 493 | * xprt_disconnect - mark a transport as disconnected |
494 | * @xprt: transport to flag for disconnect | ||
495 | * | ||
447 | */ | 496 | */ |
448 | static void | 497 | void xprt_disconnect(struct rpc_xprt *xprt) |
449 | xprt_disconnect(struct rpc_xprt *xprt) | ||
450 | { | 498 | { |
451 | dprintk("RPC: disconnected transport %p\n", xprt); | 499 | dprintk("RPC: disconnected transport %p\n", xprt); |
452 | spin_lock_bh(&xprt->sock_lock); | 500 | spin_lock_bh(&xprt->transport_lock); |
453 | xprt_clear_connected(xprt); | 501 | xprt_clear_connected(xprt); |
454 | rpc_wake_up_status(&xprt->pending, -ENOTCONN); | 502 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
455 | spin_unlock_bh(&xprt->sock_lock); | 503 | spin_unlock_bh(&xprt->transport_lock); |
456 | } | 504 | } |
457 | 505 | ||
458 | /* | ||
459 | * Used to allow disconnection when we've been idle | ||
460 | */ | ||
461 | static void | 506 | static void |
462 | xprt_init_autodisconnect(unsigned long data) | 507 | xprt_init_autodisconnect(unsigned long data) |
463 | { | 508 | { |
464 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; | 509 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; |
465 | 510 | ||
466 | spin_lock(&xprt->sock_lock); | 511 | spin_lock(&xprt->transport_lock); |
467 | if (!list_empty(&xprt->recv) || xprt->shutdown) | 512 | if (!list_empty(&xprt->recv) || xprt->shutdown) |
468 | goto out_abort; | 513 | goto out_abort; |
469 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | 514 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
470 | goto out_abort; | 515 | goto out_abort; |
471 | spin_unlock(&xprt->sock_lock); | 516 | spin_unlock(&xprt->transport_lock); |
472 | /* Let keventd close the socket */ | 517 | if (xprt_connecting(xprt)) |
473 | if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) | ||
474 | xprt_release_write(xprt, NULL); | 518 | xprt_release_write(xprt, NULL); |
475 | else | 519 | else |
476 | schedule_work(&xprt->task_cleanup); | 520 | schedule_work(&xprt->task_cleanup); |
477 | return; | 521 | return; |
478 | out_abort: | 522 | out_abort: |
479 | spin_unlock(&xprt->sock_lock); | 523 | spin_unlock(&xprt->transport_lock); |
480 | } | ||
481 | |||
482 | static void xprt_socket_connect(void *args) | ||
483 | { | ||
484 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
485 | struct socket *sock = xprt->sock; | ||
486 | int status = -EIO; | ||
487 | |||
488 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
489 | goto out; | ||
490 | |||
491 | /* | ||
492 | * Start by resetting any existing state | ||
493 | */ | ||
494 | xprt_close(xprt); | ||
495 | sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); | ||
496 | if (sock == NULL) { | ||
497 | /* couldn't create socket or bind to reserved port; | ||
498 | * this is likely a permanent error, so cause an abort */ | ||
499 | goto out; | ||
500 | } | ||
501 | xprt_bind_socket(xprt, sock); | ||
502 | xprt_sock_setbufsize(xprt); | ||
503 | |||
504 | status = 0; | ||
505 | if (!xprt->stream) | ||
506 | goto out; | ||
507 | |||
508 | /* | ||
509 | * Tell the socket layer to start connecting... | ||
510 | */ | ||
511 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
512 | sizeof(xprt->addr), O_NONBLOCK); | ||
513 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
514 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
515 | if (status < 0) { | ||
516 | switch (status) { | ||
517 | case -EINPROGRESS: | ||
518 | case -EALREADY: | ||
519 | goto out_clear; | ||
520 | } | ||
521 | } | ||
522 | out: | ||
523 | if (status < 0) | ||
524 | rpc_wake_up_status(&xprt->pending, status); | ||
525 | else | ||
526 | rpc_wake_up(&xprt->pending); | ||
527 | out_clear: | ||
528 | smp_mb__before_clear_bit(); | ||
529 | clear_bit(XPRT_CONNECTING, &xprt->sockstate); | ||
530 | smp_mb__after_clear_bit(); | ||
531 | } | 524 | } |
532 | 525 | ||
533 | /* | 526 | /** |
534 | * Attempt to connect a TCP socket. | 527 | * xprt_connect - schedule a transport connect operation |
528 | * @task: RPC task that is requesting the connect | ||
535 | * | 529 | * |
536 | */ | 530 | */ |
537 | void xprt_connect(struct rpc_task *task) | 531 | void xprt_connect(struct rpc_task *task) |
@@ -552,37 +546,19 @@ void xprt_connect(struct rpc_task *task) | |||
552 | if (!xprt_lock_write(xprt, task)) | 546 | if (!xprt_lock_write(xprt, task)) |
553 | return; | 547 | return; |
554 | if (xprt_connected(xprt)) | 548 | if (xprt_connected(xprt)) |
555 | goto out_write; | 549 | xprt_release_write(xprt, task); |
550 | else { | ||
551 | if (task->tk_rqstp) | ||
552 | task->tk_rqstp->rq_bytes_sent = 0; | ||
556 | 553 | ||
557 | if (task->tk_rqstp) | 554 | task->tk_timeout = xprt->connect_timeout; |
558 | task->tk_rqstp->rq_bytes_sent = 0; | 555 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); |
559 | 556 | xprt->ops->connect(task); | |
560 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | ||
561 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); | ||
562 | if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { | ||
563 | /* Note: if we are here due to a dropped connection | ||
564 | * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ | ||
565 | * seconds | ||
566 | */ | ||
567 | if (xprt->sock != NULL) | ||
568 | schedule_delayed_work(&xprt->sock_connect, | ||
569 | RPC_REESTABLISH_TIMEOUT); | ||
570 | else { | ||
571 | schedule_work(&xprt->sock_connect); | ||
572 | if (!RPC_IS_ASYNC(task)) | ||
573 | flush_scheduled_work(); | ||
574 | } | ||
575 | } | 557 | } |
576 | return; | 558 | return; |
577 | out_write: | ||
578 | xprt_release_write(xprt, task); | ||
579 | } | 559 | } |
580 | 560 | ||
581 | /* | 561 | static void xprt_connect_status(struct rpc_task *task) |
582 | * We arrive here when awoken from waiting on connection establishment. | ||
583 | */ | ||
584 | static void | ||
585 | xprt_connect_status(struct rpc_task *task) | ||
586 | { | 562 | { |
587 | struct rpc_xprt *xprt = task->tk_xprt; | 563 | struct rpc_xprt *xprt = task->tk_xprt; |
588 | 564 | ||
@@ -592,31 +568,42 @@ xprt_connect_status(struct rpc_task *task) | |||
592 | return; | 568 | return; |
593 | } | 569 | } |
594 | 570 | ||
595 | /* if soft mounted, just cause this RPC to fail */ | ||
596 | if (RPC_IS_SOFT(task)) | ||
597 | task->tk_status = -EIO; | ||
598 | |||
599 | switch (task->tk_status) { | 571 | switch (task->tk_status) { |
600 | case -ECONNREFUSED: | 572 | case -ECONNREFUSED: |
601 | case -ECONNRESET: | 573 | case -ECONNRESET: |
574 | dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n", | ||
575 | task->tk_pid, task->tk_client->cl_server); | ||
576 | break; | ||
602 | case -ENOTCONN: | 577 | case -ENOTCONN: |
603 | return; | 578 | dprintk("RPC: %4d xprt_connect_status: connection broken\n", |
579 | task->tk_pid); | ||
580 | break; | ||
604 | case -ETIMEDOUT: | 581 | case -ETIMEDOUT: |
605 | dprintk("RPC: %4d xprt_connect_status: timed out\n", | 582 | dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n", |
606 | task->tk_pid); | 583 | task->tk_pid); |
607 | break; | 584 | break; |
608 | default: | 585 | default: |
609 | printk(KERN_ERR "RPC: error %d connecting to server %s\n", | 586 | dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n", |
610 | -task->tk_status, task->tk_client->cl_server); | 587 | task->tk_pid, -task->tk_status, task->tk_client->cl_server); |
588 | xprt_release_write(xprt, task); | ||
589 | task->tk_status = -EIO; | ||
590 | return; | ||
591 | } | ||
592 | |||
593 | /* if soft mounted, just cause this RPC to fail */ | ||
594 | if (RPC_IS_SOFT(task)) { | ||
595 | xprt_release_write(xprt, task); | ||
596 | task->tk_status = -EIO; | ||
611 | } | 597 | } |
612 | xprt_release_write(xprt, task); | ||
613 | } | 598 | } |
614 | 599 | ||
615 | /* | 600 | /** |
616 | * Look up the RPC request corresponding to a reply, and then lock it. | 601 | * xprt_lookup_rqst - find an RPC request corresponding to an XID |
602 | * @xprt: transport on which the original request was transmitted | ||
603 | * @xid: RPC XID of incoming reply | ||
604 | * | ||
617 | */ | 605 | */ |
618 | static inline struct rpc_rqst * | 606 | struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) |
619 | xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | ||
620 | { | 607 | { |
621 | struct list_head *pos; | 608 | struct list_head *pos; |
622 | struct rpc_rqst *req = NULL; | 609 | struct rpc_rqst *req = NULL; |
@@ -631,556 +618,68 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | |||
631 | return req; | 618 | return req; |
632 | } | 619 | } |
633 | 620 | ||
634 | /* | 621 | /** |
635 | * Complete reply received. | 622 | * xprt_update_rtt - update an RPC client's RTT state after receiving a reply |
636 | * The TCP code relies on us to remove the request from xprt->pending. | 623 | * @task: RPC request that recently completed |
637 | */ | 624 | * |
638 | static void | ||
639 | xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) | ||
640 | { | ||
641 | struct rpc_task *task = req->rq_task; | ||
642 | struct rpc_clnt *clnt = task->tk_client; | ||
643 | |||
644 | /* Adjust congestion window */ | ||
645 | if (!xprt->nocong) { | ||
646 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | ||
647 | xprt_adjust_cwnd(xprt, copied); | ||
648 | __xprt_put_cong(xprt, req); | ||
649 | if (timer) { | ||
650 | if (req->rq_ntrans == 1) | ||
651 | rpc_update_rtt(clnt->cl_rtt, timer, | ||
652 | (long)jiffies - req->rq_xtime); | ||
653 | rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); | ||
654 | } | ||
655 | } | ||
656 | |||
657 | #ifdef RPC_PROFILE | ||
658 | /* Profile only reads for now */ | ||
659 | if (copied > 1024) { | ||
660 | static unsigned long nextstat; | ||
661 | static unsigned long pkt_rtt, pkt_len, pkt_cnt; | ||
662 | |||
663 | pkt_cnt++; | ||
664 | pkt_len += req->rq_slen + copied; | ||
665 | pkt_rtt += jiffies - req->rq_xtime; | ||
666 | if (time_before(nextstat, jiffies)) { | ||
667 | printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); | ||
668 | printk("RPC: %ld %ld %ld %ld stat\n", | ||
669 | jiffies, pkt_cnt, pkt_len, pkt_rtt); | ||
670 | pkt_rtt = pkt_len = pkt_cnt = 0; | ||
671 | nextstat = jiffies + 5 * HZ; | ||
672 | } | ||
673 | } | ||
674 | #endif | ||
675 | |||
676 | dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); | ||
677 | list_del_init(&req->rq_list); | ||
678 | req->rq_received = req->rq_private_buf.len = copied; | ||
679 | |||
680 | /* ... and wake up the process. */ | ||
681 | rpc_wake_up_task(task); | ||
682 | return; | ||
683 | } | ||
684 | |||
685 | static size_t | ||
686 | skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
687 | { | ||
688 | if (len > desc->count) | ||
689 | len = desc->count; | ||
690 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
691 | return 0; | ||
692 | desc->count -= len; | ||
693 | desc->offset += len; | ||
694 | return len; | ||
695 | } | ||
696 | |||
697 | static size_t | ||
698 | skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
699 | { | ||
700 | unsigned int csum2, pos; | ||
701 | |||
702 | if (len > desc->count) | ||
703 | len = desc->count; | ||
704 | pos = desc->offset; | ||
705 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
706 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
707 | desc->count -= len; | ||
708 | desc->offset += len; | ||
709 | return len; | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | * We have set things up such that we perform the checksum of the UDP | ||
714 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
715 | */ | ||
716 | int | ||
717 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
718 | { | ||
719 | skb_reader_t desc; | ||
720 | |||
721 | desc.skb = skb; | ||
722 | desc.offset = sizeof(struct udphdr); | ||
723 | desc.count = skb->len - desc.offset; | ||
724 | |||
725 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
726 | goto no_checksum; | ||
727 | |||
728 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
729 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) | ||
730 | return -1; | ||
731 | if (desc.offset != skb->len) { | ||
732 | unsigned int csum2; | ||
733 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
734 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
735 | } | ||
736 | if (desc.count) | ||
737 | return -1; | ||
738 | if ((unsigned short)csum_fold(desc.csum)) | ||
739 | return -1; | ||
740 | return 0; | ||
741 | no_checksum: | ||
742 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | ||
743 | return -1; | ||
744 | if (desc.count) | ||
745 | return -1; | ||
746 | return 0; | ||
747 | } | ||
748 | |||
749 | /* | ||
750 | * Input handler for RPC replies. Called from a bottom half and hence | ||
751 | * atomic. | ||
752 | */ | ||
753 | static void | ||
754 | udp_data_ready(struct sock *sk, int len) | ||
755 | { | ||
756 | struct rpc_task *task; | ||
757 | struct rpc_xprt *xprt; | ||
758 | struct rpc_rqst *rovr; | ||
759 | struct sk_buff *skb; | ||
760 | int err, repsize, copied; | ||
761 | u32 _xid, *xp; | ||
762 | |||
763 | read_lock(&sk->sk_callback_lock); | ||
764 | dprintk("RPC: udp_data_ready...\n"); | ||
765 | if (!(xprt = xprt_from_sock(sk))) { | ||
766 | printk("RPC: udp_data_ready request not found!\n"); | ||
767 | goto out; | ||
768 | } | ||
769 | |||
770 | dprintk("RPC: udp_data_ready client %p\n", xprt); | ||
771 | |||
772 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
773 | goto out; | ||
774 | |||
775 | if (xprt->shutdown) | ||
776 | goto dropit; | ||
777 | |||
778 | repsize = skb->len - sizeof(struct udphdr); | ||
779 | if (repsize < 4) { | ||
780 | printk("RPC: impossible RPC reply size %d!\n", repsize); | ||
781 | goto dropit; | ||
782 | } | ||
783 | |||
784 | /* Copy the XID from the skb... */ | ||
785 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
786 | sizeof(_xid), &_xid); | ||
787 | if (xp == NULL) | ||
788 | goto dropit; | ||
789 | |||
790 | /* Look up and lock the request corresponding to the given XID */ | ||
791 | spin_lock(&xprt->sock_lock); | ||
792 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
793 | if (!rovr) | ||
794 | goto out_unlock; | ||
795 | task = rovr->rq_task; | ||
796 | |||
797 | dprintk("RPC: %4d received reply\n", task->tk_pid); | ||
798 | |||
799 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
800 | copied = repsize; | ||
801 | |||
802 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
803 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
804 | goto out_unlock; | ||
805 | |||
806 | /* Something worked... */ | ||
807 | dst_confirm(skb->dst); | ||
808 | |||
809 | xprt_complete_rqst(xprt, rovr, copied); | ||
810 | |||
811 | out_unlock: | ||
812 | spin_unlock(&xprt->sock_lock); | ||
813 | dropit: | ||
814 | skb_free_datagram(sk, skb); | ||
815 | out: | ||
816 | read_unlock(&sk->sk_callback_lock); | ||
817 | } | ||
818 | |||
819 | /* | ||
820 | * Copy from an skb into memory and shrink the skb. | ||
821 | */ | ||
822 | static inline size_t | ||
823 | tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
824 | { | ||
825 | if (len > desc->count) | ||
826 | len = desc->count; | ||
827 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
828 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
829 | len, desc->count); | ||
830 | return 0; | ||
831 | } | ||
832 | desc->offset += len; | ||
833 | desc->count -= len; | ||
834 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
835 | len, desc->count); | ||
836 | return len; | ||
837 | } | ||
838 | |||
839 | /* | ||
840 | * TCP read fragment marker | ||
841 | */ | ||
842 | static inline void | ||
843 | tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
844 | { | ||
845 | size_t len, used; | ||
846 | char *p; | ||
847 | |||
848 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
849 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
850 | used = tcp_copy_data(desc, p, len); | ||
851 | xprt->tcp_offset += used; | ||
852 | if (used != len) | ||
853 | return; | ||
854 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
855 | if (xprt->tcp_reclen & 0x80000000) | ||
856 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
857 | else | ||
858 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
859 | xprt->tcp_reclen &= 0x7fffffff; | ||
860 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
861 | xprt->tcp_offset = 0; | ||
862 | /* Sanity check of the record length */ | ||
863 | if (xprt->tcp_reclen < 4) { | ||
864 | printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); | ||
865 | xprt_disconnect(xprt); | ||
866 | } | ||
867 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
868 | xprt->tcp_reclen); | ||
869 | } | ||
870 | |||
871 | static void | ||
872 | tcp_check_recm(struct rpc_xprt *xprt) | ||
873 | { | ||
874 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
875 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
876 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
877 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
878 | xprt->tcp_offset = 0; | ||
879 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
880 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
881 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
882 | xprt->tcp_copied = 0; | ||
883 | } | ||
884 | } | ||
885 | } | ||
886 | |||
887 | /* | ||
888 | * TCP read xid | ||
889 | */ | ||
890 | static inline void | ||
891 | tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
892 | { | ||
893 | size_t len, used; | ||
894 | char *p; | ||
895 | |||
896 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
897 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
898 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
899 | used = tcp_copy_data(desc, p, len); | ||
900 | xprt->tcp_offset += used; | ||
901 | if (used != len) | ||
902 | return; | ||
903 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
904 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
905 | xprt->tcp_copied = 4; | ||
906 | dprintk("RPC: reading reply for XID %08x\n", | ||
907 | ntohl(xprt->tcp_xid)); | ||
908 | tcp_check_recm(xprt); | ||
909 | } | ||
910 | |||
911 | /* | ||
912 | * TCP read and complete request | ||
913 | */ | ||
914 | static inline void | ||
915 | tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
916 | { | ||
917 | struct rpc_rqst *req; | ||
918 | struct xdr_buf *rcvbuf; | ||
919 | size_t len; | ||
920 | ssize_t r; | ||
921 | |||
922 | /* Find and lock the request corresponding to this xid */ | ||
923 | spin_lock(&xprt->sock_lock); | ||
924 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
925 | if (!req) { | ||
926 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
927 | dprintk("RPC: XID %08x request not found!\n", | ||
928 | ntohl(xprt->tcp_xid)); | ||
929 | spin_unlock(&xprt->sock_lock); | ||
930 | return; | ||
931 | } | ||
932 | |||
933 | rcvbuf = &req->rq_private_buf; | ||
934 | len = desc->count; | ||
935 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
936 | skb_reader_t my_desc; | ||
937 | |||
938 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
939 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
940 | my_desc.count = len; | ||
941 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
942 | &my_desc, tcp_copy_data); | ||
943 | desc->count -= r; | ||
944 | desc->offset += r; | ||
945 | } else | ||
946 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
947 | desc, tcp_copy_data); | ||
948 | |||
949 | if (r > 0) { | ||
950 | xprt->tcp_copied += r; | ||
951 | xprt->tcp_offset += r; | ||
952 | } | ||
953 | if (r != len) { | ||
954 | /* Error when copying to the receive buffer, | ||
955 | * usually because we weren't able to allocate | ||
956 | * additional buffer pages. All we can do now | ||
957 | * is turn off XPRT_COPY_DATA, so the request | ||
958 | * will not receive any additional updates, | ||
959 | * and time out. | ||
960 | * Any remaining data from this record will | ||
961 | * be discarded. | ||
962 | */ | ||
963 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
964 | dprintk("RPC: XID %08x truncated request\n", | ||
965 | ntohl(xprt->tcp_xid)); | ||
966 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
967 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
968 | goto out; | ||
969 | } | ||
970 | |||
971 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
972 | ntohl(xprt->tcp_xid), r); | ||
973 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
974 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
975 | |||
976 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
977 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
978 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
979 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
980 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
981 | } | ||
982 | |||
983 | out: | ||
984 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { | ||
985 | dprintk("RPC: %4d received reply complete\n", | ||
986 | req->rq_task->tk_pid); | ||
987 | xprt_complete_rqst(xprt, req, xprt->tcp_copied); | ||
988 | } | ||
989 | spin_unlock(&xprt->sock_lock); | ||
990 | tcp_check_recm(xprt); | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * TCP discard extra bytes from a short read | ||
995 | */ | ||
996 | static inline void | ||
997 | tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
998 | { | ||
999 | size_t len; | ||
1000 | |||
1001 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
1002 | if (len > desc->count) | ||
1003 | len = desc->count; | ||
1004 | desc->count -= len; | ||
1005 | desc->offset += len; | ||
1006 | xprt->tcp_offset += len; | ||
1007 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
1008 | tcp_check_recm(xprt); | ||
1009 | } | ||
1010 | |||
1011 | /* | ||
1012 | * TCP record receive routine | ||
1013 | * We first have to grab the record marker, then the XID, then the data. | ||
1014 | */ | 625 | */ |
1015 | static int | 626 | void xprt_update_rtt(struct rpc_task *task) |
1016 | tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, | ||
1017 | unsigned int offset, size_t len) | ||
1018 | { | ||
1019 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
1020 | skb_reader_t desc = { | ||
1021 | .skb = skb, | ||
1022 | .offset = offset, | ||
1023 | .count = len, | ||
1024 | .csum = 0 | ||
1025 | }; | ||
1026 | |||
1027 | dprintk("RPC: tcp_data_recv\n"); | ||
1028 | do { | ||
1029 | /* Read in a new fragment marker if necessary */ | ||
1030 | /* Can we ever really expect to get completely empty fragments? */ | ||
1031 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
1032 | tcp_read_fraghdr(xprt, &desc); | ||
1033 | continue; | ||
1034 | } | ||
1035 | /* Read in the xid if necessary */ | ||
1036 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
1037 | tcp_read_xid(xprt, &desc); | ||
1038 | continue; | ||
1039 | } | ||
1040 | /* Read in the request data */ | ||
1041 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
1042 | tcp_read_request(xprt, &desc); | ||
1043 | continue; | ||
1044 | } | ||
1045 | /* Skip over any trailing bytes on short reads */ | ||
1046 | tcp_read_discard(xprt, &desc); | ||
1047 | } while (desc.count); | ||
1048 | dprintk("RPC: tcp_data_recv done\n"); | ||
1049 | return len - desc.count; | ||
1050 | } | ||
1051 | |||
1052 | static void tcp_data_ready(struct sock *sk, int bytes) | ||
1053 | { | 627 | { |
1054 | struct rpc_xprt *xprt; | 628 | struct rpc_rqst *req = task->tk_rqstp; |
1055 | read_descriptor_t rd_desc; | 629 | struct rpc_rtt *rtt = task->tk_client->cl_rtt; |
1056 | 630 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | |
1057 | read_lock(&sk->sk_callback_lock); | ||
1058 | dprintk("RPC: tcp_data_ready...\n"); | ||
1059 | if (!(xprt = xprt_from_sock(sk))) { | ||
1060 | printk("RPC: tcp_data_ready socket info not found!\n"); | ||
1061 | goto out; | ||
1062 | } | ||
1063 | if (xprt->shutdown) | ||
1064 | goto out; | ||
1065 | |||
1066 | /* We use rd_desc to pass struct xprt to tcp_data_recv */ | ||
1067 | rd_desc.arg.data = xprt; | ||
1068 | rd_desc.count = 65536; | ||
1069 | tcp_read_sock(sk, &rd_desc, tcp_data_recv); | ||
1070 | out: | ||
1071 | read_unlock(&sk->sk_callback_lock); | ||
1072 | } | ||
1073 | |||
1074 | static void | ||
1075 | tcp_state_change(struct sock *sk) | ||
1076 | { | ||
1077 | struct rpc_xprt *xprt; | ||
1078 | 631 | ||
1079 | read_lock(&sk->sk_callback_lock); | 632 | if (timer) { |
1080 | if (!(xprt = xprt_from_sock(sk))) | 633 | if (req->rq_ntrans == 1) |
1081 | goto out; | 634 | rpc_update_rtt(rtt, timer, |
1082 | dprintk("RPC: tcp_state_change client %p...\n", xprt); | 635 | (long)jiffies - req->rq_xtime); |
1083 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | 636 | rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); |
1084 | sk->sk_state, xprt_connected(xprt), | ||
1085 | sock_flag(sk, SOCK_DEAD), | ||
1086 | sock_flag(sk, SOCK_ZAPPED)); | ||
1087 | |||
1088 | switch (sk->sk_state) { | ||
1089 | case TCP_ESTABLISHED: | ||
1090 | spin_lock_bh(&xprt->sock_lock); | ||
1091 | if (!xprt_test_and_set_connected(xprt)) { | ||
1092 | /* Reset TCP record info */ | ||
1093 | xprt->tcp_offset = 0; | ||
1094 | xprt->tcp_reclen = 0; | ||
1095 | xprt->tcp_copied = 0; | ||
1096 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
1097 | rpc_wake_up(&xprt->pending); | ||
1098 | } | ||
1099 | spin_unlock_bh(&xprt->sock_lock); | ||
1100 | break; | ||
1101 | case TCP_SYN_SENT: | ||
1102 | case TCP_SYN_RECV: | ||
1103 | break; | ||
1104 | default: | ||
1105 | xprt_disconnect(xprt); | ||
1106 | break; | ||
1107 | } | 637 | } |
1108 | out: | ||
1109 | read_unlock(&sk->sk_callback_lock); | ||
1110 | } | 638 | } |
1111 | 639 | ||
1112 | /* | 640 | /** |
1113 | * Called when more output buffer space is available for this socket. | 641 | * xprt_complete_rqst - called when reply processing is complete |
1114 | * We try not to wake our writers until they can make "significant" | 642 | * @task: RPC request that recently completed |
1115 | * progress, otherwise we'll waste resources thrashing sock_sendmsg | 643 | * @copied: actual number of bytes received from the transport |
1116 | * with a bunch of small requests. | 644 | * |
645 | * Caller holds transport lock. | ||
1117 | */ | 646 | */ |
1118 | static void | 647 | void xprt_complete_rqst(struct rpc_task *task, int copied) |
1119 | xprt_write_space(struct sock *sk) | ||
1120 | { | 648 | { |
1121 | struct rpc_xprt *xprt; | 649 | struct rpc_rqst *req = task->tk_rqstp; |
1122 | struct socket *sock; | ||
1123 | |||
1124 | read_lock(&sk->sk_callback_lock); | ||
1125 | if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) | ||
1126 | goto out; | ||
1127 | if (xprt->shutdown) | ||
1128 | goto out; | ||
1129 | |||
1130 | /* Wait until we have enough socket memory */ | ||
1131 | if (xprt->stream) { | ||
1132 | /* from net/core/stream.c:sk_stream_write_space */ | ||
1133 | if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) | ||
1134 | goto out; | ||
1135 | } else { | ||
1136 | /* from net/core/sock.c:sock_def_write_space */ | ||
1137 | if (!sock_writeable(sk)) | ||
1138 | goto out; | ||
1139 | } | ||
1140 | 650 | ||
1141 | if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) | 651 | dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", |
1142 | goto out; | 652 | task->tk_pid, ntohl(req->rq_xid), copied); |
1143 | 653 | ||
1144 | spin_lock_bh(&xprt->sock_lock); | 654 | list_del_init(&req->rq_list); |
1145 | if (xprt->snd_task) | 655 | req->rq_received = req->rq_private_buf.len = copied; |
1146 | rpc_wake_up_task(xprt->snd_task); | 656 | rpc_wake_up_task(task); |
1147 | spin_unlock_bh(&xprt->sock_lock); | ||
1148 | out: | ||
1149 | read_unlock(&sk->sk_callback_lock); | ||
1150 | } | 657 | } |
1151 | 658 | ||
1152 | /* | 659 | static void xprt_timer(struct rpc_task *task) |
1153 | * RPC receive timeout handler. | ||
1154 | */ | ||
1155 | static void | ||
1156 | xprt_timer(struct rpc_task *task) | ||
1157 | { | 660 | { |
1158 | struct rpc_rqst *req = task->tk_rqstp; | 661 | struct rpc_rqst *req = task->tk_rqstp; |
1159 | struct rpc_xprt *xprt = req->rq_xprt; | 662 | struct rpc_xprt *xprt = req->rq_xprt; |
1160 | 663 | ||
1161 | spin_lock(&xprt->sock_lock); | 664 | dprintk("RPC: %4d xprt_timer\n", task->tk_pid); |
1162 | if (req->rq_received) | ||
1163 | goto out; | ||
1164 | |||
1165 | xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); | ||
1166 | __xprt_put_cong(xprt, req); | ||
1167 | 665 | ||
1168 | dprintk("RPC: %4d xprt_timer (%s request)\n", | 666 | spin_lock(&xprt->transport_lock); |
1169 | task->tk_pid, req ? "pending" : "backlogged"); | 667 | if (!req->rq_received) { |
1170 | 668 | if (xprt->ops->timer) | |
1171 | task->tk_status = -ETIMEDOUT; | 669 | xprt->ops->timer(task); |
1172 | out: | 670 | task->tk_status = -ETIMEDOUT; |
671 | } | ||
1173 | task->tk_timeout = 0; | 672 | task->tk_timeout = 0; |
1174 | rpc_wake_up_task(task); | 673 | rpc_wake_up_task(task); |
1175 | spin_unlock(&xprt->sock_lock); | 674 | spin_unlock(&xprt->transport_lock); |
1176 | } | 675 | } |
1177 | 676 | ||
1178 | /* | 677 | /** |
1179 | * Place the actual RPC call. | 678 | * xprt_prepare_transmit - reserve the transport before sending a request |
1180 | * We have to copy the iovec because sendmsg fiddles with its contents. | 679 | * @task: RPC task about to send a request |
680 | * | ||
1181 | */ | 681 | */ |
1182 | int | 682 | int xprt_prepare_transmit(struct rpc_task *task) |
1183 | xprt_prepare_transmit(struct rpc_task *task) | ||
1184 | { | 683 | { |
1185 | struct rpc_rqst *req = task->tk_rqstp; | 684 | struct rpc_rqst *req = task->tk_rqstp; |
1186 | struct rpc_xprt *xprt = req->rq_xprt; | 685 | struct rpc_xprt *xprt = req->rq_xprt; |
@@ -1191,12 +690,12 @@ xprt_prepare_transmit(struct rpc_task *task) | |||
1191 | if (xprt->shutdown) | 690 | if (xprt->shutdown) |
1192 | return -EIO; | 691 | return -EIO; |
1193 | 692 | ||
1194 | spin_lock_bh(&xprt->sock_lock); | 693 | spin_lock_bh(&xprt->transport_lock); |
1195 | if (req->rq_received && !req->rq_bytes_sent) { | 694 | if (req->rq_received && !req->rq_bytes_sent) { |
1196 | err = req->rq_received; | 695 | err = req->rq_received; |
1197 | goto out_unlock; | 696 | goto out_unlock; |
1198 | } | 697 | } |
1199 | if (!__xprt_lock_write(xprt, task)) { | 698 | if (!xprt->ops->reserve_xprt(task)) { |
1200 | err = -EAGAIN; | 699 | err = -EAGAIN; |
1201 | goto out_unlock; | 700 | goto out_unlock; |
1202 | } | 701 | } |
@@ -1206,39 +705,42 @@ xprt_prepare_transmit(struct rpc_task *task) | |||
1206 | goto out_unlock; | 705 | goto out_unlock; |
1207 | } | 706 | } |
1208 | out_unlock: | 707 | out_unlock: |
1209 | spin_unlock_bh(&xprt->sock_lock); | 708 | spin_unlock_bh(&xprt->transport_lock); |
1210 | return err; | 709 | return err; |
1211 | } | 710 | } |
1212 | 711 | ||
1213 | void | 712 | void |
1214 | xprt_transmit(struct rpc_task *task) | 713 | xprt_abort_transmit(struct rpc_task *task) |
714 | { | ||
715 | struct rpc_xprt *xprt = task->tk_xprt; | ||
716 | |||
717 | xprt_release_write(xprt, task); | ||
718 | } | ||
719 | |||
720 | /** | ||
721 | * xprt_transmit - send an RPC request on a transport | ||
722 | * @task: controlling RPC task | ||
723 | * | ||
724 | * We have to copy the iovec because sendmsg fiddles with its contents. | ||
725 | */ | ||
726 | void xprt_transmit(struct rpc_task *task) | ||
1215 | { | 727 | { |
1216 | struct rpc_clnt *clnt = task->tk_client; | ||
1217 | struct rpc_rqst *req = task->tk_rqstp; | 728 | struct rpc_rqst *req = task->tk_rqstp; |
1218 | struct rpc_xprt *xprt = req->rq_xprt; | 729 | struct rpc_xprt *xprt = req->rq_xprt; |
1219 | int status, retry = 0; | 730 | int status; |
1220 | |||
1221 | 731 | ||
1222 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); | 732 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); |
1223 | 733 | ||
1224 | /* set up everything as needed. */ | ||
1225 | /* Write the record marker */ | ||
1226 | if (xprt->stream) { | ||
1227 | u32 *marker = req->rq_svec[0].iov_base; | ||
1228 | |||
1229 | *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); | ||
1230 | } | ||
1231 | |||
1232 | smp_rmb(); | 734 | smp_rmb(); |
1233 | if (!req->rq_received) { | 735 | if (!req->rq_received) { |
1234 | if (list_empty(&req->rq_list)) { | 736 | if (list_empty(&req->rq_list)) { |
1235 | spin_lock_bh(&xprt->sock_lock); | 737 | spin_lock_bh(&xprt->transport_lock); |
1236 | /* Update the softirq receive buffer */ | 738 | /* Update the softirq receive buffer */ |
1237 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, | 739 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, |
1238 | sizeof(req->rq_private_buf)); | 740 | sizeof(req->rq_private_buf)); |
1239 | /* Add request to the receive list */ | 741 | /* Add request to the receive list */ |
1240 | list_add_tail(&req->rq_list, &xprt->recv); | 742 | list_add_tail(&req->rq_list, &xprt->recv); |
1241 | spin_unlock_bh(&xprt->sock_lock); | 743 | spin_unlock_bh(&xprt->transport_lock); |
1242 | xprt_reset_majortimeo(req); | 744 | xprt_reset_majortimeo(req); |
1243 | /* Turn off autodisconnect */ | 745 | /* Turn off autodisconnect */ |
1244 | del_singleshot_timer_sync(&xprt->timer); | 746 | del_singleshot_timer_sync(&xprt->timer); |
@@ -1246,40 +748,19 @@ xprt_transmit(struct rpc_task *task) | |||
1246 | } else if (!req->rq_bytes_sent) | 748 | } else if (!req->rq_bytes_sent) |
1247 | return; | 749 | return; |
1248 | 750 | ||
1249 | /* Continue transmitting the packet/record. We must be careful | 751 | status = xprt->ops->send_request(task); |
1250 | * to cope with writespace callbacks arriving _after_ we have | 752 | if (status == 0) { |
1251 | * called xprt_sendmsg(). | 753 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); |
1252 | */ | 754 | spin_lock_bh(&xprt->transport_lock); |
1253 | while (1) { | 755 | xprt->ops->set_retrans_timeout(task); |
1254 | req->rq_xtime = jiffies; | 756 | /* Don't race with disconnect */ |
1255 | status = xprt_sendmsg(xprt, req); | 757 | if (!xprt_connected(xprt)) |
1256 | 758 | task->tk_status = -ENOTCONN; | |
1257 | if (status < 0) | 759 | else if (!req->rq_received) |
1258 | break; | 760 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); |
1259 | 761 | xprt->ops->release_xprt(xprt, task); | |
1260 | if (xprt->stream) { | 762 | spin_unlock_bh(&xprt->transport_lock); |
1261 | req->rq_bytes_sent += status; | 763 | return; |
1262 | |||
1263 | /* If we've sent the entire packet, immediately | ||
1264 | * reset the count of bytes sent. */ | ||
1265 | if (req->rq_bytes_sent >= req->rq_slen) { | ||
1266 | req->rq_bytes_sent = 0; | ||
1267 | goto out_receive; | ||
1268 | } | ||
1269 | } else { | ||
1270 | if (status >= req->rq_slen) | ||
1271 | goto out_receive; | ||
1272 | status = -EAGAIN; | ||
1273 | break; | ||
1274 | } | ||
1275 | |||
1276 | dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", | ||
1277 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
1278 | req->rq_slen); | ||
1279 | |||
1280 | status = -EAGAIN; | ||
1281 | if (retry++ > 50) | ||
1282 | break; | ||
1283 | } | 764 | } |
1284 | 765 | ||
1285 | /* Note: at this point, task->tk_sleeping has not yet been set, | 766 | /* Note: at this point, task->tk_sleeping has not yet been set, |
@@ -1289,60 +770,19 @@ xprt_transmit(struct rpc_task *task) | |||
1289 | task->tk_status = status; | 770 | task->tk_status = status; |
1290 | 771 | ||
1291 | switch (status) { | 772 | switch (status) { |
1292 | case -EAGAIN: | ||
1293 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
1294 | /* Protect against races with xprt_write_space */ | ||
1295 | spin_lock_bh(&xprt->sock_lock); | ||
1296 | /* Don't race with disconnect */ | ||
1297 | if (!xprt_connected(xprt)) | ||
1298 | task->tk_status = -ENOTCONN; | ||
1299 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { | ||
1300 | task->tk_timeout = req->rq_timeout; | ||
1301 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
1302 | } | ||
1303 | spin_unlock_bh(&xprt->sock_lock); | ||
1304 | return; | ||
1305 | } | ||
1306 | /* Keep holding the socket if it is blocked */ | ||
1307 | rpc_delay(task, HZ>>4); | ||
1308 | return; | ||
1309 | case -ECONNREFUSED: | 773 | case -ECONNREFUSED: |
1310 | task->tk_timeout = RPC_REESTABLISH_TIMEOUT; | ||
1311 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | 774 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); |
775 | case -EAGAIN: | ||
1312 | case -ENOTCONN: | 776 | case -ENOTCONN: |
1313 | return; | 777 | return; |
1314 | default: | 778 | default: |
1315 | if (xprt->stream) | 779 | break; |
1316 | xprt_disconnect(xprt); | ||
1317 | } | 780 | } |
1318 | xprt_release_write(xprt, task); | 781 | xprt_release_write(xprt, task); |
1319 | return; | 782 | return; |
1320 | out_receive: | ||
1321 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); | ||
1322 | /* Set the task's receive timeout value */ | ||
1323 | spin_lock_bh(&xprt->sock_lock); | ||
1324 | if (!xprt->nocong) { | ||
1325 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
1326 | task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); | ||
1327 | task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; | ||
1328 | if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) | ||
1329 | task->tk_timeout = xprt->timeout.to_maxval; | ||
1330 | } else | ||
1331 | task->tk_timeout = req->rq_timeout; | ||
1332 | /* Don't race with disconnect */ | ||
1333 | if (!xprt_connected(xprt)) | ||
1334 | task->tk_status = -ENOTCONN; | ||
1335 | else if (!req->rq_received) | ||
1336 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); | ||
1337 | __xprt_release_write(xprt, task); | ||
1338 | spin_unlock_bh(&xprt->sock_lock); | ||
1339 | } | 783 | } |
1340 | 784 | ||
1341 | /* | 785 | static inline void do_xprt_reserve(struct rpc_task *task) |
1342 | * Reserve an RPC call slot. | ||
1343 | */ | ||
1344 | static inline void | ||
1345 | do_xprt_reserve(struct rpc_task *task) | ||
1346 | { | 786 | { |
1347 | struct rpc_xprt *xprt = task->tk_xprt; | 787 | struct rpc_xprt *xprt = task->tk_xprt; |
1348 | 788 | ||
@@ -1362,22 +802,25 @@ do_xprt_reserve(struct rpc_task *task) | |||
1362 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); | 802 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); |
1363 | } | 803 | } |
1364 | 804 | ||
1365 | void | 805 | /** |
1366 | xprt_reserve(struct rpc_task *task) | 806 | * xprt_reserve - allocate an RPC request slot |
807 | * @task: RPC task requesting a slot allocation | ||
808 | * | ||
809 | * If no more slots are available, place the task on the transport's | ||
810 | * backlog queue. | ||
811 | */ | ||
812 | void xprt_reserve(struct rpc_task *task) | ||
1367 | { | 813 | { |
1368 | struct rpc_xprt *xprt = task->tk_xprt; | 814 | struct rpc_xprt *xprt = task->tk_xprt; |
1369 | 815 | ||
1370 | task->tk_status = -EIO; | 816 | task->tk_status = -EIO; |
1371 | if (!xprt->shutdown) { | 817 | if (!xprt->shutdown) { |
1372 | spin_lock(&xprt->xprt_lock); | 818 | spin_lock(&xprt->reserve_lock); |
1373 | do_xprt_reserve(task); | 819 | do_xprt_reserve(task); |
1374 | spin_unlock(&xprt->xprt_lock); | 820 | spin_unlock(&xprt->reserve_lock); |
1375 | } | 821 | } |
1376 | } | 822 | } |
1377 | 823 | ||
1378 | /* | ||
1379 | * Allocate a 'unique' XID | ||
1380 | */ | ||
1381 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) | 824 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) |
1382 | { | 825 | { |
1383 | return xprt->xid++; | 826 | return xprt->xid++; |
@@ -1388,11 +831,7 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt) | |||
1388 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); | 831 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); |
1389 | } | 832 | } |
1390 | 833 | ||
1391 | /* | 834 | static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) |
1392 | * Initialize RPC request | ||
1393 | */ | ||
1394 | static void | ||
1395 | xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | ||
1396 | { | 835 | { |
1397 | struct rpc_rqst *req = task->tk_rqstp; | 836 | struct rpc_rqst *req = task->tk_rqstp; |
1398 | 837 | ||
@@ -1400,128 +839,104 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | |||
1400 | req->rq_task = task; | 839 | req->rq_task = task; |
1401 | req->rq_xprt = xprt; | 840 | req->rq_xprt = xprt; |
1402 | req->rq_xid = xprt_alloc_xid(xprt); | 841 | req->rq_xid = xprt_alloc_xid(xprt); |
842 | req->rq_release_snd_buf = NULL; | ||
1403 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, | 843 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, |
1404 | req, ntohl(req->rq_xid)); | 844 | req, ntohl(req->rq_xid)); |
1405 | } | 845 | } |
1406 | 846 | ||
1407 | /* | 847 | /** |
1408 | * Release an RPC call slot | 848 | * xprt_release - release an RPC request slot |
849 | * @task: task which is finished with the slot | ||
850 | * | ||
1409 | */ | 851 | */ |
1410 | void | 852 | void xprt_release(struct rpc_task *task) |
1411 | xprt_release(struct rpc_task *task) | ||
1412 | { | 853 | { |
1413 | struct rpc_xprt *xprt = task->tk_xprt; | 854 | struct rpc_xprt *xprt = task->tk_xprt; |
1414 | struct rpc_rqst *req; | 855 | struct rpc_rqst *req; |
1415 | 856 | ||
1416 | if (!(req = task->tk_rqstp)) | 857 | if (!(req = task->tk_rqstp)) |
1417 | return; | 858 | return; |
1418 | spin_lock_bh(&xprt->sock_lock); | 859 | spin_lock_bh(&xprt->transport_lock); |
1419 | __xprt_release_write(xprt, task); | 860 | xprt->ops->release_xprt(xprt, task); |
1420 | __xprt_put_cong(xprt, req); | 861 | if (xprt->ops->release_request) |
862 | xprt->ops->release_request(task); | ||
1421 | if (!list_empty(&req->rq_list)) | 863 | if (!list_empty(&req->rq_list)) |
1422 | list_del(&req->rq_list); | 864 | list_del(&req->rq_list); |
1423 | xprt->last_used = jiffies; | 865 | xprt->last_used = jiffies; |
1424 | if (list_empty(&xprt->recv) && !xprt->shutdown) | 866 | if (list_empty(&xprt->recv) && !xprt->shutdown) |
1425 | mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); | 867 | mod_timer(&xprt->timer, |
1426 | spin_unlock_bh(&xprt->sock_lock); | 868 | xprt->last_used + xprt->idle_timeout); |
869 | spin_unlock_bh(&xprt->transport_lock); | ||
1427 | task->tk_rqstp = NULL; | 870 | task->tk_rqstp = NULL; |
871 | if (req->rq_release_snd_buf) | ||
872 | req->rq_release_snd_buf(req); | ||
1428 | memset(req, 0, sizeof(*req)); /* mark unused */ | 873 | memset(req, 0, sizeof(*req)); /* mark unused */ |
1429 | 874 | ||
1430 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); | 875 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); |
1431 | 876 | ||
1432 | spin_lock(&xprt->xprt_lock); | 877 | spin_lock(&xprt->reserve_lock); |
1433 | list_add(&req->rq_list, &xprt->free); | 878 | list_add(&req->rq_list, &xprt->free); |
1434 | xprt_clear_backlog(xprt); | 879 | rpc_wake_up_next(&xprt->backlog); |
1435 | spin_unlock(&xprt->xprt_lock); | 880 | spin_unlock(&xprt->reserve_lock); |
1436 | } | ||
1437 | |||
1438 | /* | ||
1439 | * Set default timeout parameters | ||
1440 | */ | ||
1441 | static void | ||
1442 | xprt_default_timeout(struct rpc_timeout *to, int proto) | ||
1443 | { | ||
1444 | if (proto == IPPROTO_UDP) | ||
1445 | xprt_set_timeout(to, 5, 5 * HZ); | ||
1446 | else | ||
1447 | xprt_set_timeout(to, 5, 60 * HZ); | ||
1448 | } | 881 | } |
1449 | 882 | ||
1450 | /* | 883 | /** |
1451 | * Set constant timeout | 884 | * xprt_set_timeout - set constant RPC timeout |
885 | * @to: RPC timeout parameters to set up | ||
886 | * @retr: number of retries | ||
887 | * @incr: amount of increase after each retry | ||
888 | * | ||
1452 | */ | 889 | */ |
1453 | void | 890 | void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) |
1454 | xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) | ||
1455 | { | 891 | { |
1456 | to->to_initval = | 892 | to->to_initval = |
1457 | to->to_increment = incr; | 893 | to->to_increment = incr; |
1458 | to->to_maxval = incr * retr; | 894 | to->to_maxval = to->to_initval + (incr * retr); |
1459 | to->to_retries = retr; | 895 | to->to_retries = retr; |
1460 | to->to_exponential = 0; | 896 | to->to_exponential = 0; |
1461 | } | 897 | } |
1462 | 898 | ||
1463 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | 899 | static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) |
1464 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
1465 | |||
1466 | /* | ||
1467 | * Initialize an RPC client | ||
1468 | */ | ||
1469 | static struct rpc_xprt * | ||
1470 | xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | ||
1471 | { | 900 | { |
901 | int result; | ||
1472 | struct rpc_xprt *xprt; | 902 | struct rpc_xprt *xprt; |
1473 | unsigned int entries; | ||
1474 | size_t slot_table_size; | ||
1475 | struct rpc_rqst *req; | 903 | struct rpc_rqst *req; |
1476 | 904 | ||
1477 | dprintk("RPC: setting up %s transport...\n", | ||
1478 | proto == IPPROTO_UDP? "UDP" : "TCP"); | ||
1479 | |||
1480 | entries = (proto == IPPROTO_TCP)? | ||
1481 | xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; | ||
1482 | |||
1483 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) | 905 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) |
1484 | return ERR_PTR(-ENOMEM); | 906 | return ERR_PTR(-ENOMEM); |
1485 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ | 907 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ |
1486 | xprt->max_reqs = entries; | ||
1487 | slot_table_size = entries * sizeof(xprt->slot[0]); | ||
1488 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1489 | if (xprt->slot == NULL) { | ||
1490 | kfree(xprt); | ||
1491 | return ERR_PTR(-ENOMEM); | ||
1492 | } | ||
1493 | memset(xprt->slot, 0, slot_table_size); | ||
1494 | 908 | ||
1495 | xprt->addr = *ap; | 909 | xprt->addr = *ap; |
1496 | xprt->prot = proto; | 910 | |
1497 | xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; | 911 | switch (proto) { |
1498 | if (xprt->stream) { | 912 | case IPPROTO_UDP: |
1499 | xprt->cwnd = RPC_MAXCWND(xprt); | 913 | result = xs_setup_udp(xprt, to); |
1500 | xprt->nocong = 1; | 914 | break; |
1501 | xprt->max_payload = (1U << 31) - 1; | 915 | case IPPROTO_TCP: |
1502 | } else { | 916 | result = xs_setup_tcp(xprt, to); |
1503 | xprt->cwnd = RPC_INITCWND; | 917 | break; |
1504 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | 918 | default: |
919 | printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", | ||
920 | proto); | ||
921 | result = -EIO; | ||
922 | break; | ||
923 | } | ||
924 | if (result) { | ||
925 | kfree(xprt); | ||
926 | return ERR_PTR(result); | ||
1505 | } | 927 | } |
1506 | spin_lock_init(&xprt->sock_lock); | 928 | |
1507 | spin_lock_init(&xprt->xprt_lock); | 929 | spin_lock_init(&xprt->transport_lock); |
1508 | init_waitqueue_head(&xprt->cong_wait); | 930 | spin_lock_init(&xprt->reserve_lock); |
1509 | 931 | ||
1510 | INIT_LIST_HEAD(&xprt->free); | 932 | INIT_LIST_HEAD(&xprt->free); |
1511 | INIT_LIST_HEAD(&xprt->recv); | 933 | INIT_LIST_HEAD(&xprt->recv); |
1512 | INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); | 934 | INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); |
1513 | INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); | ||
1514 | init_timer(&xprt->timer); | 935 | init_timer(&xprt->timer); |
1515 | xprt->timer.function = xprt_init_autodisconnect; | 936 | xprt->timer.function = xprt_init_autodisconnect; |
1516 | xprt->timer.data = (unsigned long) xprt; | 937 | xprt->timer.data = (unsigned long) xprt; |
1517 | xprt->last_used = jiffies; | 938 | xprt->last_used = jiffies; |
1518 | xprt->port = XPRT_MAX_RESVPORT; | 939 | xprt->cwnd = RPC_INITCWND; |
1519 | |||
1520 | /* Set timeout parameters */ | ||
1521 | if (to) { | ||
1522 | xprt->timeout = *to; | ||
1523 | } else | ||
1524 | xprt_default_timeout(&xprt->timeout, xprt->prot); | ||
1525 | 940 | ||
1526 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); | 941 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); |
1527 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); | 942 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); |
@@ -1529,139 +944,25 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | |||
1529 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); | 944 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); |
1530 | 945 | ||
1531 | /* initialize free list */ | 946 | /* initialize free list */ |
1532 | for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) | 947 | for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) |
1533 | list_add(&req->rq_list, &xprt->free); | 948 | list_add(&req->rq_list, &xprt->free); |
1534 | 949 | ||
1535 | xprt_init_xid(xprt); | 950 | xprt_init_xid(xprt); |
1536 | 951 | ||
1537 | /* Check whether we want to use a reserved port */ | ||
1538 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1539 | |||
1540 | dprintk("RPC: created transport %p with %u slots\n", xprt, | 952 | dprintk("RPC: created transport %p with %u slots\n", xprt, |
1541 | xprt->max_reqs); | 953 | xprt->max_reqs); |
1542 | 954 | ||
1543 | return xprt; | 955 | return xprt; |
1544 | } | 956 | } |
1545 | 957 | ||
1546 | /* | 958 | /** |
1547 | * Bind to a reserved port | 959 | * xprt_create_proto - create an RPC client transport |
1548 | */ | 960 | * @proto: requested transport protocol |
1549 | static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | 961 | * @sap: remote peer's address |
1550 | { | 962 | * @to: timeout parameters for new transport |
1551 | struct sockaddr_in myaddr = { | 963 | * |
1552 | .sin_family = AF_INET, | ||
1553 | }; | ||
1554 | int err, port; | ||
1555 | |||
1556 | /* Were we already bound to a given port? Try to reuse it */ | ||
1557 | port = xprt->port; | ||
1558 | do { | ||
1559 | myaddr.sin_port = htons(port); | ||
1560 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
1561 | sizeof(myaddr)); | ||
1562 | if (err == 0) { | ||
1563 | xprt->port = port; | ||
1564 | return 0; | ||
1565 | } | ||
1566 | if (--port == 0) | ||
1567 | port = XPRT_MAX_RESVPORT; | ||
1568 | } while (err == -EADDRINUSE && port != xprt->port); | ||
1569 | |||
1570 | printk("RPC: Can't bind to reserved port (%d).\n", -err); | ||
1571 | return err; | ||
1572 | } | ||
1573 | |||
1574 | static void | ||
1575 | xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) | ||
1576 | { | ||
1577 | struct sock *sk = sock->sk; | ||
1578 | |||
1579 | if (xprt->inet) | ||
1580 | return; | ||
1581 | |||
1582 | write_lock_bh(&sk->sk_callback_lock); | ||
1583 | sk->sk_user_data = xprt; | ||
1584 | xprt->old_data_ready = sk->sk_data_ready; | ||
1585 | xprt->old_state_change = sk->sk_state_change; | ||
1586 | xprt->old_write_space = sk->sk_write_space; | ||
1587 | if (xprt->prot == IPPROTO_UDP) { | ||
1588 | sk->sk_data_ready = udp_data_ready; | ||
1589 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
1590 | xprt_set_connected(xprt); | ||
1591 | } else { | ||
1592 | tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ | ||
1593 | sk->sk_data_ready = tcp_data_ready; | ||
1594 | sk->sk_state_change = tcp_state_change; | ||
1595 | xprt_clear_connected(xprt); | ||
1596 | } | ||
1597 | sk->sk_write_space = xprt_write_space; | ||
1598 | |||
1599 | /* Reset to new socket */ | ||
1600 | xprt->sock = sock; | ||
1601 | xprt->inet = sk; | ||
1602 | write_unlock_bh(&sk->sk_callback_lock); | ||
1603 | |||
1604 | return; | ||
1605 | } | ||
1606 | |||
1607 | /* | ||
1608 | * Set socket buffer length | ||
1609 | */ | ||
1610 | void | ||
1611 | xprt_sock_setbufsize(struct rpc_xprt *xprt) | ||
1612 | { | ||
1613 | struct sock *sk = xprt->inet; | ||
1614 | |||
1615 | if (xprt->stream) | ||
1616 | return; | ||
1617 | if (xprt->rcvsize) { | ||
1618 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
1619 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
1620 | } | ||
1621 | if (xprt->sndsize) { | ||
1622 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
1623 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
1624 | sk->sk_write_space(sk); | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1628 | /* | ||
1629 | * Datastream sockets are created here, but xprt_connect will create | ||
1630 | * and connect stream sockets. | ||
1631 | */ | ||
1632 | static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) | ||
1633 | { | ||
1634 | struct socket *sock; | ||
1635 | int type, err; | ||
1636 | |||
1637 | dprintk("RPC: xprt_create_socket(%s %d)\n", | ||
1638 | (proto == IPPROTO_UDP)? "udp" : "tcp", proto); | ||
1639 | |||
1640 | type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | ||
1641 | |||
1642 | if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { | ||
1643 | printk("RPC: can't create socket (%d).\n", -err); | ||
1644 | return NULL; | ||
1645 | } | ||
1646 | |||
1647 | /* If the caller has the capability, bind to a reserved port */ | ||
1648 | if (resvport && xprt_bindresvport(xprt, sock) < 0) { | ||
1649 | printk("RPC: can't bind to reserved port.\n"); | ||
1650 | goto failed; | ||
1651 | } | ||
1652 | |||
1653 | return sock; | ||
1654 | |||
1655 | failed: | ||
1656 | sock_release(sock); | ||
1657 | return NULL; | ||
1658 | } | ||
1659 | |||
1660 | /* | ||
1661 | * Create an RPC client transport given the protocol and peer address. | ||
1662 | */ | 964 | */ |
1663 | struct rpc_xprt * | 965 | struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) |
1664 | xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | ||
1665 | { | 966 | { |
1666 | struct rpc_xprt *xprt; | 967 | struct rpc_xprt *xprt; |
1667 | 968 | ||
@@ -1673,46 +974,26 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | |||
1673 | return xprt; | 974 | return xprt; |
1674 | } | 975 | } |
1675 | 976 | ||
1676 | /* | 977 | static void xprt_shutdown(struct rpc_xprt *xprt) |
1677 | * Prepare for transport shutdown. | ||
1678 | */ | ||
1679 | static void | ||
1680 | xprt_shutdown(struct rpc_xprt *xprt) | ||
1681 | { | 978 | { |
1682 | xprt->shutdown = 1; | 979 | xprt->shutdown = 1; |
1683 | rpc_wake_up(&xprt->sending); | 980 | rpc_wake_up(&xprt->sending); |
1684 | rpc_wake_up(&xprt->resend); | 981 | rpc_wake_up(&xprt->resend); |
1685 | rpc_wake_up(&xprt->pending); | 982 | xprt_wake_pending_tasks(xprt, -EIO); |
1686 | rpc_wake_up(&xprt->backlog); | 983 | rpc_wake_up(&xprt->backlog); |
1687 | wake_up(&xprt->cong_wait); | ||
1688 | del_timer_sync(&xprt->timer); | 984 | del_timer_sync(&xprt->timer); |
1689 | |||
1690 | /* synchronously wait for connect worker to finish */ | ||
1691 | cancel_delayed_work(&xprt->sock_connect); | ||
1692 | flush_scheduled_work(); | ||
1693 | } | 985 | } |
1694 | 986 | ||
1695 | /* | 987 | /** |
1696 | * Clear the xprt backlog queue | 988 | * xprt_destroy - destroy an RPC transport, killing off all requests. |
1697 | */ | 989 | * @xprt: transport to destroy |
1698 | static int | 990 | * |
1699 | xprt_clear_backlog(struct rpc_xprt *xprt) { | ||
1700 | rpc_wake_up_next(&xprt->backlog); | ||
1701 | wake_up(&xprt->cong_wait); | ||
1702 | return 1; | ||
1703 | } | ||
1704 | |||
1705 | /* | ||
1706 | * Destroy an RPC transport, killing off all requests. | ||
1707 | */ | 991 | */ |
1708 | int | 992 | int xprt_destroy(struct rpc_xprt *xprt) |
1709 | xprt_destroy(struct rpc_xprt *xprt) | ||
1710 | { | 993 | { |
1711 | dprintk("RPC: destroying transport %p\n", xprt); | 994 | dprintk("RPC: destroying transport %p\n", xprt); |
1712 | xprt_shutdown(xprt); | 995 | xprt_shutdown(xprt); |
1713 | xprt_disconnect(xprt); | 996 | xprt->ops->destroy(xprt); |
1714 | xprt_close(xprt); | ||
1715 | kfree(xprt->slot); | ||
1716 | kfree(xprt); | 997 | kfree(xprt); |
1717 | 998 | ||
1718 | return 0; | 999 | return 0; |
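The xprt.c changes above route what used to be direct socket manipulation through a per-transport operations vector (xprt->ops->connect, xprt->ops->send_request, xprt->ops->close, xprt->ops->destroy, and so on), so the generic transport code no longer needs to know whether the underlying socket is UDP or TCP. The stand-alone C sketch below illustrates that dispatch pattern only; the struct layout, field types, and the udp_* names are simplified stand-ins for illustration, not the real struct rpc_xprt_ops introduced by this patch.

/* Minimal sketch of the operations-vector dispatch the patch introduces.
 * Field names mirror calls visible in the diff (connect, send_request,
 * close); everything else is deliberately simplified. */
#include <stdio.h>

struct fake_xprt;                          /* stand-in for struct rpc_xprt */

struct fake_xprt_ops {
	void (*connect)(struct fake_xprt *xprt);
	int  (*send_request)(struct fake_xprt *xprt, const char *buf);
	void (*close)(struct fake_xprt *xprt);
};

struct fake_xprt {
	const struct fake_xprt_ops *ops;   /* filled in by transport setup code */
	const char *name;
};

/* A "UDP-like" transport implementation. */
static void udp_connect(struct fake_xprt *xprt)
{
	printf("%s: connect is a no-op for datagram transports\n", xprt->name);
}

static int udp_send_request(struct fake_xprt *xprt, const char *buf)
{
	printf("%s: sending \"%s\" as a single datagram\n", xprt->name, buf);
	return 0;
}

static void udp_close(struct fake_xprt *xprt)
{
	printf("%s: releasing socket\n", xprt->name);
}

static const struct fake_xprt_ops udp_ops = {
	.connect      = udp_connect,
	.send_request = udp_send_request,
	.close        = udp_close,
};

int main(void)
{
	struct fake_xprt xprt = { .ops = &udp_ops, .name = "udp-sketch" };

	/* Generic code only ever calls through xprt.ops, as xprt.c now does. */
	xprt.ops->connect(&xprt);
	xprt.ops->send_request(&xprt, "GETATTR");
	xprt.ops->close(&xprt);
	return 0;
}

The point of the indirection is that xprt.c stays protocol-agnostic while the new xprtsock.c (below) supplies the UDP and TCP implementations behind the same function-pointer table.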
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c new file mode 100644 index 000000000000..2e1529217e65 --- /dev/null +++ b/net/sunrpc/xprtsock.c | |||
@@ -0,0 +1,1252 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/xprtsock.c | ||
3 | * | ||
4 | * Client-side transport implementation for sockets. | ||
5 | * | ||
6 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
7 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
8 | * TCP NFS related read + write fixes | ||
9 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
10 | * | ||
11 | * Rewrite of large parts of the code in order to stabilize TCP stuff. | ||
12 | * Fix behaviour when socket buffer is full. | ||
13 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
14 | * | ||
15 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/capability.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/pagemap.h> | ||
23 | #include <linux/errno.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/net.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/udp.h> | ||
29 | #include <linux/tcp.h> | ||
30 | #include <linux/sunrpc/clnt.h> | ||
31 | #include <linux/file.h> | ||
32 | |||
33 | #include <net/sock.h> | ||
34 | #include <net/checksum.h> | ||
35 | #include <net/udp.h> | ||
36 | #include <net/tcp.h> | ||
37 | |||
38 | /* | ||
39 | * How many times to try sending a request on a socket before waiting | ||
40 | * for the socket buffer to clear. | ||
41 | */ | ||
42 | #define XS_SENDMSG_RETRY (10U) | ||
43 | |||
44 | /* | ||
45 | * Time out for an RPC UDP socket connect. UDP socket connects are | ||
46 | * synchronous, but we set a timeout anyway in case of resource | ||
47 | * exhaustion on the local host. | ||
48 | */ | ||
49 | #define XS_UDP_CONN_TO (5U * HZ) | ||
50 | |||
51 | /* | ||
52 | * Wait duration for an RPC TCP connection to be established. Solaris | ||
53 | * NFS over TCP uses 60 seconds, for example, which is in line with how | ||
54 | * long a server takes to reboot. | ||
55 | */ | ||
56 | #define XS_TCP_CONN_TO (60U * HZ) | ||
57 | |||
58 | /* | ||
59 | * Wait duration for a reply from the RPC portmapper. | ||
60 | */ | ||
61 | #define XS_BIND_TO (60U * HZ) | ||
62 | |||
63 | /* | ||
64 | * Delay if a UDP socket connect error occurs. This is most likely some | ||
65 | * kind of resource problem on the local host. | ||
66 | */ | ||
67 | #define XS_UDP_REEST_TO (2U * HZ) | ||
68 | |||
69 | /* | ||
70 | * The reestablish timeout allows clients to delay for a bit before attempting | ||
71 | * to reconnect to a server that just dropped our connection. | ||
72 | * | ||
73 | * We implement an exponential backoff when trying to reestablish a TCP | ||
74 | * transport connection with the server. Some servers like to drop a TCP | ||
75 | * connection when they are overworked, so we start with a short timeout and | ||
76 | * increase over time if the server is down or not responding. | ||
77 | */ | ||
78 | #define XS_TCP_INIT_REEST_TO (3U * HZ) | ||
79 | #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) | ||
80 | |||
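Editor's note: as a worked illustration of the exponential backoff described above (a sketch only; the actual doubling is performed by xs_connect() further down in this file, and HZ = 1000 is assumed here purely for the printout):

#include <stdio.h>

#define HZ                   1000            /* assumption: 1000 jiffies per second */
#define XS_TCP_INIT_REEST_TO (3U * HZ)
#define XS_TCP_MAX_REEST_TO  (5U * 60 * HZ)

int main(void)
{
	unsigned long reest = XS_TCP_INIT_REEST_TO;
	int attempt;

	for (attempt = 1; attempt <= 8; attempt++) {
		printf("attempt %d: delay %lu s\n", attempt, reest / HZ);
		reest <<= 1;                            /* double after each try */
		if (reest > XS_TCP_MAX_REEST_TO)
			reest = XS_TCP_MAX_REEST_TO;    /* clamp at 5 minutes */
	}
	return 0;
}

This prints the delay sequence 3, 6, 12, 24, 48, 96, 192, 300 seconds, i.e. doubling from XS_TCP_INIT_REEST_TO and saturating at XS_TCP_MAX_REEST_TO.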
81 | /* | ||
82 | * TCP idle timeout; client drops the transport socket if it is idle | ||
83 | * for this long. Note that we also time out UDP sockets to prevent | ||
84 | * holding port numbers when there is no RPC traffic. | ||
85 | */ | ||
86 | #define XS_IDLE_DISC_TO (5U * 60 * HZ) | ||
87 | |||
88 | #ifdef RPC_DEBUG | ||
89 | # undef RPC_DEBUG_DATA | ||
90 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
91 | #endif | ||
92 | |||
93 | #ifdef RPC_DEBUG_DATA | ||
94 | static void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
95 | { | ||
96 | u8 *buf = (u8 *) packet; | ||
97 | int j; | ||
98 | |||
99 | dprintk("RPC: %s\n", msg); | ||
100 | for (j = 0; j < count && j < 128; j += 4) { | ||
101 | if (!(j & 31)) { | ||
102 | if (j) | ||
103 | dprintk("\n"); | ||
104 | dprintk("0x%04x ", j); | ||
105 | } | ||
106 | dprintk("%02x%02x%02x%02x ", | ||
107 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
108 | } | ||
109 | dprintk("\n"); | ||
110 | } | ||
111 | #else | ||
112 | static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
113 | { | ||
114 | /* NOP */ | ||
115 | } | ||
116 | #endif | ||
117 | |||
118 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) | ||
119 | |||
120 | static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
121 | { | ||
122 | struct kvec iov = { | ||
123 | .iov_base = xdr->head[0].iov_base + base, | ||
124 | .iov_len = len - base, | ||
125 | }; | ||
126 | struct msghdr msg = { | ||
127 | .msg_name = addr, | ||
128 | .msg_namelen = addrlen, | ||
129 | .msg_flags = XS_SENDMSG_FLAGS, | ||
130 | }; | ||
131 | |||
132 | if (xdr->len > len) | ||
133 | msg.msg_flags |= MSG_MORE; | ||
134 | |||
135 | if (likely(iov.iov_len)) | ||
136 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
137 | return kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
138 | } | ||
139 | |||
140 | static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
141 | { | ||
142 | struct kvec iov = { | ||
143 | .iov_base = xdr->tail[0].iov_base + base, | ||
144 | .iov_len = len - base, | ||
145 | }; | ||
146 | struct msghdr msg = { | ||
147 | .msg_flags = XS_SENDMSG_FLAGS, | ||
148 | }; | ||
149 | |||
150 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * xs_sendpages - write pages directly to a socket | ||
155 | * @sock: socket to send on | ||
156 | * @addr: UDP only -- address of destination | ||
157 | * @addrlen: UDP only -- length of destination address | ||
158 | * @xdr: buffer containing this request | ||
159 | * @base: starting position in the buffer | ||
160 | * | ||
161 | */ | ||
162 | static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) | ||
163 | { | ||
164 | struct page **ppage = xdr->pages; | ||
165 | unsigned int len, pglen = xdr->page_len; | ||
166 | int err, ret = 0; | ||
167 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
168 | |||
169 | if (unlikely(!sock)) | ||
170 | return -ENOTCONN; | ||
171 | |||
172 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
173 | |||
174 | len = xdr->head[0].iov_len; | ||
175 | if (base < len || (addr != NULL && base == 0)) { | ||
176 | err = xs_send_head(sock, addr, addrlen, xdr, base, len); | ||
177 | if (ret == 0) | ||
178 | ret = err; | ||
179 | else if (err > 0) | ||
180 | ret += err; | ||
181 | if (err != (len - base)) | ||
182 | goto out; | ||
183 | base = 0; | ||
184 | } else | ||
185 | base -= len; | ||
186 | |||
187 | if (unlikely(pglen == 0)) | ||
188 | goto copy_tail; | ||
189 | if (unlikely(base >= pglen)) { | ||
190 | base -= pglen; | ||
191 | goto copy_tail; | ||
192 | } | ||
193 | if (base || xdr->page_base) { | ||
194 | pglen -= base; | ||
195 | base += xdr->page_base; | ||
196 | ppage += base >> PAGE_CACHE_SHIFT; | ||
197 | base &= ~PAGE_CACHE_MASK; | ||
198 | } | ||
199 | |||
200 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
201 | do { | ||
202 | int flags = XS_SENDMSG_FLAGS; | ||
203 | |||
204 | len = PAGE_CACHE_SIZE; | ||
205 | if (base) | ||
206 | len -= base; | ||
207 | if (pglen < len) | ||
208 | len = pglen; | ||
209 | |||
210 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
211 | flags |= MSG_MORE; | ||
212 | |||
213 | /* Hmm... We might be dealing with highmem pages */ | ||
214 | if (PageHighMem(*ppage)) | ||
215 | sendpage = sock_no_sendpage; | ||
216 | err = sendpage(sock, *ppage, base, len, flags); | ||
217 | if (ret == 0) | ||
218 | ret = err; | ||
219 | else if (err > 0) | ||
220 | ret += err; | ||
221 | if (err != len) | ||
222 | goto out; | ||
223 | base = 0; | ||
224 | ppage++; | ||
225 | } while ((pglen -= len) != 0); | ||
226 | copy_tail: | ||
227 | len = xdr->tail[0].iov_len; | ||
228 | if (base < len) { | ||
229 | err = xs_send_tail(sock, xdr, base, len); | ||
230 | if (ret == 0) | ||
231 | ret = err; | ||
232 | else if (err > 0) | ||
233 | ret += err; | ||
234 | } | ||
235 | out: | ||
236 | return ret; | ||
237 | } | ||
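Editor's note: to make the traversal above easier to follow, here is a sketch (descriptive comments only, not new code) of the order in which xs_sendpages() pushes the parts of an xdr_buf, and how a partial send resumes:

/*
 * xdr->head[0]  -- RPC/XDR header kvec, sent via kernel_sendmsg()
 *                  (MSG_MORE set while more of the buffer follows)
 * xdr->pages[]  -- payload pages, sent via the socket's ->sendpage(),
 *                  falling back to sock_no_sendpage() for highmem pages
 * xdr->tail[0]  -- trailing kvec, sent last without MSG_MORE
 *
 * `base' is a byte offset into this logical stream, so a request that
 * was only partially transmitted picks up where the last call stopped.
 */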
238 | |||
239 | /** | ||
240 | * xs_nospace - place task on wait queue if transmit was incomplete | ||
241 | * @task: task to put to sleep | ||
242 | * | ||
243 | */ | ||
244 | static void xs_nospace(struct rpc_task *task) | ||
245 | { | ||
246 | struct rpc_rqst *req = task->tk_rqstp; | ||
247 | struct rpc_xprt *xprt = req->rq_xprt; | ||
248 | |||
249 | dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", | ||
250 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
251 | req->rq_slen); | ||
252 | |||
253 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
254 | /* Protect against races with write_space */ | ||
255 | spin_lock_bh(&xprt->transport_lock); | ||
256 | |||
257 | /* Don't race with disconnect */ | ||
258 | if (!xprt_connected(xprt)) | ||
259 | task->tk_status = -ENOTCONN; | ||
260 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) | ||
261 | xprt_wait_for_buffer_space(task); | ||
262 | |||
263 | spin_unlock_bh(&xprt->transport_lock); | ||
264 | } else | ||
265 | /* Keep holding the socket if it is blocked */ | ||
266 | rpc_delay(task, HZ>>4); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * xs_udp_send_request - write an RPC request to a UDP socket | ||
271 | * @task: address of RPC task that manages the state of an RPC request | ||
272 | * | ||
273 | * Return values: | ||
274 | * 0: The request has been sent | ||
275 | * EAGAIN: The socket was blocked, please call again later to | ||
276 | * complete the request | ||
277 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
278 | * other: Some other error occurred, the request was not sent | ||
279 | */ | ||
280 | static int xs_udp_send_request(struct rpc_task *task) | ||
281 | { | ||
282 | struct rpc_rqst *req = task->tk_rqstp; | ||
283 | struct rpc_xprt *xprt = req->rq_xprt; | ||
284 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
285 | int status; | ||
286 | |||
287 | xs_pktdump("packet data:", | ||
288 | req->rq_svec->iov_base, | ||
289 | req->rq_svec->iov_len); | ||
290 | |||
291 | req->rq_xtime = jiffies; | ||
292 | status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, | ||
293 | sizeof(xprt->addr), xdr, req->rq_bytes_sent); | ||
294 | |||
295 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | ||
296 | xdr->len - req->rq_bytes_sent, status); | ||
297 | |||
298 | if (likely(status >= (int) req->rq_slen)) | ||
299 | return 0; | ||
300 | |||
301 | /* Still some bytes left; set up for a retry later. */ | ||
302 | if (status > 0) | ||
303 | status = -EAGAIN; | ||
304 | |||
305 | switch (status) { | ||
306 | case -ENETUNREACH: | ||
307 | case -EPIPE: | ||
308 | case -ECONNREFUSED: | ||
309 | /* When the server has died, an ICMP port unreachable message | ||
310 | * prompts ECONNREFUSED. */ | ||
311 | break; | ||
312 | case -EAGAIN: | ||
313 | xs_nospace(task); | ||
314 | break; | ||
315 | default: | ||
316 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
317 | -status); | ||
318 | break; | ||
319 | } | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) | ||
325 | { | ||
326 | u32 reclen = buf->len - sizeof(rpc_fraghdr); | ||
327 | rpc_fraghdr *base = buf->head[0].iov_base; | ||
328 | *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); | ||
329 | } | ||
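Editor's note: for reference, a minimal user-space sketch of what the record marker above encodes, assuming RPC_LAST_STREAM_FRAGMENT is the standard 0x80000000 "last fragment" bit of RPC record marking (RFC 1831) and the remaining 31 bits carry the fragment length:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t frag_len = 100;	/* bytes in this fragment, marker excluded */
	uint32_t marker = htonl(0x80000000u | frag_len);

	/* The 4-byte marker precedes each record on the TCP stream. */
	printf("wire marker: 0x%08x\n", ntohl(marker));
	return 0;
}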
330 | |||
331 | /** | ||
332 | * xs_tcp_send_request - write an RPC request to a TCP socket | ||
333 | * @task: address of RPC task that manages the state of an RPC request | ||
334 | * | ||
335 | * Return values: | ||
336 | * 0: The request has been sent | ||
337 | * EAGAIN: The socket was blocked, please call again later to | ||
338 | * complete the request | ||
339 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
340 | * other: Some other error occurred, the request was not sent | ||
341 | * | ||
342 | * XXX: In the case of soft timeouts, should we eventually give up | ||
343 | * if sendmsg is not able to make progress? | ||
344 | */ | ||
345 | static int xs_tcp_send_request(struct rpc_task *task) | ||
346 | { | ||
347 | struct rpc_rqst *req = task->tk_rqstp; | ||
348 | struct rpc_xprt *xprt = req->rq_xprt; | ||
349 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
350 | int status, retry = 0; | ||
351 | |||
352 | xs_encode_tcp_record_marker(&req->rq_snd_buf); | ||
353 | |||
354 | xs_pktdump("packet data:", | ||
355 | req->rq_svec->iov_base, | ||
356 | req->rq_svec->iov_len); | ||
357 | |||
358 | /* Continue transmitting the packet/record. We must be careful | ||
359 | * to cope with writespace callbacks arriving _after_ we have | ||
360 | * called sendmsg(). */ | ||
361 | while (1) { | ||
362 | req->rq_xtime = jiffies; | ||
363 | status = xs_sendpages(xprt->sock, NULL, 0, xdr, | ||
364 | req->rq_bytes_sent); | ||
365 | |||
366 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n", | ||
367 | xdr->len - req->rq_bytes_sent, status); | ||
368 | |||
369 | if (unlikely(status < 0)) | ||
370 | break; | ||
371 | |||
372 | /* If we've sent the entire packet, immediately | ||
373 | * reset the count of bytes sent. */ | ||
374 | req->rq_bytes_sent += status; | ||
375 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { | ||
376 | req->rq_bytes_sent = 0; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | status = -EAGAIN; | ||
381 | if (retry++ > XS_SENDMSG_RETRY) | ||
382 | break; | ||
383 | } | ||
384 | |||
385 | switch (status) { | ||
386 | case -EAGAIN: | ||
387 | xs_nospace(task); | ||
388 | break; | ||
389 | case -ECONNREFUSED: | ||
390 | case -ECONNRESET: | ||
391 | case -ENOTCONN: | ||
392 | case -EPIPE: | ||
393 | status = -ENOTCONN; | ||
394 | break; | ||
395 | default: | ||
396 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
397 | -status); | ||
398 | xprt_disconnect(xprt); | ||
399 | break; | ||
400 | } | ||
401 | |||
402 | return status; | ||
403 | } | ||
404 | |||
405 | /** | ||
406 | * xs_close - close a socket | ||
407 | * @xprt: transport | ||
408 | * | ||
409 | * This is used when all requests are complete; i.e., no DRC state remains | ||
410 | * on the server we want to save. | ||
411 | */ | ||
412 | static void xs_close(struct rpc_xprt *xprt) | ||
413 | { | ||
414 | struct socket *sock = xprt->sock; | ||
415 | struct sock *sk = xprt->inet; | ||
416 | |||
417 | if (!sk) | ||
418 | return; | ||
419 | |||
420 | dprintk("RPC: xs_close xprt %p\n", xprt); | ||
421 | |||
422 | write_lock_bh(&sk->sk_callback_lock); | ||
423 | xprt->inet = NULL; | ||
424 | xprt->sock = NULL; | ||
425 | |||
426 | sk->sk_user_data = NULL; | ||
427 | sk->sk_data_ready = xprt->old_data_ready; | ||
428 | sk->sk_state_change = xprt->old_state_change; | ||
429 | sk->sk_write_space = xprt->old_write_space; | ||
430 | write_unlock_bh(&sk->sk_callback_lock); | ||
431 | |||
432 | sk->sk_no_check = 0; | ||
433 | |||
434 | sock_release(sock); | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * xs_destroy - prepare to shutdown a transport | ||
439 | * @xprt: doomed transport | ||
440 | * | ||
441 | */ | ||
442 | static void xs_destroy(struct rpc_xprt *xprt) | ||
443 | { | ||
444 | dprintk("RPC: xs_destroy xprt %p\n", xprt); | ||
445 | |||
446 | cancel_delayed_work(&xprt->connect_worker); | ||
447 | flush_scheduled_work(); | ||
448 | |||
449 | xprt_disconnect(xprt); | ||
450 | xs_close(xprt); | ||
451 | kfree(xprt->slot); | ||
452 | } | ||
453 | |||
454 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) | ||
455 | { | ||
456 | return (struct rpc_xprt *) sk->sk_user_data; | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * xs_udp_data_ready - "data ready" callback for UDP sockets | ||
461 | * @sk: socket with data to read | ||
462 | * @len: how much data to read | ||
463 | * | ||
464 | */ | ||
465 | static void xs_udp_data_ready(struct sock *sk, int len) | ||
466 | { | ||
467 | struct rpc_task *task; | ||
468 | struct rpc_xprt *xprt; | ||
469 | struct rpc_rqst *rovr; | ||
470 | struct sk_buff *skb; | ||
471 | int err, repsize, copied; | ||
472 | u32 _xid, *xp; | ||
473 | |||
474 | read_lock(&sk->sk_callback_lock); | ||
475 | dprintk("RPC: xs_udp_data_ready...\n"); | ||
476 | if (!(xprt = xprt_from_sock(sk))) | ||
477 | goto out; | ||
478 | |||
479 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
480 | goto out; | ||
481 | |||
482 | if (xprt->shutdown) | ||
483 | goto dropit; | ||
484 | |||
485 | repsize = skb->len - sizeof(struct udphdr); | ||
486 | if (repsize < 4) { | ||
487 | dprintk("RPC: impossible RPC reply size %d!\n", repsize); | ||
488 | goto dropit; | ||
489 | } | ||
490 | |||
491 | /* Copy the XID from the skb... */ | ||
492 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
493 | sizeof(_xid), &_xid); | ||
494 | if (xp == NULL) | ||
495 | goto dropit; | ||
496 | |||
497 | /* Look up and lock the request corresponding to the given XID */ | ||
498 | spin_lock(&xprt->transport_lock); | ||
499 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
500 | if (!rovr) | ||
501 | goto out_unlock; | ||
502 | task = rovr->rq_task; | ||
503 | |||
504 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
505 | copied = repsize; | ||
506 | |||
507 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
508 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
509 | goto out_unlock; | ||
510 | |||
511 | /* Something worked... */ | ||
512 | dst_confirm(skb->dst); | ||
513 | |||
514 | xprt_adjust_cwnd(task, copied); | ||
515 | xprt_update_rtt(task); | ||
516 | xprt_complete_rqst(task, copied); | ||
517 | |||
518 | out_unlock: | ||
519 | spin_unlock(&xprt->transport_lock); | ||
520 | dropit: | ||
521 | skb_free_datagram(sk, skb); | ||
522 | out: | ||
523 | read_unlock(&sk->sk_callback_lock); | ||
524 | } | ||
525 | |||
526 | static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
527 | { | ||
528 | if (len > desc->count) | ||
529 | len = desc->count; | ||
530 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
531 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
532 | len, desc->count); | ||
533 | return 0; | ||
534 | } | ||
535 | desc->offset += len; | ||
536 | desc->count -= len; | ||
537 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
538 | len, desc->count); | ||
539 | return len; | ||
540 | } | ||
541 | |||
542 | static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
543 | { | ||
544 | size_t len, used; | ||
545 | char *p; | ||
546 | |||
547 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
548 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
549 | used = xs_tcp_copy_data(desc, p, len); | ||
550 | xprt->tcp_offset += used; | ||
551 | if (used != len) | ||
552 | return; | ||
553 | |||
554 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
555 | if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) | ||
556 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
557 | else | ||
558 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
559 | xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; | ||
560 | |||
561 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
562 | xprt->tcp_offset = 0; | ||
563 | |||
564 | /* Sanity check of the record length */ | ||
565 | if (unlikely(xprt->tcp_reclen < 4)) { | ||
566 | dprintk("RPC: invalid TCP record fragment length\n"); | ||
567 | xprt_disconnect(xprt); | ||
568 | return; | ||
569 | } | ||
570 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
571 | xprt->tcp_reclen); | ||
572 | } | ||
573 | |||
574 | static void xs_tcp_check_recm(struct rpc_xprt *xprt) | ||
575 | { | ||
576 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
577 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
578 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
579 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
580 | xprt->tcp_offset = 0; | ||
581 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
582 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
583 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
584 | xprt->tcp_copied = 0; | ||
585 | } | ||
586 | } | ||
587 | } | ||
588 | |||
589 | static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
590 | { | ||
591 | size_t len, used; | ||
592 | char *p; | ||
593 | |||
594 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
595 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
596 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
597 | used = xs_tcp_copy_data(desc, p, len); | ||
598 | xprt->tcp_offset += used; | ||
599 | if (used != len) | ||
600 | return; | ||
601 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
602 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
603 | xprt->tcp_copied = 4; | ||
604 | dprintk("RPC: reading reply for XID %08x\n", | ||
605 | ntohl(xprt->tcp_xid)); | ||
606 | xs_tcp_check_recm(xprt); | ||
607 | } | ||
608 | |||
609 | static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
610 | { | ||
611 | struct rpc_rqst *req; | ||
612 | struct xdr_buf *rcvbuf; | ||
613 | size_t len; | ||
614 | ssize_t r; | ||
615 | |||
616 | /* Find and lock the request corresponding to this xid */ | ||
617 | spin_lock(&xprt->transport_lock); | ||
618 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
619 | if (!req) { | ||
620 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
621 | dprintk("RPC: XID %08x request not found!\n", | ||
622 | ntohl(xprt->tcp_xid)); | ||
623 | spin_unlock(&xprt->transport_lock); | ||
624 | return; | ||
625 | } | ||
626 | |||
627 | rcvbuf = &req->rq_private_buf; | ||
628 | len = desc->count; | ||
629 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
630 | skb_reader_t my_desc; | ||
631 | |||
632 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
633 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
634 | my_desc.count = len; | ||
635 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
636 | &my_desc, xs_tcp_copy_data); | ||
637 | desc->count -= r; | ||
638 | desc->offset += r; | ||
639 | } else | ||
640 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
641 | desc, xs_tcp_copy_data); | ||
642 | |||
643 | if (r > 0) { | ||
644 | xprt->tcp_copied += r; | ||
645 | xprt->tcp_offset += r; | ||
646 | } | ||
647 | if (r != len) { | ||
648 | /* Error when copying to the receive buffer, | ||
649 | * usually because we weren't able to allocate | ||
650 | * additional buffer pages. All we can do now | ||
651 | * is turn off XPRT_COPY_DATA, so the request | ||
652 | * will not receive any additional updates, | ||
653 | * and time out. | ||
654 | * Any remaining data from this record will | ||
655 | * be discarded. | ||
656 | */ | ||
657 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
658 | dprintk("RPC: XID %08x truncated request\n", | ||
659 | ntohl(xprt->tcp_xid)); | ||
660 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
661 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
662 | goto out; | ||
663 | } | ||
664 | |||
665 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
666 | ntohl(xprt->tcp_xid), r); | ||
667 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
668 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
669 | |||
670 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
671 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
672 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
673 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
674 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
675 | } | ||
676 | |||
677 | out: | ||
678 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) | ||
679 | xprt_complete_rqst(req->rq_task, xprt->tcp_copied); | ||
680 | spin_unlock(&xprt->transport_lock); | ||
681 | xs_tcp_check_recm(xprt); | ||
682 | } | ||
683 | |||
684 | static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
685 | { | ||
686 | size_t len; | ||
687 | |||
688 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
689 | if (len > desc->count) | ||
690 | len = desc->count; | ||
691 | desc->count -= len; | ||
692 | desc->offset += len; | ||
693 | xprt->tcp_offset += len; | ||
694 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
695 | xs_tcp_check_recm(xprt); | ||
696 | } | ||
697 | |||
698 | static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) | ||
699 | { | ||
700 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
701 | skb_reader_t desc = { | ||
702 | .skb = skb, | ||
703 | .offset = offset, | ||
704 | .count = len, | ||
705 | .csum = 0 | ||
706 | }; | ||
707 | |||
708 | dprintk("RPC: xs_tcp_data_recv started\n"); | ||
709 | do { | ||
710 | /* Read in a new fragment marker if necessary */ | ||
711 | /* Can we ever really expect to get completely empty fragments? */ | ||
712 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
713 | xs_tcp_read_fraghdr(xprt, &desc); | ||
714 | continue; | ||
715 | } | ||
716 | /* Read in the xid if necessary */ | ||
717 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
718 | xs_tcp_read_xid(xprt, &desc); | ||
719 | continue; | ||
720 | } | ||
721 | /* Read in the request data */ | ||
722 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
723 | xs_tcp_read_request(xprt, &desc); | ||
724 | continue; | ||
725 | } | ||
726 | /* Skip over any trailing bytes on short reads */ | ||
727 | xs_tcp_read_discard(xprt, &desc); | ||
728 | } while (desc.count); | ||
729 | dprintk("RPC: xs_tcp_data_recv done\n"); | ||
730 | return len - desc.count; | ||
731 | } | ||
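Editor's note: the receive path above is a small state machine keyed off xprt->tcp_flags; an outline (illustration only, mirroring the do/while loop):

/*
 *   while (bytes remain in this skb) {
 *           XPRT_COPY_RECM set  -> accumulate the 4-byte record marker
 *                                  (xs_tcp_read_fraghdr)
 *           XPRT_COPY_XID set   -> accumulate the XID of the reply
 *                                  (xs_tcp_read_xid)
 *           XPRT_COPY_DATA set  -> copy payload into the matching request
 *                                  (xs_tcp_read_request)
 *           otherwise           -> discard the rest of this record
 *                                  (xs_tcp_read_discard)
 *   }
 */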
732 | |||
733 | /** | ||
734 | * xs_tcp_data_ready - "data ready" callback for TCP sockets | ||
735 | * @sk: socket with data to read | ||
736 | * @bytes: how much data to read | ||
737 | * | ||
738 | */ | ||
739 | static void xs_tcp_data_ready(struct sock *sk, int bytes) | ||
740 | { | ||
741 | struct rpc_xprt *xprt; | ||
742 | read_descriptor_t rd_desc; | ||
743 | |||
744 | read_lock(&sk->sk_callback_lock); | ||
745 | dprintk("RPC: xs_tcp_data_ready...\n"); | ||
746 | if (!(xprt = xprt_from_sock(sk))) | ||
747 | goto out; | ||
748 | if (xprt->shutdown) | ||
749 | goto out; | ||
750 | |||
751 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ | ||
752 | rd_desc.arg.data = xprt; | ||
753 | rd_desc.count = 65536; | ||
754 | tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); | ||
755 | out: | ||
756 | read_unlock(&sk->sk_callback_lock); | ||
757 | } | ||
758 | |||
759 | /** | ||
760 | * xs_tcp_state_change - callback to handle TCP socket state changes | ||
761 | * @sk: socket whose state has changed | ||
762 | * | ||
763 | */ | ||
764 | static void xs_tcp_state_change(struct sock *sk) | ||
765 | { | ||
766 | struct rpc_xprt *xprt; | ||
767 | |||
768 | read_lock(&sk->sk_callback_lock); | ||
769 | if (!(xprt = xprt_from_sock(sk))) | ||
770 | goto out; | ||
771 | dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); | ||
772 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | ||
773 | sk->sk_state, xprt_connected(xprt), | ||
774 | sock_flag(sk, SOCK_DEAD), | ||
775 | sock_flag(sk, SOCK_ZAPPED)); | ||
776 | |||
777 | switch (sk->sk_state) { | ||
778 | case TCP_ESTABLISHED: | ||
779 | spin_lock_bh(&xprt->transport_lock); | ||
780 | if (!xprt_test_and_set_connected(xprt)) { | ||
781 | /* Reset TCP record info */ | ||
782 | xprt->tcp_offset = 0; | ||
783 | xprt->tcp_reclen = 0; | ||
784 | xprt->tcp_copied = 0; | ||
785 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
786 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
787 | xprt_wake_pending_tasks(xprt, 0); | ||
788 | } | ||
789 | spin_unlock_bh(&xprt->transport_lock); | ||
790 | break; | ||
791 | case TCP_SYN_SENT: | ||
792 | case TCP_SYN_RECV: | ||
793 | break; | ||
794 | default: | ||
795 | xprt_disconnect(xprt); | ||
796 | break; | ||
797 | } | ||
798 | out: | ||
799 | read_unlock(&sk->sk_callback_lock); | ||
800 | } | ||
801 | |||
802 | /** | ||
803 | * xs_udp_write_space - callback invoked when socket buffer space | ||
804 | * becomes available | ||
805 | * @sk: socket whose state has changed | ||
806 | * | ||
807 | * Called when more output buffer space is available for this socket. | ||
808 | * We try not to wake our writers until they can make "significant" | ||
809 | * progress, otherwise we'll waste resources thrashing kernel_sendmsg | ||
810 | * with a bunch of small requests. | ||
811 | */ | ||
812 | static void xs_udp_write_space(struct sock *sk) | ||
813 | { | ||
814 | read_lock(&sk->sk_callback_lock); | ||
815 | |||
816 | /* from net/core/sock.c:sock_def_write_space */ | ||
817 | if (sock_writeable(sk)) { | ||
818 | struct socket *sock; | ||
819 | struct rpc_xprt *xprt; | ||
820 | |||
821 | if (unlikely(!(sock = sk->sk_socket))) | ||
822 | goto out; | ||
823 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
824 | goto out; | ||
825 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
826 | goto out; | ||
827 | |||
828 | xprt_write_space(xprt); | ||
829 | } | ||
830 | |||
831 | out: | ||
832 | read_unlock(&sk->sk_callback_lock); | ||
833 | } | ||
834 | |||
835 | /** | ||
836 | * xs_tcp_write_space - callback invoked when socket buffer space | ||
837 | * becomes available | ||
838 | * @sk: socket whose state has changed | ||
839 | * | ||
840 | * Called when more output buffer space is available for this socket. | ||
841 | * We try not to wake our writers until they can make "significant" | ||
842 | * progress, otherwise we'll waste resources thrashing kernel_sendmsg | ||
843 | * with a bunch of small requests. | ||
844 | */ | ||
845 | static void xs_tcp_write_space(struct sock *sk) | ||
846 | { | ||
847 | read_lock(&sk->sk_callback_lock); | ||
848 | |||
849 | /* from net/core/stream.c:sk_stream_write_space */ | ||
850 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { | ||
851 | struct socket *sock; | ||
852 | struct rpc_xprt *xprt; | ||
853 | |||
854 | if (unlikely(!(sock = sk->sk_socket))) | ||
855 | goto out; | ||
856 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
857 | goto out; | ||
858 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
859 | goto out; | ||
860 | |||
861 | xprt_write_space(xprt); | ||
862 | } | ||
863 | |||
864 | out: | ||
865 | read_unlock(&sk->sk_callback_lock); | ||
866 | } | ||
867 | |||
868 | static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) | ||
869 | { | ||
870 | struct sock *sk = xprt->inet; | ||
871 | |||
872 | if (xprt->rcvsize) { | ||
873 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
874 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
875 | } | ||
876 | if (xprt->sndsize) { | ||
877 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
878 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
879 | sk->sk_write_space(sk); | ||
880 | } | ||
881 | } | ||
882 | |||
883 | /** | ||
884 | * xs_udp_set_buffer_size - set send and receive limits | ||
885 | * @xprt: generic transport | ||
886 | * @sndsize: requested size of send buffer, in bytes | ||
887 | * @rcvsize: requested size of receive buffer, in bytes | ||
888 | * | ||
889 | * Set socket send and receive buffer size limits. | ||
890 | */ | ||
891 | static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) | ||
892 | { | ||
893 | xprt->sndsize = 0; | ||
894 | if (sndsize) | ||
895 | xprt->sndsize = sndsize + 1024; | ||
896 | xprt->rcvsize = 0; | ||
897 | if (rcvsize) | ||
898 | xprt->rcvsize = rcvsize + 1024; | ||
899 | |||
900 | xs_udp_do_set_buffer_size(xprt); | ||
901 | } | ||
902 | |||
903 | /** | ||
904 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport | ||
905 | * @task: task that timed out | ||
906 | * | ||
907 | * Adjust the congestion window after a retransmit timeout has occurred. | ||
908 | */ | ||
909 | static void xs_udp_timer(struct rpc_task *task) | ||
910 | { | ||
911 | xprt_adjust_cwnd(task, -ETIMEDOUT); | ||
912 | } | ||
913 | |||
914 | static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | ||
915 | { | ||
916 | struct sockaddr_in myaddr = { | ||
917 | .sin_family = AF_INET, | ||
918 | }; | ||
919 | int err; | ||
920 | unsigned short port = xprt->port; | ||
921 | |||
922 | do { | ||
923 | myaddr.sin_port = htons(port); | ||
924 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
925 | sizeof(myaddr)); | ||
926 | if (err == 0) { | ||
927 | xprt->port = port; | ||
928 | dprintk("RPC: xs_bindresvport bound to port %u\n", | ||
929 | port); | ||
930 | return 0; | ||
931 | } | ||
932 | if (port <= xprt_min_resvport) | ||
933 | port = xprt_max_resvport; | ||
934 | else | ||
935 | port--; | ||
936 | } while (err == -EADDRINUSE && port != xprt->port); | ||
937 | |||
938 | dprintk("RPC: can't bind to reserved port (%d).\n", -err); | ||
939 | return err; | ||
940 | } | ||
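Editor's note: a minimal sketch of the descending port search used above, with hypothetical min_resvport/max_resvport parameters standing in for the xprt_min_resvport/xprt_max_resvport sysctls:

/* Returns the next candidate port after `port' fails with EADDRINUSE;
 * the scan walks downward and wraps from the bottom of the range back
 * to the top, and the caller stops once it returns to the starting port. */
static unsigned short next_resv_port(unsigned short port,
				     unsigned short min_resvport,
				     unsigned short max_resvport)
{
	if (port <= min_resvport)
		return max_resvport;
	return port - 1;
}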
941 | |||
942 | /** | ||
943 | * xs_udp_connect_worker - set up a UDP socket | ||
944 | * @args: RPC transport to connect | ||
945 | * | ||
946 | * Invoked by a work queue tasklet. | ||
947 | */ | ||
948 | static void xs_udp_connect_worker(void *args) | ||
949 | { | ||
950 | struct rpc_xprt *xprt = (struct rpc_xprt *) args; | ||
951 | struct socket *sock = xprt->sock; | ||
952 | int err, status = -EIO; | ||
953 | |||
954 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
955 | goto out; | ||
956 | |||
957 | dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); | ||
958 | |||
959 | /* Start by resetting any existing state */ | ||
960 | xs_close(xprt); | ||
961 | |||
962 | if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { | ||
963 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); | ||
964 | goto out; | ||
965 | } | ||
966 | |||
967 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
968 | sock_release(sock); | ||
969 | goto out; | ||
970 | } | ||
971 | |||
972 | if (!xprt->inet) { | ||
973 | struct sock *sk = sock->sk; | ||
974 | |||
975 | write_lock_bh(&sk->sk_callback_lock); | ||
976 | |||
977 | sk->sk_user_data = xprt; | ||
978 | xprt->old_data_ready = sk->sk_data_ready; | ||
979 | xprt->old_state_change = sk->sk_state_change; | ||
980 | xprt->old_write_space = sk->sk_write_space; | ||
981 | sk->sk_data_ready = xs_udp_data_ready; | ||
982 | sk->sk_write_space = xs_udp_write_space; | ||
983 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
984 | |||
985 | xprt_set_connected(xprt); | ||
986 | |||
987 | /* Reset to new socket */ | ||
988 | xprt->sock = sock; | ||
989 | xprt->inet = sk; | ||
990 | |||
991 | write_unlock_bh(&sk->sk_callback_lock); | ||
992 | } | ||
993 | xs_udp_do_set_buffer_size(xprt); | ||
994 | status = 0; | ||
995 | out: | ||
996 | xprt_wake_pending_tasks(xprt, status); | ||
997 | xprt_clear_connecting(xprt); | ||
998 | } | ||
999 | |||
1000 | /* | ||
1001 | * We need to preserve the port number so the reply cache on the server can | ||
1002 | * find our cached RPC replies when we get around to reconnecting. | ||
1003 | */ | ||
1004 | static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) | ||
1005 | { | ||
1006 | int result; | ||
1007 | struct socket *sock = xprt->sock; | ||
1008 | struct sockaddr any; | ||
1009 | |||
1010 | dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); | ||
1011 | |||
1012 | /* | ||
1013 | * Disconnect the transport socket by doing a connect operation | ||
1014 | * with AF_UNSPEC. This should return immediately... | ||
1015 | */ | ||
1016 | memset(&any, 0, sizeof(any)); | ||
1017 | any.sa_family = AF_UNSPEC; | ||
1018 | result = sock->ops->connect(sock, &any, sizeof(any), 0); | ||
1019 | if (result) | ||
1020 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | ||
1021 | result); | ||
1022 | } | ||
1023 | |||
1024 | /** | ||
1025 | * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint | ||
1026 | * @args: RPC transport to connect | ||
1027 | * | ||
1028 | * Invoked by a work queue tasklet. | ||
1029 | */ | ||
1030 | static void xs_tcp_connect_worker(void *args) | ||
1031 | { | ||
1032 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
1033 | struct socket *sock = xprt->sock; | ||
1034 | int err, status = -EIO; | ||
1035 | |||
1036 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
1037 | goto out; | ||
1038 | |||
1039 | dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); | ||
1040 | |||
1041 | if (!xprt->sock) { | ||
1042 | /* start from scratch */ | ||
1043 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | ||
1044 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | ||
1045 | goto out; | ||
1046 | } | ||
1047 | |||
1048 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
1049 | sock_release(sock); | ||
1050 | goto out; | ||
1051 | } | ||
1052 | } else | ||
1053 | /* "close" the socket, preserving the local port */ | ||
1054 | xs_tcp_reuse_connection(xprt); | ||
1055 | |||
1056 | if (!xprt->inet) { | ||
1057 | struct sock *sk = sock->sk; | ||
1058 | |||
1059 | write_lock_bh(&sk->sk_callback_lock); | ||
1060 | |||
1061 | sk->sk_user_data = xprt; | ||
1062 | xprt->old_data_ready = sk->sk_data_ready; | ||
1063 | xprt->old_state_change = sk->sk_state_change; | ||
1064 | xprt->old_write_space = sk->sk_write_space; | ||
1065 | sk->sk_data_ready = xs_tcp_data_ready; | ||
1066 | sk->sk_state_change = xs_tcp_state_change; | ||
1067 | sk->sk_write_space = xs_tcp_write_space; | ||
1068 | |||
1069 | /* socket options */ | ||
1070 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
1071 | sock_reset_flag(sk, SOCK_LINGER); | ||
1072 | tcp_sk(sk)->linger2 = 0; | ||
1073 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | ||
1074 | |||
1075 | xprt_clear_connected(xprt); | ||
1076 | |||
1077 | /* Reset to new socket */ | ||
1078 | xprt->sock = sock; | ||
1079 | xprt->inet = sk; | ||
1080 | |||
1081 | write_unlock_bh(&sk->sk_callback_lock); | ||
1082 | } | ||
1083 | |||
1084 | /* Tell the socket layer to start connecting... */ | ||
1085 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
1086 | sizeof(xprt->addr), O_NONBLOCK); | ||
1087 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
1088 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
1089 | if (status < 0) { | ||
1090 | switch (status) { | ||
1091 | case -EINPROGRESS: | ||
1092 | case -EALREADY: | ||
1093 | goto out_clear; | ||
1094 | case -ECONNREFUSED: | ||
1095 | case -ECONNRESET: | ||
1096 | /* retry with existing socket, after a delay */ | ||
1097 | break; | ||
1098 | default: | ||
1099 | /* get rid of existing socket, and retry */ | ||
1100 | xs_close(xprt); | ||
1101 | break; | ||
1102 | } | ||
1103 | } | ||
1104 | out: | ||
1105 | xprt_wake_pending_tasks(xprt, status); | ||
1106 | out_clear: | ||
1107 | xprt_clear_connecting(xprt); | ||
1108 | } | ||
1109 | |||
1110 | /** | ||
1111 | * xs_connect - connect a socket to a remote endpoint | ||
1112 | * @task: address of RPC task that manages state of connect request | ||
1113 | * | ||
1114 | * TCP: If the remote end dropped the connection, delay reconnecting. | ||
1115 | * | ||
1116 | * UDP socket connects are synchronous, but we use a work queue anyway | ||
1117 | * to guarantee that even unprivileged user processes can set up a | ||
1118 | * socket on a privileged port. | ||
1119 | * | ||
1120 | * If a UDP socket connect fails, the delay behavior here prevents | ||
1121 | * retry floods (hard mounts). | ||
1122 | */ | ||
1123 | static void xs_connect(struct rpc_task *task) | ||
1124 | { | ||
1125 | struct rpc_xprt *xprt = task->tk_xprt; | ||
1126 | |||
1127 | if (xprt_test_and_set_connecting(xprt)) | ||
1128 | return; | ||
1129 | |||
1130 | if (xprt->sock != NULL) { | ||
1131 | dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", | ||
1132 | xprt, xprt->reestablish_timeout / HZ); | ||
1133 | schedule_delayed_work(&xprt->connect_worker, | ||
1134 | xprt->reestablish_timeout); | ||
1135 | xprt->reestablish_timeout <<= 1; | ||
1136 | if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) | ||
1137 | xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; | ||
1138 | } else { | ||
1139 | dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); | ||
1140 | schedule_work(&xprt->connect_worker); | ||
1141 | |||
1142 | /* flush_scheduled_work can sleep... */ | ||
1143 | if (!RPC_IS_ASYNC(task)) | ||
1144 | flush_scheduled_work(); | ||
1145 | } | ||
1146 | } | ||
1147 | |||
1148 | static struct rpc_xprt_ops xs_udp_ops = { | ||
1149 | .set_buffer_size = xs_udp_set_buffer_size, | ||
1150 | .reserve_xprt = xprt_reserve_xprt_cong, | ||
1151 | .release_xprt = xprt_release_xprt_cong, | ||
1152 | .connect = xs_connect, | ||
1153 | .send_request = xs_udp_send_request, | ||
1154 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, | ||
1155 | .timer = xs_udp_timer, | ||
1156 | .release_request = xprt_release_rqst_cong, | ||
1157 | .close = xs_close, | ||
1158 | .destroy = xs_destroy, | ||
1159 | }; | ||
1160 | |||
1161 | static struct rpc_xprt_ops xs_tcp_ops = { | ||
1162 | .reserve_xprt = xprt_reserve_xprt, | ||
1163 | .release_xprt = xprt_release_xprt, | ||
1164 | .connect = xs_connect, | ||
1165 | .send_request = xs_tcp_send_request, | ||
1166 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | ||
1167 | .close = xs_close, | ||
1168 | .destroy = xs_destroy, | ||
1169 | }; | ||
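Editor's note: these two tables are how the generic transport code reaches the socket layer; the xprt_destroy() hunk earlier in this patch already shows one such indirect call (xprt->ops->destroy(xprt)). A sketch of the dispatch, illustration only:

/*
 *   xprt->ops->connect(task)       ->  xs_connect()
 *   xprt->ops->send_request(task)  ->  xs_udp_send_request() or
 *                                      xs_tcp_send_request()
 *   xprt->ops->close(xprt)         ->  xs_close()
 *   xprt->ops->destroy(xprt)       ->  xs_destroy()
 */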
1170 | |||
1171 | /** | ||
1172 | * xs_setup_udp - Set up transport to use a UDP socket | ||
1173 | * @xprt: transport to set up | ||
1174 | * @to: timeout parameters | ||
1175 | * | ||
1176 | */ | ||
1177 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
1178 | { | ||
1179 | size_t slot_table_size; | ||
1180 | |||
1181 | dprintk("RPC: setting up udp-ipv4 transport...\n"); | ||
1182 | |||
1183 | xprt->max_reqs = xprt_udp_slot_table_entries; | ||
1184 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
1185 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1186 | if (xprt->slot == NULL) | ||
1187 | return -ENOMEM; | ||
1188 | memset(xprt->slot, 0, slot_table_size); | ||
1189 | |||
1190 | xprt->prot = IPPROTO_UDP; | ||
1191 | xprt->port = xprt_max_resvport; | ||
1192 | xprt->tsh_size = 0; | ||
1193 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1194 | /* XXX: header size can vary due to auth type, IPv6, etc. */ | ||
1195 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | ||
1196 | |||
1197 | INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); | ||
1198 | xprt->bind_timeout = XS_BIND_TO; | ||
1199 | xprt->connect_timeout = XS_UDP_CONN_TO; | ||
1200 | xprt->reestablish_timeout = XS_UDP_REEST_TO; | ||
1201 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1202 | |||
1203 | xprt->ops = &xs_udp_ops; | ||
1204 | |||
1205 | if (to) | ||
1206 | xprt->timeout = *to; | ||
1207 | else | ||
1208 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); | ||
1209 | |||
1210 | return 0; | ||
1211 | } | ||
1212 | |||
1213 | /** | ||
1214 | * xs_setup_tcp - Set up transport to use a TCP socket | ||
1215 | * @xprt: transport to set up | ||
1216 | * @to: timeout parameters | ||
1217 | * | ||
1218 | */ | ||
1219 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
1220 | { | ||
1221 | size_t slot_table_size; | ||
1222 | |||
1223 | dprintk("RPC: setting up tcp-ipv4 transport...\n"); | ||
1224 | |||
1225 | xprt->max_reqs = xprt_tcp_slot_table_entries; | ||
1226 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
1227 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1228 | if (xprt->slot == NULL) | ||
1229 | return -ENOMEM; | ||
1230 | memset(xprt->slot, 0, slot_table_size); | ||
1231 | |||
1232 | xprt->prot = IPPROTO_TCP; | ||
1233 | xprt->port = xprt_max_resvport; | ||
1234 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | ||
1235 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1236 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | ||
1237 | |||
1238 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | ||
1239 | xprt->bind_timeout = XS_BIND_TO; | ||
1240 | xprt->connect_timeout = XS_TCP_CONN_TO; | ||
1241 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
1242 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1243 | |||
1244 | xprt->ops = &xs_tcp_ops; | ||
1245 | |||
1246 | if (to) | ||
1247 | xprt->timeout = *to; | ||
1248 | else | ||
1249 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); | ||
1250 | |||
1251 | return 0; | ||
1252 | } | ||
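Editor's note: a hypothetical caller sketch (not part of this patch): a transport creation path could select between the two setup routines by protocol number, since both fill in the same struct rpc_xprt.

/* Hypothetical helper, for illustration only. */
static int setup_socket_transport(struct rpc_xprt *xprt, int proto,
				  struct rpc_timeout *to)
{
	if (proto == IPPROTO_UDP)
		return xs_setup_udp(xprt, to);
	return xs_setup_tcp(xprt, to);
}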