aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c33
-rw-r--r--fs/9p/vfs_dir.c14
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/attr.c5
-rw-r--r--fs/btrfs/file.c33
-rw-r--r--fs/btrfs/inode.c18
-rw-r--r--fs/btrfs/ioctl.c32
-rw-r--r--fs/btrfs/xattr.c50
-rw-r--r--fs/cifs/cifsencrypt.c54
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/cifs/cifssmb.c3
-rw-r--r--fs/cifs/connect.c8
-rw-r--r--fs/cifs/xattr.c40
-rw-r--r--fs/coda/coda_linux.h5
-rw-r--r--fs/compat.c1
-rw-r--r--fs/configfs/inode.c3
-rw-r--r--fs/configfs/item.c2
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/ext2/xattr_security.c34
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext3/xattr_security.c36
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c3
-rw-r--r--fs/ext4/xattr_security.c36
-rw-r--r--fs/gfs2/inode.c38
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/meta_io.c6
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/hfsplus/super.c15
-rw-r--r--fs/hfsplus/wrapper.c4
-rw-r--r--fs/jffs2/security.c35
-rw-r--r--fs/jfs/xattr.c57
-rw-r--r--fs/lockd/host.c25
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/locks.c225
-rw-r--r--fs/namei.c12
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c58
-rw-r--r--fs/nfs/blocklayout/blocklayout.h4
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c35
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/client.c11
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/fscache-index.c4
-rw-r--r--fs/nfs/idmap.c25
-rw-r--r--fs/nfs/inode.c16
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/nfs4_fs.h32
-rw-r--r--fs/nfs/nfs4filelayout.c33
-rw-r--r--fs/nfs/nfs4proc.c113
-rw-r--r--fs/nfs/nfs4renewd.c12
-rw-r--r--fs/nfs/nfs4state.c6
-rw-r--r--fs/nfs/pnfs.c52
-rw-r--r--fs/nfs/pnfs.h5
-rw-r--r--fs/nfs/read.c40
-rw-r--r--fs/nfs/super.c42
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfs/write.c75
-rw-r--r--fs/nfsd/export.c16
-rw-r--r--fs/nfsd/nfs4callback.c20
-rw-r--r--fs/nfsd/nfs4proc.c374
-rw-r--r--fs/nfsd/nfs4recover.c53
-rw-r--r--fs/nfsd/nfs4state.c1794
-rw-r--r--fs/nfsd/nfs4xdr.c380
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsd.h33
-rw-r--r--fs/nfsd/nfsfh.c39
-rw-r--r--fs/nfsd/state.h174
-rw-r--r--fs/nfsd/vfs.c31
-rw-r--r--fs/nfsd/vfs.h29
-rw-r--r--fs/nfsd/xdr4.h28
-rw-r--r--fs/ocfs2/xattr.c38
-rw-r--r--fs/proc/stat.c41
-rw-r--r--fs/proc/task_mmu.c80
-rw-r--r--fs/quota/quota.c2
-rw-r--r--fs/reiserfs/journal.c9
-rw-r--r--fs/reiserfs/resize.c4
-rw-r--r--fs/reiserfs/xattr_security.c4
-rw-r--r--fs/squashfs/Kconfig6
-rw-r--r--fs/stat.c2
-rw-r--r--fs/sysfs/dir.c182
-rw-r--r--fs/sysfs/file.c56
-rw-r--r--fs/sysfs/inode.c16
-rw-r--r--fs/sysfs/sysfs.h17
-rw-r--r--fs/xattr.c63
-rw-r--r--fs/xfs/kmem.h7
-rw-r--r--fs/xfs/xfs_aops.c3
-rw-r--r--fs/xfs/xfs_buf_item.c3
-rw-r--r--fs/xfs/xfs_dquot_item.c10
-rw-r--r--fs/xfs/xfs_inode_item.c10
-rw-r--r--fs/xfs/xfs_iops.c39
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_super.c13
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c83
-rw-r--r--fs/xfs/xfs_trans_priv.h8
99 files changed, 2849 insertions, 2272 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index ef9661886112..2b78014a124a 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -132,21 +132,19 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
132 options = tmp_options; 132 options = tmp_options;
133 133
134 while ((p = strsep(&options, ",")) != NULL) { 134 while ((p = strsep(&options, ",")) != NULL) {
135 int token; 135 int token, r;
136 if (!*p) 136 if (!*p)
137 continue; 137 continue;
138 token = match_token(p, tokens, args); 138 token = match_token(p, tokens, args);
139 if (token < Opt_uname) { 139 switch (token) {
140 int r = match_int(&args[0], &option); 140 case Opt_debug:
141 r = match_int(&args[0], &option);
141 if (r < 0) { 142 if (r < 0) {
142 P9_DPRINTK(P9_DEBUG_ERROR, 143 P9_DPRINTK(P9_DEBUG_ERROR,
143 "integer field, but no integer?\n"); 144 "integer field, but no integer?\n");
144 ret = r; 145 ret = r;
145 continue; 146 continue;
146 } 147 }
147 }
148 switch (token) {
149 case Opt_debug:
150 v9ses->debug = option; 148 v9ses->debug = option;
151#ifdef CONFIG_NET_9P_DEBUG 149#ifdef CONFIG_NET_9P_DEBUG
152 p9_debug_level = option; 150 p9_debug_level = option;
@@ -154,12 +152,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
154 break; 152 break;
155 153
156 case Opt_dfltuid: 154 case Opt_dfltuid:
155 r = match_int(&args[0], &option);
156 if (r < 0) {
157 P9_DPRINTK(P9_DEBUG_ERROR,
158 "integer field, but no integer?\n");
159 ret = r;
160 continue;
161 }
157 v9ses->dfltuid = option; 162 v9ses->dfltuid = option;
158 break; 163 break;
159 case Opt_dfltgid: 164 case Opt_dfltgid:
165 r = match_int(&args[0], &option);
166 if (r < 0) {
167 P9_DPRINTK(P9_DEBUG_ERROR,
168 "integer field, but no integer?\n");
169 ret = r;
170 continue;
171 }
160 v9ses->dfltgid = option; 172 v9ses->dfltgid = option;
161 break; 173 break;
162 case Opt_afid: 174 case Opt_afid:
175 r = match_int(&args[0], &option);
176 if (r < 0) {
177 P9_DPRINTK(P9_DEBUG_ERROR,
178 "integer field, but no integer?\n");
179 ret = r;
180 continue;
181 }
163 v9ses->afid = option; 182 v9ses->afid = option;
164 break; 183 break;
165 case Opt_uname: 184 case Opt_uname:
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 9c2bdda5cd9d..598fff1a54e5 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -165,9 +165,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
165 } 165 }
166 while (rdir->head < rdir->tail) { 166 while (rdir->head < rdir->tail) {
167 p9stat_init(&st); 167 p9stat_init(&st);
168 err = p9stat_read(rdir->buf + rdir->head, 168 err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
169 rdir->tail - rdir->head, &st, 169 rdir->tail - rdir->head, &st);
170 fid->clnt->proto_version);
171 if (err) { 170 if (err) {
172 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 171 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
173 err = -EIO; 172 err = -EIO;
@@ -231,7 +230,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
231 while (err == 0) { 230 while (err == 0) {
232 if (rdir->tail == rdir->head) { 231 if (rdir->tail == rdir->head) {
233 err = p9_client_readdir(fid, rdir->buf, buflen, 232 err = p9_client_readdir(fid, rdir->buf, buflen,
234 filp->f_pos); 233 filp->f_pos);
235 if (err <= 0) 234 if (err <= 0)
236 goto unlock_and_exit; 235 goto unlock_and_exit;
237 236
@@ -241,10 +240,9 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
241 240
242 while (rdir->head < rdir->tail) { 241 while (rdir->head < rdir->tail) {
243 242
244 err = p9dirent_read(rdir->buf + rdir->head, 243 err = p9dirent_read(fid->clnt, rdir->buf + rdir->head,
245 rdir->tail - rdir->head, 244 rdir->tail - rdir->head,
246 &curdirent, 245 &curdirent);
247 fid->clnt->proto_version);
248 if (err < 0) { 246 if (err < 0) {
249 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); 247 P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
250 err = -EIO; 248 err = -EIO;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e3c03db3c788..b5a1076aaa6c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -278,10 +278,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
278 case S_IFSOCK: 278 case S_IFSOCK:
279 if (v9fs_proto_dotl(v9ses)) { 279 if (v9fs_proto_dotl(v9ses)) {
280 inode->i_op = &v9fs_file_inode_operations_dotl; 280 inode->i_op = &v9fs_file_inode_operations_dotl;
281 inode->i_fop = &v9fs_file_operations_dotl;
282 } else if (v9fs_proto_dotu(v9ses)) { 281 } else if (v9fs_proto_dotu(v9ses)) {
283 inode->i_op = &v9fs_file_inode_operations; 282 inode->i_op = &v9fs_file_inode_operations;
284 inode->i_fop = &v9fs_file_operations;
285 } else { 283 } else {
286 P9_DPRINTK(P9_DEBUG_ERROR, 284 P9_DPRINTK(P9_DEBUG_ERROR,
287 "special files without extended mode\n"); 285 "special files without extended mode\n");
diff --git a/fs/attr.c b/fs/attr.c
index 538e27959d3f..7ee7ba488313 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -13,6 +13,7 @@
13#include <linux/fsnotify.h> 13#include <linux/fsnotify.h>
14#include <linux/fcntl.h> 14#include <linux/fcntl.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/evm.h>
16 17
17/** 18/**
18 * inode_change_ok - check if attribute changes to an inode are allowed 19 * inode_change_ok - check if attribute changes to an inode are allowed
@@ -237,8 +238,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
237 else 238 else
238 error = simple_setattr(dentry, attr); 239 error = simple_setattr(dentry, attr);
239 240
240 if (!error) 241 if (!error) {
241 fsnotify_change(dentry, ia_valid); 242 fsnotify_change(dentry, ia_valid);
243 evm_inode_post_setattr(dentry, ia_valid);
244 }
242 245
243 return error; 246 return error;
244} 247}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3c3abff731a7..e4e57d59edb7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1036,11 +1036,13 @@ out:
1036 * on error we return an unlocked page and the error value 1036 * on error we return an unlocked page and the error value
1037 * on success we return a locked page and 0 1037 * on success we return a locked page and 0
1038 */ 1038 */
1039static int prepare_uptodate_page(struct page *page, u64 pos) 1039static int prepare_uptodate_page(struct page *page, u64 pos,
1040 bool force_uptodate)
1040{ 1041{
1041 int ret = 0; 1042 int ret = 0;
1042 1043
1043 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { 1044 if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
1045 !PageUptodate(page)) {
1044 ret = btrfs_readpage(NULL, page); 1046 ret = btrfs_readpage(NULL, page);
1045 if (ret) 1047 if (ret)
1046 return ret; 1048 return ret;
@@ -1061,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
1061static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1063static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1062 struct page **pages, size_t num_pages, 1064 struct page **pages, size_t num_pages,
1063 loff_t pos, unsigned long first_index, 1065 loff_t pos, unsigned long first_index,
1064 size_t write_bytes) 1066 size_t write_bytes, bool force_uptodate)
1065{ 1067{
1066 struct extent_state *cached_state = NULL; 1068 struct extent_state *cached_state = NULL;
1067 int i; 1069 int i;
@@ -1086,10 +1088,11 @@ again:
1086 } 1088 }
1087 1089
1088 if (i == 0) 1090 if (i == 0)
1089 err = prepare_uptodate_page(pages[i], pos); 1091 err = prepare_uptodate_page(pages[i], pos,
1092 force_uptodate);
1090 if (i == num_pages - 1) 1093 if (i == num_pages - 1)
1091 err = prepare_uptodate_page(pages[i], 1094 err = prepare_uptodate_page(pages[i],
1092 pos + write_bytes); 1095 pos + write_bytes, false);
1093 if (err) { 1096 if (err) {
1094 page_cache_release(pages[i]); 1097 page_cache_release(pages[i]);
1095 faili = i - 1; 1098 faili = i - 1;
@@ -1158,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1158 size_t num_written = 0; 1161 size_t num_written = 0;
1159 int nrptrs; 1162 int nrptrs;
1160 int ret = 0; 1163 int ret = 0;
1164 bool force_page_uptodate = false;
1161 1165
1162 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1166 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1163 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1167 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1200,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1200 * contents of pages from loop to loop 1204 * contents of pages from loop to loop
1201 */ 1205 */
1202 ret = prepare_pages(root, file, pages, num_pages, 1206 ret = prepare_pages(root, file, pages, num_pages,
1203 pos, first_index, write_bytes); 1207 pos, first_index, write_bytes,
1208 force_page_uptodate);
1204 if (ret) { 1209 if (ret) {
1205 btrfs_delalloc_release_space(inode, 1210 btrfs_delalloc_release_space(inode,
1206 num_pages << PAGE_CACHE_SHIFT); 1211 num_pages << PAGE_CACHE_SHIFT);
@@ -1217,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1217 if (copied < write_bytes) 1222 if (copied < write_bytes)
1218 nrptrs = 1; 1223 nrptrs = 1;
1219 1224
1220 if (copied == 0) 1225 if (copied == 0) {
1226 force_page_uptodate = true;
1221 dirty_pages = 0; 1227 dirty_pages = 0;
1222 else 1228 } else {
1229 force_page_uptodate = false;
1223 dirty_pages = (copied + offset + 1230 dirty_pages = (copied + offset +
1224 PAGE_CACHE_SIZE - 1) >> 1231 PAGE_CACHE_SIZE - 1) >>
1225 PAGE_CACHE_SHIFT; 1232 PAGE_CACHE_SHIFT;
1233 }
1226 1234
1227 /* 1235 /*
1228 * If we had a short copy we need to release the excess delaloc 1236 * If we had a short copy we need to release the excess delaloc
@@ -1817,6 +1825,11 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
1817 goto out; 1825 goto out;
1818 case SEEK_DATA: 1826 case SEEK_DATA:
1819 case SEEK_HOLE: 1827 case SEEK_HOLE:
1828 if (offset >= i_size_read(inode)) {
1829 mutex_unlock(&inode->i_mutex);
1830 return -ENXIO;
1831 }
1832
1820 ret = find_desired_extent(inode, &offset, origin); 1833 ret = find_desired_extent(inode, &offset, origin);
1821 if (ret) { 1834 if (ret) {
1822 mutex_unlock(&inode->i_mutex); 1835 mutex_unlock(&inode->i_mutex);
@@ -1825,11 +1838,11 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
1825 } 1838 }
1826 1839
1827 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { 1840 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
1828 ret = -EINVAL; 1841 offset = -EINVAL;
1829 goto out; 1842 goto out;
1830 } 1843 }
1831 if (offset > inode->i_sb->s_maxbytes) { 1844 if (offset > inode->i_sb->s_maxbytes) {
1832 ret = -EINVAL; 1845 offset = -EINVAL;
1833 goto out; 1846 goto out;
1834 } 1847 }
1835 1848
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4d14de6d121b..b2d004ad66a0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4018,7 +4018,8 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4018 memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); 4018 memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
4019 kfree(dentry->d_fsdata); 4019 kfree(dentry->d_fsdata);
4020 dentry->d_fsdata = NULL; 4020 dentry->d_fsdata = NULL;
4021 d_clear_need_lookup(dentry); 4021 /* This thing is hashed, drop it for now */
4022 d_drop(dentry);
4022 } else { 4023 } else {
4023 ret = btrfs_inode_by_name(dir, dentry, &location); 4024 ret = btrfs_inode_by_name(dir, dentry, &location);
4024 } 4025 }
@@ -4085,7 +4086,15 @@ static void btrfs_dentry_release(struct dentry *dentry)
4085static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 4086static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4086 struct nameidata *nd) 4087 struct nameidata *nd)
4087{ 4088{
4088 return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); 4089 struct dentry *ret;
4090
4091 ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
4092 if (unlikely(d_need_lookup(dentry))) {
4093 spin_lock(&dentry->d_lock);
4094 dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
4095 spin_unlock(&dentry->d_lock);
4096 }
4097 return ret;
4089} 4098}
4090 4099
4091unsigned char btrfs_filetype_table[] = { 4100unsigned char btrfs_filetype_table[] = {
@@ -4125,7 +4134,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4125 4134
4126 /* special case for "." */ 4135 /* special case for "." */
4127 if (filp->f_pos == 0) { 4136 if (filp->f_pos == 0) {
4128 over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR); 4137 over = filldir(dirent, ".", 1,
4138 filp->f_pos, btrfs_ino(inode), DT_DIR);
4129 if (over) 4139 if (over)
4130 return 0; 4140 return 0;
4131 filp->f_pos = 1; 4141 filp->f_pos = 1;
@@ -4134,7 +4144,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4134 if (filp->f_pos == 1) { 4144 if (filp->f_pos == 1) {
4135 u64 pino = parent_ino(filp->f_path.dentry); 4145 u64 pino = parent_ino(filp->f_path.dentry);
4136 over = filldir(dirent, "..", 2, 4146 over = filldir(dirent, "..", 2,
4137 2, pino, DT_DIR); 4147 filp->f_pos, pino, DT_DIR);
4138 if (over) 4148 if (over)
4139 return 0; 4149 return 0;
4140 filp->f_pos = 2; 4150 filp->f_pos = 2;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3351b1b24574..dae5dfe41ba5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1047,7 +1047,16 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1047 if (!max_to_defrag) 1047 if (!max_to_defrag)
1048 max_to_defrag = last_index - 1; 1048 max_to_defrag = last_index - 1;
1049 1049
1050 while (i <= last_index && defrag_count < max_to_defrag) { 1050 /*
1051 * make writeback starts from i, so the defrag range can be
1052 * written sequentially.
1053 */
1054 if (i < inode->i_mapping->writeback_index)
1055 inode->i_mapping->writeback_index = i;
1056
1057 while (i <= last_index && defrag_count < max_to_defrag &&
1058 (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
1059 PAGE_CACHE_SHIFT)) {
1051 /* 1060 /*
1052 * make sure we stop running if someone unmounts 1061 * make sure we stop running if someone unmounts
1053 * the FS 1062 * the FS
@@ -2177,6 +2186,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2177 if (!(src_file->f_mode & FMODE_READ)) 2186 if (!(src_file->f_mode & FMODE_READ))
2178 goto out_fput; 2187 goto out_fput;
2179 2188
2189 /* don't make the dst file partly checksummed */
2190 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
2191 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
2192 goto out_fput;
2193
2180 ret = -EISDIR; 2194 ret = -EISDIR;
2181 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 2195 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
2182 goto out_fput; 2196 goto out_fput;
@@ -2226,6 +2240,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2226 goto out_unlock; 2240 goto out_unlock;
2227 } 2241 }
2228 2242
2243 /* truncate page cache pages from target inode range */
2244 truncate_inode_pages_range(&inode->i_data, destoff,
2245 PAGE_CACHE_ALIGN(destoff + len) - 1);
2246
2229 /* do any pending delalloc/csum calc on src, one way or 2247 /* do any pending delalloc/csum calc on src, one way or
2230 another, and lock file content */ 2248 another, and lock file content */
2231 while (1) { 2249 while (1) {
@@ -2242,10 +2260,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2242 btrfs_wait_ordered_range(src, off, len); 2260 btrfs_wait_ordered_range(src, off, len);
2243 } 2261 }
2244 2262
2245 /* truncate page cache pages from target inode range */
2246 truncate_inode_pages_range(&inode->i_data, off,
2247 ALIGN(off + len, PAGE_CACHE_SIZE) - 1);
2248
2249 /* clone data */ 2263 /* clone data */
2250 key.objectid = btrfs_ino(src); 2264 key.objectid = btrfs_ino(src);
2251 key.type = BTRFS_EXTENT_DATA_KEY; 2265 key.type = BTRFS_EXTENT_DATA_KEY;
@@ -2323,7 +2337,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2323 else 2337 else
2324 new_key.offset = destoff; 2338 new_key.offset = destoff;
2325 2339
2326 trans = btrfs_start_transaction(root, 1); 2340 /*
2341 * 1 - adjusting old extent (we may have to split it)
2342 * 1 - add new extent
2343 * 1 - inode update
2344 */
2345 trans = btrfs_start_transaction(root, 3);
2327 if (IS_ERR(trans)) { 2346 if (IS_ERR(trans)) {
2328 ret = PTR_ERR(trans); 2347 ret = PTR_ERR(trans);
2329 goto out; 2348 goto out;
@@ -2442,7 +2461,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2442 if (endoff > inode->i_size) 2461 if (endoff > inode->i_size)
2443 btrfs_i_size_write(inode, endoff); 2462 btrfs_i_size_write(inode, endoff);
2444 2463
2445 BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
2446 ret = btrfs_update_inode(trans, root, inode); 2464 ret = btrfs_update_inode(trans, root, inode);
2447 BUG_ON(ret); 2465 BUG_ON(ret);
2448 btrfs_end_transaction(trans, root); 2466 btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 69565e5fc6a0..426aa464f1af 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -383,36 +383,36 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
383 XATTR_REPLACE); 383 XATTR_REPLACE);
384} 384}
385 385
386int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, 386int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
387 struct inode *inode, struct inode *dir, 387 void *fs_info)
388 const struct qstr *qstr)
389{ 388{
390 int err; 389 const struct xattr *xattr;
391 size_t len; 390 struct btrfs_trans_handle *trans = fs_info;
392 void *value;
393 char *suffix;
394 char *name; 391 char *name;
392 int err = 0;
395 393
396 err = security_inode_init_security(inode, dir, qstr, &suffix, &value, 394 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
397 &len); 395 name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
398 if (err) { 396 strlen(xattr->name) + 1, GFP_NOFS);
399 if (err == -EOPNOTSUPP) 397 if (!name) {
400 return 0; 398 err = -ENOMEM;
401 return err; 399 break;
402 } 400 }
403
404 name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1,
405 GFP_NOFS);
406 if (!name) {
407 err = -ENOMEM;
408 } else {
409 strcpy(name, XATTR_SECURITY_PREFIX); 401 strcpy(name, XATTR_SECURITY_PREFIX);
410 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 402 strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
411 err = __btrfs_setxattr(trans, inode, name, value, len, 0); 403 err = __btrfs_setxattr(trans, inode, name,
404 xattr->value, xattr->value_len, 0);
412 kfree(name); 405 kfree(name);
406 if (err < 0)
407 break;
413 } 408 }
414
415 kfree(suffix);
416 kfree(value);
417 return err; 409 return err;
418} 410}
411
412int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
413 struct inode *inode, struct inode *dir,
414 const struct qstr *qstr)
415{
416 return security_inode_init_security(inode, dir, qstr,
417 &btrfs_initxattrs, trans);
418}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e76bfeb68267..30acd22147e1 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -351,9 +351,7 @@ static int
351build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) 351build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
352{ 352{
353 unsigned int dlen; 353 unsigned int dlen;
354 unsigned int wlen; 354 unsigned int size = 2 * sizeof(struct ntlmssp2_name);
355 unsigned int size = 6 * sizeof(struct ntlmssp2_name);
356 __le64 curtime;
357 char *defdmname = "WORKGROUP"; 355 char *defdmname = "WORKGROUP";
358 unsigned char *blobptr; 356 unsigned char *blobptr;
359 struct ntlmssp2_name *attrptr; 357 struct ntlmssp2_name *attrptr;
@@ -365,15 +363,14 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
365 } 363 }
366 364
367 dlen = strlen(ses->domainName); 365 dlen = strlen(ses->domainName);
368 wlen = strlen(ses->server->hostname);
369 366
370 /* The length of this blob is a size which is 367 /*
371 * six times the size of a structure which holds name/size + 368 * The length of this blob is two times the size of a
372 * two times the unicode length of a domain name + 369 * structure (av pair) which holds name/size
373 * two times the unicode length of a server name + 370 * ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) +
374 * size of a timestamp (which is 8 bytes). 371 * unicode length of a netbios domain name
375 */ 372 */
376 ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; 373 ses->auth_key.len = size + 2 * dlen;
377 ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); 374 ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
378 if (!ses->auth_key.response) { 375 if (!ses->auth_key.response) {
379 ses->auth_key.len = 0; 376 ses->auth_key.len = 0;
@@ -384,44 +381,15 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
384 blobptr = ses->auth_key.response; 381 blobptr = ses->auth_key.response;
385 attrptr = (struct ntlmssp2_name *) blobptr; 382 attrptr = (struct ntlmssp2_name *) blobptr;
386 383
384 /*
385 * As defined in MS-NTLM 3.3.2, just this av pair field
386 * is sufficient as part of the temp
387 */
387 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); 388 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
388 attrptr->length = cpu_to_le16(2 * dlen); 389 attrptr->length = cpu_to_le16(2 * dlen);
389 blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); 390 blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name);
390 cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); 391 cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp);
391 392
392 blobptr += 2 * dlen;
393 attrptr = (struct ntlmssp2_name *) blobptr;
394
395 attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_COMPUTER_NAME);
396 attrptr->length = cpu_to_le16(2 * wlen);
397 blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name);
398 cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp);
399
400 blobptr += 2 * wlen;
401 attrptr = (struct ntlmssp2_name *) blobptr;
402
403 attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_DOMAIN_NAME);
404 attrptr->length = cpu_to_le16(2 * dlen);
405 blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name);
406 cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp);
407
408 blobptr += 2 * dlen;
409 attrptr = (struct ntlmssp2_name *) blobptr;
410
411 attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_COMPUTER_NAME);
412 attrptr->length = cpu_to_le16(2 * wlen);
413 blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name);
414 cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp);
415
416 blobptr += 2 * wlen;
417 attrptr = (struct ntlmssp2_name *) blobptr;
418
419 attrptr->type = cpu_to_le16(NTLMSSP_AV_TIMESTAMP);
420 attrptr->length = cpu_to_le16(sizeof(__le64));
421 blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name);
422 curtime = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
423 memcpy(blobptr, &curtime, sizeof(__le64));
424
425 return 0; 393 return 0;
426} 394}
427 395
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f93eb948d071..54b8f1e7da94 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -548,6 +548,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
548 struct inode *dir = dentry->d_inode; 548 struct inode *dir = dentry->d_inode;
549 struct dentry *child; 549 struct dentry *child;
550 550
551 if (!dir) {
552 dput(dentry);
553 dentry = ERR_PTR(-ENOENT);
554 break;
555 }
556
551 /* skip separators */ 557 /* skip separators */
552 while (*s == sep) 558 while (*s == sep)
553 s++; 559 s++;
@@ -563,10 +569,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
563 mutex_unlock(&dir->i_mutex); 569 mutex_unlock(&dir->i_mutex);
564 dput(dentry); 570 dput(dentry);
565 dentry = child; 571 dentry = child;
566 if (!dentry->d_inode) {
567 dput(dentry);
568 dentry = ERR_PTR(-ENOENT);
569 }
570 } while (!IS_ERR(dentry)); 572 } while (!IS_ERR(dentry));
571 _FreeXid(xid); 573 _FreeXid(xid);
572 kfree(full_path); 574 kfree(full_path);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index aac37d99a487..a80f7bd97b90 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4079,7 +4079,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
4079 T2_FNEXT_RSP_PARMS *parms; 4079 T2_FNEXT_RSP_PARMS *parms;
4080 char *response_data; 4080 char *response_data;
4081 int rc = 0; 4081 int rc = 0;
4082 int bytes_returned, name_len; 4082 int bytes_returned;
4083 unsigned int name_len;
4083 __u16 params, byte_count; 4084 __u16 params, byte_count;
4084 4085
4085 cFYI(1, "In FindNext"); 4086 cFYI(1, "In FindNext");
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 633c246b6775..62abf9fd6ff0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1298,7 +1298,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1298 /* ignore */ 1298 /* ignore */
1299 } else if (strnicmp(data, "guest", 5) == 0) { 1299 } else if (strnicmp(data, "guest", 5) == 0) {
1300 /* ignore */ 1300 /* ignore */
1301 } else if (strnicmp(data, "rw", 2) == 0) { 1301 } else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) {
1302 /* ignore */ 1302 /* ignore */
1303 } else if (strnicmp(data, "ro", 2) == 0) { 1303 } else if (strnicmp(data, "ro", 2) == 0) {
1304 /* ignore */ 1304 /* ignore */
@@ -1401,7 +1401,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1401 vol->server_ino = 1; 1401 vol->server_ino = 1;
1402 } else if (strnicmp(data, "noserverino", 9) == 0) { 1402 } else if (strnicmp(data, "noserverino", 9) == 0) {
1403 vol->server_ino = 0; 1403 vol->server_ino = 0;
1404 } else if (strnicmp(data, "rwpidforward", 4) == 0) { 1404 } else if (strnicmp(data, "rwpidforward", 12) == 0) {
1405 vol->rwpidforward = 1; 1405 vol->rwpidforward = 1;
1406 } else if (strnicmp(data, "cifsacl", 7) == 0) { 1406 } else if (strnicmp(data, "cifsacl", 7) == 0) {
1407 vol->cifs_acl = 1; 1407 vol->cifs_acl = 1;
@@ -2018,7 +2018,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
2018 warned_on_ntlm = true; 2018 warned_on_ntlm = true;
2019 cERROR(1, "default security mechanism requested. The default " 2019 cERROR(1, "default security mechanism requested. The default "
2020 "security mechanism will be upgraded from ntlm to " 2020 "security mechanism will be upgraded from ntlm to "
2021 "ntlmv2 in kernel release 3.1"); 2021 "ntlmv2 in kernel release 3.2");
2022 } 2022 }
2023 ses->overrideSecFlg = volume_info->secFlg; 2023 ses->overrideSecFlg = volume_info->secFlg;
2024 2024
@@ -2877,9 +2877,9 @@ cleanup_volume_info_contents(struct smb_vol *volume_info)
2877{ 2877{
2878 kfree(volume_info->username); 2878 kfree(volume_info->username);
2879 kzfree(volume_info->password); 2879 kzfree(volume_info->password);
2880 kfree(volume_info->UNC);
2881 if (volume_info->UNCip != volume_info->UNC + 2) 2880 if (volume_info->UNCip != volume_info->UNC + 2)
2882 kfree(volume_info->UNCip); 2881 kfree(volume_info->UNCip);
2882 kfree(volume_info->UNC);
2883 kfree(volume_info->domainname); 2883 kfree(volume_info->domainname);
2884 kfree(volume_info->iocharset); 2884 kfree(volume_info->iocharset);
2885 kfree(volume_info->prepath); 2885 kfree(volume_info->prepath);
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 2a22fb2989e4..c32308882148 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -22,6 +22,7 @@
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/posix_acl_xattr.h> 23#include <linux/posix_acl_xattr.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/xattr.h>
25#include "cifsfs.h" 26#include "cifsfs.h"
26#include "cifspdu.h" 27#include "cifspdu.h"
27#include "cifsglob.h" 28#include "cifsglob.h"
@@ -31,16 +32,8 @@
31#define MAX_EA_VALUE_SIZE 65535 32#define MAX_EA_VALUE_SIZE 65535
32#define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" 33#define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib"
33#define CIFS_XATTR_CIFS_ACL "system.cifs_acl" 34#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
34#define CIFS_XATTR_USER_PREFIX "user."
35#define CIFS_XATTR_SYSTEM_PREFIX "system."
36#define CIFS_XATTR_OS2_PREFIX "os2."
37#define CIFS_XATTR_SECURITY_PREFIX "security."
38#define CIFS_XATTR_TRUSTED_PREFIX "trusted."
39#define XATTR_TRUSTED_PREFIX_LEN 8
40#define XATTR_SECURITY_PREFIX_LEN 9
41/* BB need to add server (Samba e.g) support for security and trusted prefix */
42
43 35
36/* BB need to add server (Samba e.g) support for security and trusted prefix */
44 37
45int cifs_removexattr(struct dentry *direntry, const char *ea_name) 38int cifs_removexattr(struct dentry *direntry, const char *ea_name)
46{ 39{
@@ -76,8 +69,8 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
76 } 69 }
77 if (ea_name == NULL) { 70 if (ea_name == NULL) {
78 cFYI(1, "Null xattr names not supported"); 71 cFYI(1, "Null xattr names not supported");
79 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) 72 } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)
80 && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) { 73 && (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))) {
81 cFYI(1, 74 cFYI(1,
82 "illegal xattr request %s (only user namespace supported)", 75 "illegal xattr request %s (only user namespace supported)",
83 ea_name); 76 ea_name);
@@ -88,7 +81,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
88 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 81 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
89 goto remove_ea_exit; 82 goto remove_ea_exit;
90 83
91 ea_name += 5; /* skip past user. prefix */ 84 ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
92 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL, 85 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL,
93 (__u16)0, cifs_sb->local_nls, 86 (__u16)0, cifs_sb->local_nls,
94 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 87 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -149,21 +142,23 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
149 142
150 if (ea_name == NULL) { 143 if (ea_name == NULL) {
151 cFYI(1, "Null xattr names not supported"); 144 cFYI(1, "Null xattr names not supported");
152 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 145 } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)
146 == 0) {
153 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 147 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
154 goto set_ea_exit; 148 goto set_ea_exit;
155 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) 149 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0)
156 cFYI(1, "attempt to set cifs inode metadata"); 150 cFYI(1, "attempt to set cifs inode metadata");
157 151
158 ea_name += 5; /* skip past user. prefix */ 152 ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
159 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 153 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
160 (__u16)value_size, cifs_sb->local_nls, 154 (__u16)value_size, cifs_sb->local_nls,
161 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 155 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
162 } else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) { 156 } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)
157 == 0) {
163 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 158 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
164 goto set_ea_exit; 159 goto set_ea_exit;
165 160
166 ea_name += 4; /* skip past os2. prefix */ 161 ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
167 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 162 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
168 (__u16)value_size, cifs_sb->local_nls, 163 (__u16)value_size, cifs_sb->local_nls,
169 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 164 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -269,7 +264,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
269 /* return alt name if available as pseudo attr */ 264 /* return alt name if available as pseudo attr */
270 if (ea_name == NULL) { 265 if (ea_name == NULL) {
271 cFYI(1, "Null xattr names not supported"); 266 cFYI(1, "Null xattr names not supported");
272 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 267 } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)
268 == 0) {
273 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 269 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
274 goto get_ea_exit; 270 goto get_ea_exit;
275 271
@@ -277,15 +273,15 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
277 cFYI(1, "attempt to query cifs inode metadata"); 273 cFYI(1, "attempt to query cifs inode metadata");
278 /* revalidate/getattr then populate from inode */ 274 /* revalidate/getattr then populate from inode */
279 } /* BB add else when above is implemented */ 275 } /* BB add else when above is implemented */
280 ea_name += 5; /* skip past user. prefix */ 276 ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
281 rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, 277 rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
282 buf_size, cifs_sb->local_nls, 278 buf_size, cifs_sb->local_nls,
283 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 279 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
284 } else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) { 280 } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
285 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 281 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
286 goto get_ea_exit; 282 goto get_ea_exit;
287 283
288 ea_name += 4; /* skip past os2. prefix */ 284 ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
289 rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, 285 rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
290 buf_size, cifs_sb->local_nls, 286 buf_size, cifs_sb->local_nls,
291 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 287 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -339,10 +335,10 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
339 cFYI(1, "Query CIFS ACL not supported yet"); 335 cFYI(1, "Query CIFS ACL not supported yet");
340#endif /* CONFIG_CIFS_ACL */ 336#endif /* CONFIG_CIFS_ACL */
341 } else if (strncmp(ea_name, 337 } else if (strncmp(ea_name,
342 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { 338 XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
343 cFYI(1, "Trusted xattr namespace not supported yet"); 339 cFYI(1, "Trusted xattr namespace not supported yet");
344 } else if (strncmp(ea_name, 340 } else if (strncmp(ea_name,
345 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { 341 XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) {
346 cFYI(1, "Security xattr namespace not supported yet"); 342 cFYI(1, "Security xattr namespace not supported yet");
347 } else 343 } else
348 cFYI(1, 344 cFYI(1,
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 44e17e9c21ae..cc0ea9fe5ecf 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -59,12 +59,11 @@ void coda_sysctl_clean(void);
59 59
60#define CODA_ALLOC(ptr, cast, size) do { \ 60#define CODA_ALLOC(ptr, cast, size) do { \
61 if (size < PAGE_SIZE) \ 61 if (size < PAGE_SIZE) \
62 ptr = kmalloc((unsigned long) size, GFP_KERNEL); \ 62 ptr = kzalloc((unsigned long) size, GFP_KERNEL); \
63 else \ 63 else \
64 ptr = (cast)vmalloc((unsigned long) size); \ 64 ptr = (cast)vzalloc((unsigned long) size); \
65 if (!ptr) \ 65 if (!ptr) \
66 printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ 66 printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \
67 else memset( ptr, 0, size ); \
68} while (0) 67} while (0)
69 68
70 69
diff --git a/fs/compat.c b/fs/compat.c
index 58b1da459893..05e3f3d2cd77 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -37,7 +37,6 @@
37#include <linux/dirent.h> 37#include <linux/dirent.h>
38#include <linux/fsnotify.h> 38#include <linux/fsnotify.h>
39#include <linux/highuid.h> 39#include <linux/highuid.h>
40#include <linux/nfsd/syscall.h>
41#include <linux/personality.h> 40#include <linux/personality.h>
42#include <linux/rwsem.h> 41#include <linux/rwsem.h>
43#include <linux/tsacct_kern.h> 42#include <linux/tsacct_kern.h>
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c83f4768eeaa..ca418aaf6352 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -23,7 +23,8 @@
23 * 23 *
24 * configfs Copyright (C) 2005 Oracle. All rights reserved. 24 * configfs Copyright (C) 2005 Oracle. All rights reserved.
25 * 25 *
26 * Please see Documentation/filesystems/configfs.txt for more information. 26 * Please see Documentation/filesystems/configfs/configfs.txt for more
27 * information.
27 */ 28 */
28 29
29#undef DEBUG 30#undef DEBUG
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 76dc4c3e5d51..50cee7f9110b 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -23,7 +23,7 @@
23 * 23 *
24 * configfs Copyright (C) 2005 Oracle. All rights reserved. 24 * configfs Copyright (C) 2005 Oracle. All rights reserved.
25 * 25 *
26 * Please see the file Documentation/filesystems/configfs.txt for 26 * Please see the file Documentation/filesystems/configfs/configfs.txt for
27 * critical information about using the config_item interface. 27 * critical information about using the config_item interface.
28 */ 28 */
29 29
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e7a7a2f07324..f3a257d7a985 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * file.c - part of debugfs, a tiny little debug file system 2 * inode.c - part of debugfs, a tiny little debug file system
3 * 3 *
4 * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> 4 * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
5 * Copyright (C) 2004 IBM Inc. 5 * Copyright (C) 2004 IBM Inc.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index fe047d966dc5..9026fc91fe3b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -700,7 +700,7 @@ static const struct file_operations eventpoll_fops = {
700 .llseek = noop_llseek, 700 .llseek = noop_llseek,
701}; 701};
702 702
703/* Fast test to see if the file is an evenpoll file */ 703/* Fast test to see if the file is an eventpoll file */
704static inline int is_file_epoll(struct file *f) 704static inline int is_file_epoll(struct file *f)
705{ 705{
706 return f->f_op == &eventpoll_fops; 706 return f->f_op == &eventpoll_fops;
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 5d979b4347b0..c922adc8ef41 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -46,28 +46,30 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
46 value, size, flags); 46 value, size, flags);
47} 47}
48 48
49int 49int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
50ext2_init_security(struct inode *inode, struct inode *dir, 50 void *fs_info)
51 const struct qstr *qstr)
52{ 51{
53 int err; 52 const struct xattr *xattr;
54 size_t len; 53 int err = 0;
55 void *value;
56 char *name;
57 54
58 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); 55 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
59 if (err) { 56 err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
60 if (err == -EOPNOTSUPP) 57 xattr->name, xattr->value,
61 return 0; 58 xattr->value_len, 0);
62 return err; 59 if (err < 0)
60 break;
63 } 61 }
64 err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
65 name, value, len, 0);
66 kfree(name);
67 kfree(value);
68 return err; 62 return err;
69} 63}
70 64
65int
66ext2_init_security(struct inode *inode, struct inode *dir,
67 const struct qstr *qstr)
68{
69 return security_inode_init_security(inode, dir, qstr,
70 &ext2_initxattrs, NULL);
71}
72
71const struct xattr_handler ext2_xattr_security_handler = { 73const struct xattr_handler ext2_xattr_security_handler = {
72 .prefix = XATTR_SECURITY_PREFIX, 74 .prefix = XATTR_SECURITY_PREFIX,
73 .list = ext2_xattr_security_list, 75 .list = ext2_xattr_security_list,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 04da6acde85d..12661e1deedd 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1134 return bh; 1134 return bh;
1135 if (buffer_uptodate(bh)) 1135 if (buffer_uptodate(bh))
1136 return bh; 1136 return bh;
1137 ll_rw_block(READ_META, 1, &bh); 1137 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
1138 wait_on_buffer(bh); 1138 wait_on_buffer(bh);
1139 if (buffer_uptodate(bh)) 1139 if (buffer_uptodate(bh))
1140 return bh; 1140 return bh;
@@ -2807,7 +2807,7 @@ make_io:
2807 trace_ext3_load_inode(inode); 2807 trace_ext3_load_inode(inode);
2808 get_bh(bh); 2808 get_bh(bh);
2809 bh->b_end_io = end_buffer_read_sync; 2809 bh->b_end_io = end_buffer_read_sync;
2810 submit_bh(READ_META, bh); 2810 submit_bh(READ | REQ_META | REQ_PRIO, bh);
2811 wait_on_buffer(bh); 2811 wait_on_buffer(bh);
2812 if (!buffer_uptodate(bh)) { 2812 if (!buffer_uptodate(bh)) {
2813 ext3_error(inode->i_sb, "ext3_get_inode_loc", 2813 ext3_error(inode->i_sb, "ext3_get_inode_loc",
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 5571708b6a58..0629e09f6511 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -922,7 +922,8 @@ restart:
922 bh = ext3_getblk(NULL, dir, b++, 0, &err); 922 bh = ext3_getblk(NULL, dir, b++, 0, &err);
923 bh_use[ra_max] = bh; 923 bh_use[ra_max] = bh;
924 if (bh) 924 if (bh)
925 ll_rw_block(READ_META, 1, &bh); 925 ll_rw_block(READ | REQ_META | REQ_PRIO,
926 1, &bh);
926 } 927 }
927 } 928 }
928 if ((bh = bh_use[ra_ptr++]) == NULL) 929 if ((bh = bh_use[ra_ptr++]) == NULL)
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index b8d9f83aa5c5..3c218b8a51d4 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -48,28 +48,32 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
48 name, value, size, flags); 48 name, value, size, flags);
49} 49}
50 50
51int 51int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir, 52 void *fs_info)
53 const struct qstr *qstr)
54{ 53{
55 int err; 54 const struct xattr *xattr;
56 size_t len; 55 handle_t *handle = fs_info;
57 void *value; 56 int err = 0;
58 char *name;
59 57
60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); 58 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
61 if (err) { 59 err = ext3_xattr_set_handle(handle, inode,
62 if (err == -EOPNOTSUPP) 60 EXT3_XATTR_INDEX_SECURITY,
63 return 0; 61 xattr->name, xattr->value,
64 return err; 62 xattr->value_len, 0);
63 if (err < 0)
64 break;
65 } 65 }
66 err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
67 name, value, len, 0);
68 kfree(name);
69 kfree(value);
70 return err; 66 return err;
71} 67}
72 68
69int
70ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
71 const struct qstr *qstr)
72{
73 return security_inode_init_security(inode, dir, qstr,
74 &ext3_initxattrs, handle);
75}
76
73const struct xattr_handler ext3_xattr_security_handler = { 77const struct xattr_handler ext3_xattr_security_handler = {
74 .prefix = XATTR_SECURITY_PREFIX, 78 .prefix = XATTR_SECURITY_PREFIX,
75 .list = ext3_xattr_security_list, 79 .list = ext3_xattr_security_list,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 18d2558b7624..986e2388f031 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -647,7 +647,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
647 return bh; 647 return bh;
648 if (buffer_uptodate(bh)) 648 if (buffer_uptodate(bh))
649 return bh; 649 return bh;
650 ll_rw_block(READ_META, 1, &bh); 650 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
651 wait_on_buffer(bh); 651 wait_on_buffer(bh);
652 if (buffer_uptodate(bh)) 652 if (buffer_uptodate(bh))
653 return bh; 653 return bh;
@@ -3298,7 +3298,7 @@ make_io:
3298 trace_ext4_load_inode(inode); 3298 trace_ext4_load_inode(inode);
3299 get_bh(bh); 3299 get_bh(bh);
3300 bh->b_end_io = end_buffer_read_sync; 3300 bh->b_end_io = end_buffer_read_sync;
3301 submit_bh(READ_META, bh); 3301 submit_bh(READ | REQ_META | REQ_PRIO, bh);
3302 wait_on_buffer(bh); 3302 wait_on_buffer(bh);
3303 if (!buffer_uptodate(bh)) { 3303 if (!buffer_uptodate(bh)) {
3304 EXT4_ERROR_INODE_BLOCK(inode, block, 3304 EXT4_ERROR_INODE_BLOCK(inode, block,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f8068c7bae9f..1c924faeb6c8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -922,7 +922,8 @@ restart:
922 bh = ext4_getblk(NULL, dir, b++, 0, &err); 922 bh = ext4_getblk(NULL, dir, b++, 0, &err);
923 bh_use[ra_max] = bh; 923 bh_use[ra_max] = bh;
924 if (bh) 924 if (bh)
925 ll_rw_block(READ_META, 1, &bh); 925 ll_rw_block(READ | REQ_META | REQ_PRIO,
926 1, &bh);
926 } 927 }
927 } 928 }
928 if ((bh = bh_use[ra_ptr++]) == NULL) 929 if ((bh = bh_use[ra_ptr++]) == NULL)
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 007c3bfbf094..34e4350dd4d9 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -48,28 +48,32 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
48 name, value, size, flags); 48 name, value, size, flags);
49} 49}
50 50
51int 51int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, 52 void *fs_info)
53 const struct qstr *qstr)
54{ 53{
55 int err; 54 const struct xattr *xattr;
56 size_t len; 55 handle_t *handle = fs_info;
57 void *value; 56 int err = 0;
58 char *name;
59 57
60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len); 58 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
61 if (err) { 59 err = ext4_xattr_set_handle(handle, inode,
62 if (err == -EOPNOTSUPP) 60 EXT4_XATTR_INDEX_SECURITY,
63 return 0; 61 xattr->name, xattr->value,
64 return err; 62 xattr->value_len, 0);
63 if (err < 0)
64 break;
65 } 65 }
66 err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY,
67 name, value, len, 0);
68 kfree(name);
69 kfree(value);
70 return err; 66 return err;
71} 67}
72 68
69int
70ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
71 const struct qstr *qstr)
72{
73 return security_inode_init_security(inode, dir, qstr,
74 &ext4_initxattrs, handle);
75}
76
73const struct xattr_handler ext4_xattr_security_handler = { 77const struct xattr_handler ext4_xattr_security_handler = {
74 .prefix = XATTR_SECURITY_PREFIX, 78 .prefix = XATTR_SECURITY_PREFIX,
75 .list = ext4_xattr_security_list, 79 .list = ext4_xattr_security_list,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 900cf986aadc..6525b804d5ec 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -624,31 +624,29 @@ fail:
624 return error; 624 return error;
625} 625}
626 626
627static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, 627int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
628 const struct qstr *qstr) 628 void *fs_info)
629{ 629{
630 int err; 630 const struct xattr *xattr;
631 size_t len; 631 int err = 0;
632 void *value; 632
633 char *name; 633 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
634 634 err = __gfs2_xattr_set(inode, xattr->name, xattr->value,
635 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, 635 xattr->value_len, 0,
636 &name, &value, &len); 636 GFS2_EATYPE_SECURITY);
637 637 if (err < 0)
638 if (err) { 638 break;
639 if (err == -EOPNOTSUPP)
640 return 0;
641 return err;
642 } 639 }
643
644 err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
645 GFS2_EATYPE_SECURITY);
646 kfree(value);
647 kfree(name);
648
649 return err; 640 return err;
650} 641}
651 642
643static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
644 const struct qstr *qstr)
645{
646 return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
647 &gfs2_initxattrs, NULL);
648}
649
652/** 650/**
653 * gfs2_create_inode - Create a new inode 651 * gfs2_create_inode - Create a new inode
654 * @dir: The parent directory 652 * @dir: The parent directory
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 85c62923ee29..598646434362 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
624 bh->b_end_io = end_buffer_write_sync; 624 bh->b_end_io = end_buffer_write_sync;
625 get_bh(bh); 625 get_bh(bh);
626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
627 submit_bh(WRITE_SYNC | REQ_META, bh); 627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
628 else 628 else
629 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); 629 submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh);
630 wait_on_buffer(bh); 630 wait_on_buffer(bh);
631 631
632 if (!buffer_uptodate(bh)) 632 if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 747238cd9f96..be29858900f6 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
37{ 37{
38 struct buffer_head *bh, *head; 38 struct buffer_head *bh, *head;
39 int nr_underway = 0; 39 int nr_underway = 0;
40 int write_op = REQ_META | 40 int write_op = REQ_META | REQ_PRIO |
41 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); 41 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
42 42
43 BUG_ON(!PageLocked(page)); 43 BUG_ON(!PageLocked(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
225 } 225 }
226 bh->b_end_io = end_buffer_read_sync; 226 bh->b_end_io = end_buffer_read_sync;
227 get_bh(bh); 227 get_bh(bh);
228 submit_bh(READ_SYNC | REQ_META, bh); 228 submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
229 if (!(flags & DIO_WAIT)) 229 if (!(flags & DIO_WAIT))
230 return 0; 230 return 0;
231 231
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
435 if (buffer_uptodate(first_bh)) 435 if (buffer_uptodate(first_bh))
436 goto out; 436 goto out;
437 if (!buffer_locked(first_bh)) 437 if (!buffer_locked(first_bh))
438 ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); 438 ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh);
439 439
440 dblock++; 440 dblock++;
441 extlen--; 441 extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3bc073a4cf82..079587e53849 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
224 224
225 bio->bi_end_io = end_bio_io_page; 225 bio->bi_end_io = end_bio_io_page;
226 bio->bi_private = page; 226 bio->bi_private = page;
227 submit_bio(READ_SYNC | REQ_META, bio); 227 submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio);
228 wait_on_page_locked(page); 228 wait_on_page_locked(page);
229 bio_put(bio); 229 bio_put(bio);
230 if (!PageUptodate(page)) { 230 if (!PageUptodate(page)) {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 42e8d23bc047..0e8bb13381e4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -709,7 +709,7 @@ get_a_page:
709 set_buffer_uptodate(bh); 709 set_buffer_uptodate(bh);
710 710
711 if (!buffer_uptodate(bh)) { 711 if (!buffer_uptodate(bh)) {
712 ll_rw_block(READ_META, 1, &bh); 712 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
713 wait_on_buffer(bh); 713 wait_on_buffer(bh);
714 if (!buffer_uptodate(bh)) 714 if (!buffer_uptodate(bh))
715 goto unlock_out; 715 goto unlock_out;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index c106ca22e812..d24a9b666a23 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -344,6 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
344 struct inode *root, *inode; 344 struct inode *root, *inode;
345 struct qstr str; 345 struct qstr str;
346 struct nls_table *nls = NULL; 346 struct nls_table *nls = NULL;
347 u64 last_fs_block, last_fs_page;
347 int err; 348 int err;
348 349
349 err = -EINVAL; 350 err = -EINVAL;
@@ -399,9 +400,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
399 if (!sbi->rsrc_clump_blocks) 400 if (!sbi->rsrc_clump_blocks)
400 sbi->rsrc_clump_blocks = 1; 401 sbi->rsrc_clump_blocks = 1;
401 402
402 err = generic_check_addressable(sbi->alloc_blksz_shift, 403 err = -EFBIG;
403 sbi->total_blocks); 404 last_fs_block = sbi->total_blocks - 1;
404 if (err) { 405 last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >>
406 PAGE_CACHE_SHIFT;
407
408 if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) ||
409 (last_fs_page > (pgoff_t)(~0ULL))) {
405 printk(KERN_ERR "hfs: filesystem size too large.\n"); 410 printk(KERN_ERR "hfs: filesystem size too large.\n");
406 goto out_free_vhdr; 411 goto out_free_vhdr;
407 } 412 }
@@ -525,8 +530,8 @@ out_close_cat_tree:
525out_close_ext_tree: 530out_close_ext_tree:
526 hfs_btree_close(sbi->ext_tree); 531 hfs_btree_close(sbi->ext_tree);
527out_free_vhdr: 532out_free_vhdr:
528 kfree(sbi->s_vhdr); 533 kfree(sbi->s_vhdr_buf);
529 kfree(sbi->s_backup_vhdr); 534 kfree(sbi->s_backup_vhdr_buf);
530out_unload_nls: 535out_unload_nls:
531 unload_nls(sbi->nls); 536 unload_nls(sbi->nls);
532 unload_nls(nls); 537 unload_nls(nls);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 10e515a0d452..7daf4b852d1c 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -272,9 +272,9 @@ reread:
272 return 0; 272 return 0;
273 273
274out_free_backup_vhdr: 274out_free_backup_vhdr:
275 kfree(sbi->s_backup_vhdr); 275 kfree(sbi->s_backup_vhdr_buf);
276out_free_vhdr: 276out_free_vhdr:
277 kfree(sbi->s_vhdr); 277 kfree(sbi->s_vhdr_buf);
278out: 278out:
279 return error; 279 return error;
280} 280}
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index cfeb7164b085..0f20208df602 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -22,26 +22,29 @@
22#include <linux/security.h> 22#include <linux/security.h>
23#include "nodelist.h" 23#include "nodelist.h"
24 24
25/* ---- Initial Security Label Attachment -------------- */ 25/* ---- Initial Security Label(s) Attachment callback --- */
26int jffs2_init_security(struct inode *inode, struct inode *dir, 26int jffs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
27 const struct qstr *qstr) 27 void *fs_info)
28{ 28{
29 int rc; 29 const struct xattr *xattr;
30 size_t len; 30 int err = 0;
31 void *value;
32 char *name;
33 31
34 rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len); 32 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
35 if (rc) { 33 err = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY,
36 if (rc == -EOPNOTSUPP) 34 xattr->name, xattr->value,
37 return 0; 35 xattr->value_len, 0);
38 return rc; 36 if (err < 0)
37 break;
39 } 38 }
40 rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0); 39 return err;
40}
41 41
42 kfree(name); 42/* ---- Initial Security Label(s) Attachment ----------- */
43 kfree(value); 43int jffs2_init_security(struct inode *inode, struct inode *dir,
44 return rc; 44 const struct qstr *qstr)
45{
46 return security_inode_init_security(inode, dir, qstr,
47 &jffs2_initxattrs, NULL);
45} 48}
46 49
47/* ---- XATTR Handler for "security.*" ----------------- */ 50/* ---- XATTR Handler for "security.*" ----------------- */
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index e87fedef23db..26683e15b3ac 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -1089,38 +1089,37 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
1089} 1089}
1090 1090
1091#ifdef CONFIG_JFS_SECURITY 1091#ifdef CONFIG_JFS_SECURITY
1092int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, 1092int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
1093 const struct qstr *qstr) 1093 void *fs_info)
1094{ 1094{
1095 int rc; 1095 const struct xattr *xattr;
1096 size_t len; 1096 tid_t *tid = fs_info;
1097 void *value;
1098 char *suffix;
1099 char *name; 1097 char *name;
1100 1098 int err = 0;
1101 rc = security_inode_init_security(inode, dir, qstr, &suffix, &value, 1099
1102 &len); 1100 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
1103 if (rc) { 1101 name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
1104 if (rc == -EOPNOTSUPP) 1102 strlen(xattr->name) + 1, GFP_NOFS);
1105 return 0; 1103 if (!name) {
1106 return rc; 1104 err = -ENOMEM;
1107 } 1105 break;
1108 name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix), 1106 }
1109 GFP_NOFS); 1107 strcpy(name, XATTR_SECURITY_PREFIX);
1110 if (!name) { 1108 strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
1111 rc = -ENOMEM; 1109
1112 goto kmalloc_failed; 1110 err = __jfs_setxattr(*tid, inode, name,
1111 xattr->value, xattr->value_len, 0);
1112 kfree(name);
1113 if (err < 0)
1114 break;
1113 } 1115 }
1114 strcpy(name, XATTR_SECURITY_PREFIX); 1116 return err;
1115 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 1117}
1116
1117 rc = __jfs_setxattr(tid, inode, name, value, len, 0);
1118
1119 kfree(name);
1120kmalloc_failed:
1121 kfree(suffix);
1122 kfree(value);
1123 1118
1124 return rc; 1119int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir,
1120 const struct qstr *qstr)
1121{
1122 return security_inode_init_security(inode, dir, qstr,
1123 &jfs_initxattrs, &tid);
1125} 1124}
1126#endif 1125#endif
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index b7c99bfb3da6..6f29836ec0cb 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -316,14 +316,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
316 struct hlist_node *pos; 316 struct hlist_node *pos;
317 struct nlm_host *host = NULL; 317 struct nlm_host *host = NULL;
318 struct nsm_handle *nsm = NULL; 318 struct nsm_handle *nsm = NULL;
319 struct sockaddr_in sin = { 319 struct sockaddr *src_sap = svc_daddr(rqstp);
320 .sin_family = AF_INET, 320 size_t src_len = rqstp->rq_daddrlen;
321 };
322 struct sockaddr_in6 sin6 = {
323 .sin6_family = AF_INET6,
324 };
325 struct sockaddr *src_sap;
326 size_t src_len = rqstp->rq_addrlen;
327 struct nlm_lookup_host_info ni = { 321 struct nlm_lookup_host_info ni = {
328 .server = 1, 322 .server = 1,
329 .sap = svc_addr(rqstp), 323 .sap = svc_addr(rqstp),
@@ -340,21 +334,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
340 334
341 mutex_lock(&nlm_host_mutex); 335 mutex_lock(&nlm_host_mutex);
342 336
343 switch (ni.sap->sa_family) {
344 case AF_INET:
345 sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
346 src_sap = (struct sockaddr *)&sin;
347 break;
348 case AF_INET6:
349 ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
350 src_sap = (struct sockaddr *)&sin6;
351 break;
352 default:
353 dprintk("lockd: %s failed; unrecognized address family\n",
354 __func__);
355 goto out;
356 }
357
358 if (time_after_eq(jiffies, next_gc)) 337 if (time_after_eq(jiffies, next_gc))
359 nlm_gc_hosts(); 338 nlm_gc_hosts();
360 339
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abfff9d7979d..c061b9aa7ddb 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -282,7 +282,7 @@ int lockd_up(void)
282 /* 282 /*
283 * Create the kernel thread and wait for it to start. 283 * Create the kernel thread and wait for it to start.
284 */ 284 */
285 nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); 285 nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
286 if (IS_ERR(nlmsvc_rqst)) { 286 if (IS_ERR(nlmsvc_rqst)) {
287 error = PTR_ERR(nlmsvc_rqst); 287 error = PTR_ERR(nlmsvc_rqst);
288 nlmsvc_rqst = NULL; 288 nlmsvc_rqst = NULL;
diff --git a/fs/locks.c b/fs/locks.c
index 703f545097de..3b0d05dcd7c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -60,7 +60,7 @@
60 * 60 *
61 * Initial implementation of mandatory locks. SunOS turned out to be 61 * Initial implementation of mandatory locks. SunOS turned out to be
62 * a rotten model, so I implemented the "obvious" semantics. 62 * a rotten model, so I implemented the "obvious" semantics.
63 * See 'Documentation/mandatory.txt' for details. 63 * See 'Documentation/filesystems/mandatory-locking.txt' for details.
64 * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. 64 * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
65 * 65 *
66 * Don't allow mandatory locks on mmap()'ed files. Added simple functions to 66 * Don't allow mandatory locks on mmap()'ed files. Added simple functions to
@@ -133,6 +133,20 @@
133#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 133#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
134#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) 134#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
135 135
136static bool lease_breaking(struct file_lock *fl)
137{
138 return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
139}
140
141static int target_leasetype(struct file_lock *fl)
142{
143 if (fl->fl_flags & FL_UNLOCK_PENDING)
144 return F_UNLCK;
145 if (fl->fl_flags & FL_DOWNGRADE_PENDING)
146 return F_RDLCK;
147 return fl->fl_type;
148}
149
136int leases_enable = 1; 150int leases_enable = 1;
137int lease_break_time = 45; 151int lease_break_time = 45;
138 152
@@ -1119,6 +1133,17 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1119 1133
1120EXPORT_SYMBOL(locks_mandatory_area); 1134EXPORT_SYMBOL(locks_mandatory_area);
1121 1135
1136static void lease_clear_pending(struct file_lock *fl, int arg)
1137{
1138 switch (arg) {
1139 case F_UNLCK:
1140 fl->fl_flags &= ~FL_UNLOCK_PENDING;
1141 /* fall through: */
1142 case F_RDLCK:
1143 fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
1144 }
1145}
1146
1122/* We already had a lease on this file; just change its type */ 1147/* We already had a lease on this file; just change its type */
1123int lease_modify(struct file_lock **before, int arg) 1148int lease_modify(struct file_lock **before, int arg)
1124{ 1149{
@@ -1127,6 +1152,7 @@ int lease_modify(struct file_lock **before, int arg)
1127 1152
1128 if (error) 1153 if (error)
1129 return error; 1154 return error;
1155 lease_clear_pending(fl, arg);
1130 locks_wake_up_blocks(fl); 1156 locks_wake_up_blocks(fl);
1131 if (arg == F_UNLCK) 1157 if (arg == F_UNLCK)
1132 locks_delete_lock(before); 1158 locks_delete_lock(before);
@@ -1135,19 +1161,25 @@ int lease_modify(struct file_lock **before, int arg)
1135 1161
1136EXPORT_SYMBOL(lease_modify); 1162EXPORT_SYMBOL(lease_modify);
1137 1163
1164static bool past_time(unsigned long then)
1165{
1166 if (!then)
1167 /* 0 is a special value meaning "this never expires": */
1168 return false;
1169 return time_after(jiffies, then);
1170}
1171
1138static void time_out_leases(struct inode *inode) 1172static void time_out_leases(struct inode *inode)
1139{ 1173{
1140 struct file_lock **before; 1174 struct file_lock **before;
1141 struct file_lock *fl; 1175 struct file_lock *fl;
1142 1176
1143 before = &inode->i_flock; 1177 before = &inode->i_flock;
1144 while ((fl = *before) && IS_LEASE(fl) && (fl->fl_type & F_INPROGRESS)) { 1178 while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) {
1145 if ((fl->fl_break_time == 0) 1179 if (past_time(fl->fl_downgrade_time))
1146 || time_before(jiffies, fl->fl_break_time)) { 1180 lease_modify(before, F_RDLCK);
1147 before = &fl->fl_next; 1181 if (past_time(fl->fl_break_time))
1148 continue; 1182 lease_modify(before, F_UNLCK);
1149 }
1150 lease_modify(before, fl->fl_type & ~F_INPROGRESS);
1151 if (fl == *before) /* lease_modify may have freed fl */ 1183 if (fl == *before) /* lease_modify may have freed fl */
1152 before = &fl->fl_next; 1184 before = &fl->fl_next;
1153 } 1185 }
@@ -1165,7 +1197,7 @@ static void time_out_leases(struct inode *inode)
1165 */ 1197 */
1166int __break_lease(struct inode *inode, unsigned int mode) 1198int __break_lease(struct inode *inode, unsigned int mode)
1167{ 1199{
1168 int error = 0, future; 1200 int error = 0;
1169 struct file_lock *new_fl, *flock; 1201 struct file_lock *new_fl, *flock;
1170 struct file_lock *fl; 1202 struct file_lock *fl;
1171 unsigned long break_time; 1203 unsigned long break_time;
@@ -1182,24 +1214,13 @@ int __break_lease(struct inode *inode, unsigned int mode)
1182 if ((flock == NULL) || !IS_LEASE(flock)) 1214 if ((flock == NULL) || !IS_LEASE(flock))
1183 goto out; 1215 goto out;
1184 1216
1217 if (!locks_conflict(flock, new_fl))
1218 goto out;
1219
1185 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) 1220 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
1186 if (fl->fl_owner == current->files) 1221 if (fl->fl_owner == current->files)
1187 i_have_this_lease = 1; 1222 i_have_this_lease = 1;
1188 1223
1189 if (want_write) {
1190 /* If we want write access, we have to revoke any lease. */
1191 future = F_UNLCK | F_INPROGRESS;
1192 } else if (flock->fl_type & F_INPROGRESS) {
1193 /* If the lease is already being broken, we just leave it */
1194 future = flock->fl_type;
1195 } else if (flock->fl_type & F_WRLCK) {
1196 /* Downgrade the exclusive lease to a read-only lease. */
1197 future = F_RDLCK | F_INPROGRESS;
1198 } else {
1199 /* the existing lease was read-only, so we can read too. */
1200 goto out;
1201 }
1202
1203 if (IS_ERR(new_fl) && !i_have_this_lease 1224 if (IS_ERR(new_fl) && !i_have_this_lease
1204 && ((mode & O_NONBLOCK) == 0)) { 1225 && ((mode & O_NONBLOCK) == 0)) {
1205 error = PTR_ERR(new_fl); 1226 error = PTR_ERR(new_fl);
@@ -1214,12 +1235,18 @@ int __break_lease(struct inode *inode, unsigned int mode)
1214 } 1235 }
1215 1236
1216 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { 1237 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
1217 if (fl->fl_type != future) { 1238 if (want_write) {
1218 fl->fl_type = future; 1239 if (fl->fl_flags & FL_UNLOCK_PENDING)
1240 continue;
1241 fl->fl_flags |= FL_UNLOCK_PENDING;
1219 fl->fl_break_time = break_time; 1242 fl->fl_break_time = break_time;
1220 /* lease must have lmops break callback */ 1243 } else {
1221 fl->fl_lmops->lm_break(fl); 1244 if (lease_breaking(flock))
1245 continue;
1246 fl->fl_flags |= FL_DOWNGRADE_PENDING;
1247 fl->fl_downgrade_time = break_time;
1222 } 1248 }
1249 fl->fl_lmops->lm_break(fl);
1223 } 1250 }
1224 1251
1225 if (i_have_this_lease || (mode & O_NONBLOCK)) { 1252 if (i_have_this_lease || (mode & O_NONBLOCK)) {
@@ -1243,10 +1270,13 @@ restart:
1243 if (error >= 0) { 1270 if (error >= 0) {
1244 if (error == 0) 1271 if (error == 0)
1245 time_out_leases(inode); 1272 time_out_leases(inode);
1246 /* Wait for the next lease that has not been broken yet */ 1273 /*
1274 * Wait for the next conflicting lease that has not been
1275 * broken yet
1276 */
1247 for (flock = inode->i_flock; flock && IS_LEASE(flock); 1277 for (flock = inode->i_flock; flock && IS_LEASE(flock);
1248 flock = flock->fl_next) { 1278 flock = flock->fl_next) {
1249 if (flock->fl_type & F_INPROGRESS) 1279 if (locks_conflict(new_fl, flock))
1250 goto restart; 1280 goto restart;
1251 } 1281 }
1252 error = 0; 1282 error = 0;
@@ -1314,7 +1344,7 @@ int fcntl_getlease(struct file *filp)
1314 for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); 1344 for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
1315 fl = fl->fl_next) { 1345 fl = fl->fl_next) {
1316 if (fl->fl_file == filp) { 1346 if (fl->fl_file == filp) {
1317 type = fl->fl_type & ~F_INPROGRESS; 1347 type = target_leasetype(fl);
1318 break; 1348 break;
1319 } 1349 }
1320 } 1350 }
@@ -1322,50 +1352,23 @@ int fcntl_getlease(struct file *filp)
1322 return type; 1352 return type;
1323} 1353}
1324 1354
1325/** 1355int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
1326 * generic_setlease - sets a lease on an open file
1327 * @filp: file pointer
1328 * @arg: type of lease to obtain
1329 * @flp: input - file_lock to use, output - file_lock inserted
1330 *
1331 * The (input) flp->fl_lmops->lm_break function is required
1332 * by break_lease().
1333 *
1334 * Called with file_lock_lock held.
1335 */
1336int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1337{ 1356{
1338 struct file_lock *fl, **before, **my_before = NULL, *lease; 1357 struct file_lock *fl, **before, **my_before = NULL, *lease;
1339 struct dentry *dentry = filp->f_path.dentry; 1358 struct dentry *dentry = filp->f_path.dentry;
1340 struct inode *inode = dentry->d_inode; 1359 struct inode *inode = dentry->d_inode;
1341 int error, rdlease_count = 0, wrlease_count = 0; 1360 int error;
1342 1361
1343 lease = *flp; 1362 lease = *flp;
1344 1363
1345 error = -EACCES; 1364 error = -EAGAIN;
1346 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) 1365 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1347 goto out;
1348 error = -EINVAL;
1349 if (!S_ISREG(inode->i_mode))
1350 goto out; 1366 goto out;
1351 error = security_file_lock(filp, arg); 1367 if ((arg == F_WRLCK)
1352 if (error) 1368 && ((dentry->d_count > 1)
1369 || (atomic_read(&inode->i_count) > 1)))
1353 goto out; 1370 goto out;
1354 1371
1355 time_out_leases(inode);
1356
1357 BUG_ON(!(*flp)->fl_lmops->lm_break);
1358
1359 if (arg != F_UNLCK) {
1360 error = -EAGAIN;
1361 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1362 goto out;
1363 if ((arg == F_WRLCK)
1364 && ((dentry->d_count > 1)
1365 || (atomic_read(&inode->i_count) > 1)))
1366 goto out;
1367 }
1368
1369 /* 1372 /*
1370 * At this point, we know that if there is an exclusive 1373 * At this point, we know that if there is an exclusive
1371 * lease on this file, then we hold it on this filp 1374 * lease on this file, then we hold it on this filp
@@ -1374,27 +1377,28 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1374 * then the file is not open by anyone (including us) 1377 * then the file is not open by anyone (including us)
1375 * except for this filp. 1378 * except for this filp.
1376 */ 1379 */
1380 error = -EAGAIN;
1377 for (before = &inode->i_flock; 1381 for (before = &inode->i_flock;
1378 ((fl = *before) != NULL) && IS_LEASE(fl); 1382 ((fl = *before) != NULL) && IS_LEASE(fl);
1379 before = &fl->fl_next) { 1383 before = &fl->fl_next) {
1380 if (fl->fl_file == filp) 1384 if (fl->fl_file == filp) {
1381 my_before = before; 1385 my_before = before;
1382 else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) 1386 continue;
1383 /* 1387 }
1384 * Someone is in the process of opening this 1388 /*
1385 * file for writing so we may not take an 1389 * No exclusive leases if someone else has a lease on
1386 * exclusive lease on it. 1390 * this file:
1387 */ 1391 */
1388 wrlease_count++; 1392 if (arg == F_WRLCK)
1389 else 1393 goto out;
1390 rdlease_count++; 1394 /*
1395 * Modifying our existing lease is OK, but no getting a
1396 * new lease if someone else is opening for write:
1397 */
1398 if (fl->fl_flags & FL_UNLOCK_PENDING)
1399 goto out;
1391 } 1400 }
1392 1401
1393 error = -EAGAIN;
1394 if ((arg == F_RDLCK && (wrlease_count > 0)) ||
1395 (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0)))
1396 goto out;
1397
1398 if (my_before != NULL) { 1402 if (my_before != NULL) {
1399 error = lease->fl_lmops->lm_change(my_before, arg); 1403 error = lease->fl_lmops->lm_change(my_before, arg);
1400 if (!error) 1404 if (!error)
@@ -1402,9 +1406,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1402 goto out; 1406 goto out;
1403 } 1407 }
1404 1408
1405 if (arg == F_UNLCK)
1406 goto out;
1407
1408 error = -EINVAL; 1409 error = -EINVAL;
1409 if (!leases_enable) 1410 if (!leases_enable)
1410 goto out; 1411 goto out;
@@ -1415,6 +1416,62 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1415out: 1416out:
1416 return error; 1417 return error;
1417} 1418}
1419
1420int generic_delete_lease(struct file *filp, struct file_lock **flp)
1421{
1422 struct file_lock *fl, **before;
1423 struct dentry *dentry = filp->f_path.dentry;
1424 struct inode *inode = dentry->d_inode;
1425
1426 for (before = &inode->i_flock;
1427 ((fl = *before) != NULL) && IS_LEASE(fl);
1428 before = &fl->fl_next) {
1429 if (fl->fl_file != filp)
1430 continue;
1431 return (*flp)->fl_lmops->lm_change(before, F_UNLCK);
1432 }
1433 return -EAGAIN;
1434}
1435
1436/**
1437 * generic_setlease - sets a lease on an open file
1438 * @filp: file pointer
1439 * @arg: type of lease to obtain
1440 * @flp: input - file_lock to use, output - file_lock inserted
1441 *
1442 * The (input) flp->fl_lmops->lm_break function is required
1443 * by break_lease().
1444 *
1445 * Called with file_lock_lock held.
1446 */
1447int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1448{
1449 struct dentry *dentry = filp->f_path.dentry;
1450 struct inode *inode = dentry->d_inode;
1451 int error;
1452
1453 if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
1454 return -EACCES;
1455 if (!S_ISREG(inode->i_mode))
1456 return -EINVAL;
1457 error = security_file_lock(filp, arg);
1458 if (error)
1459 return error;
1460
1461 time_out_leases(inode);
1462
1463 BUG_ON(!(*flp)->fl_lmops->lm_break);
1464
1465 switch (arg) {
1466 case F_UNLCK:
1467 return generic_delete_lease(filp, flp);
1468 case F_RDLCK:
1469 case F_WRLCK:
1470 return generic_add_lease(filp, arg, flp);
1471 default:
1472 BUG();
1473 }
1474}
1418EXPORT_SYMBOL(generic_setlease); 1475EXPORT_SYMBOL(generic_setlease);
1419 1476
1420static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) 1477static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
@@ -2126,7 +2183,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2126 } 2183 }
2127 } else if (IS_LEASE(fl)) { 2184 } else if (IS_LEASE(fl)) {
2128 seq_printf(f, "LEASE "); 2185 seq_printf(f, "LEASE ");
2129 if (fl->fl_type & F_INPROGRESS) 2186 if (lease_breaking(fl))
2130 seq_printf(f, "BREAKING "); 2187 seq_printf(f, "BREAKING ");
2131 else if (fl->fl_file) 2188 else if (fl->fl_file)
2132 seq_printf(f, "ACTIVE "); 2189 seq_printf(f, "ACTIVE ");
@@ -2142,7 +2199,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2142 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); 2199 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
2143 } else { 2200 } else {
2144 seq_printf(f, "%s ", 2201 seq_printf(f, "%s ",
2145 (fl->fl_type & F_INPROGRESS) 2202 (lease_breaking(fl))
2146 ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " 2203 ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ "
2147 : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); 2204 : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ ");
2148 } 2205 }
diff --git a/fs/namei.c b/fs/namei.c
index b52bc685465f..0b3138de2a3b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -721,12 +721,6 @@ static int follow_automount(struct path *path, unsigned flags,
721 if (!path->dentry->d_op || !path->dentry->d_op->d_automount) 721 if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
722 return -EREMOTE; 722 return -EREMOTE;
723 723
724 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
725 * and this is the terminal part of the path.
726 */
727 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
728 return -EISDIR; /* we actually want to stop here */
729
730 /* We don't want to mount if someone's just doing a stat - 724 /* We don't want to mount if someone's just doing a stat -
731 * unless they're stat'ing a directory and appended a '/' to 725 * unless they're stat'ing a directory and appended a '/' to
732 * the name. 726 * the name.
@@ -739,7 +733,7 @@ static int follow_automount(struct path *path, unsigned flags,
739 * of the daemon to instantiate them before they can be used. 733 * of the daemon to instantiate them before they can be used.
740 */ 734 */
741 if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | 735 if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
742 LOOKUP_OPEN | LOOKUP_CREATE)) && 736 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
743 path->dentry->d_inode) 737 path->dentry->d_inode)
744 return -EISDIR; 738 return -EISDIR;
745 739
@@ -2616,6 +2610,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2616 if (!dir->i_op->rmdir) 2610 if (!dir->i_op->rmdir)
2617 return -EPERM; 2611 return -EPERM;
2618 2612
2613 dget(dentry);
2619 mutex_lock(&dentry->d_inode->i_mutex); 2614 mutex_lock(&dentry->d_inode->i_mutex);
2620 2615
2621 error = -EBUSY; 2616 error = -EBUSY;
@@ -2636,6 +2631,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2636 2631
2637out: 2632out:
2638 mutex_unlock(&dentry->d_inode->i_mutex); 2633 mutex_unlock(&dentry->d_inode->i_mutex);
2634 dput(dentry);
2639 if (!error) 2635 if (!error)
2640 d_delete(dentry); 2636 d_delete(dentry);
2641 return error; 2637 return error;
@@ -3025,6 +3021,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3025 if (error) 3021 if (error)
3026 return error; 3022 return error;
3027 3023
3024 dget(new_dentry);
3028 if (target) 3025 if (target)
3029 mutex_lock(&target->i_mutex); 3026 mutex_lock(&target->i_mutex);
3030 3027
@@ -3045,6 +3042,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3045out: 3042out:
3046 if (target) 3043 if (target)
3047 mutex_unlock(&target->i_mutex); 3044 mutex_unlock(&target->i_mutex);
3045 dput(new_dentry);
3048 if (!error) 3046 if (!error)
3049 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 3047 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3050 d_move(old_dentry,new_dentry); 3048 d_move(old_dentry,new_dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index 22bfe8273c68..b4febb29d3bb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1757,7 +1757,7 @@ static int do_loopback(struct path *path, char *old_name,
1757 return err; 1757 return err;
1758 if (!old_name || !*old_name) 1758 if (!old_name || !*old_name)
1759 return -EINVAL; 1759 return -EINVAL;
1760 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); 1760 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
1761 if (err) 1761 if (err)
1762 return err; 1762 return err;
1763 1763
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9561c8fc8bdb..281ae95932c9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -176,17 +176,6 @@ retry:
176 return bio; 176 return bio;
177} 177}
178 178
179static void bl_set_lo_fail(struct pnfs_layout_segment *lseg)
180{
181 if (lseg->pls_range.iomode == IOMODE_RW) {
182 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
183 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
184 } else {
185 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
186 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
187 }
188}
189
190/* This is basically copied from mpage_end_io_read */ 179/* This is basically copied from mpage_end_io_read */
191static void bl_end_io_read(struct bio *bio, int err) 180static void bl_end_io_read(struct bio *bio, int err)
192{ 181{
@@ -206,7 +195,7 @@ static void bl_end_io_read(struct bio *bio, int err)
206 if (!uptodate) { 195 if (!uptodate) {
207 if (!rdata->pnfs_error) 196 if (!rdata->pnfs_error)
208 rdata->pnfs_error = -EIO; 197 rdata->pnfs_error = -EIO;
209 bl_set_lo_fail(rdata->lseg); 198 pnfs_set_lo_fail(rdata->lseg);
210 } 199 }
211 bio_put(bio); 200 bio_put(bio);
212 put_parallel(par); 201 put_parallel(par);
@@ -303,6 +292,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
303 bl_end_io_read, par); 292 bl_end_io_read, par);
304 if (IS_ERR(bio)) { 293 if (IS_ERR(bio)) {
305 rdata->pnfs_error = PTR_ERR(bio); 294 rdata->pnfs_error = PTR_ERR(bio);
295 bio = NULL;
306 goto out; 296 goto out;
307 } 297 }
308 } 298 }
@@ -370,7 +360,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
370 if (!uptodate) { 360 if (!uptodate) {
371 if (!wdata->pnfs_error) 361 if (!wdata->pnfs_error)
372 wdata->pnfs_error = -EIO; 362 wdata->pnfs_error = -EIO;
373 bl_set_lo_fail(wdata->lseg); 363 pnfs_set_lo_fail(wdata->lseg);
374 } 364 }
375 bio_put(bio); 365 bio_put(bio);
376 put_parallel(par); 366 put_parallel(par);
@@ -386,7 +376,7 @@ static void bl_end_io_write(struct bio *bio, int err)
386 if (!uptodate) { 376 if (!uptodate) {
387 if (!wdata->pnfs_error) 377 if (!wdata->pnfs_error)
388 wdata->pnfs_error = -EIO; 378 wdata->pnfs_error = -EIO;
389 bl_set_lo_fail(wdata->lseg); 379 pnfs_set_lo_fail(wdata->lseg);
390 } 380 }
391 bio_put(bio); 381 bio_put(bio);
392 put_parallel(par); 382 put_parallel(par);
@@ -543,6 +533,11 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
543fill_invalid_ext: 533fill_invalid_ext:
544 dprintk("%s need to zero %d pages\n", __func__, npg_zero); 534 dprintk("%s need to zero %d pages\n", __func__, npg_zero);
545 for (;npg_zero > 0; npg_zero--) { 535 for (;npg_zero > 0; npg_zero--) {
536 if (bl_is_sector_init(be->be_inval, isect)) {
537 dprintk("isect %llu already init\n",
538 (unsigned long long)isect);
539 goto next_page;
540 }
546 /* page ref released in bl_end_io_write_zero */ 541 /* page ref released in bl_end_io_write_zero */
547 index = isect >> PAGE_CACHE_SECTOR_SHIFT; 542 index = isect >> PAGE_CACHE_SECTOR_SHIFT;
548 dprintk("%s zero %dth page: index %lu isect %llu\n", 543 dprintk("%s zero %dth page: index %lu isect %llu\n",
@@ -562,8 +557,7 @@ fill_invalid_ext:
562 * PageUptodate: It was read before 557 * PageUptodate: It was read before
563 * sector_initialized: already written out 558 * sector_initialized: already written out
564 */ 559 */
565 if (PageDirty(page) || PageWriteback(page) || 560 if (PageDirty(page) || PageWriteback(page)) {
566 bl_is_sector_init(be->be_inval, isect)) {
567 print_page(page); 561 print_page(page);
568 unlock_page(page); 562 unlock_page(page);
569 page_cache_release(page); 563 page_cache_release(page);
@@ -592,6 +586,7 @@ fill_invalid_ext:
592 bl_end_io_write_zero, par); 586 bl_end_io_write_zero, par);
593 if (IS_ERR(bio)) { 587 if (IS_ERR(bio)) {
594 wdata->pnfs_error = PTR_ERR(bio); 588 wdata->pnfs_error = PTR_ERR(bio);
589 bio = NULL;
595 goto out; 590 goto out;
596 } 591 }
597 /* FIXME: This should be done in bi_end_io */ 592 /* FIXME: This should be done in bi_end_io */
@@ -640,6 +635,7 @@ next_page:
640 bl_end_io_write, par); 635 bl_end_io_write, par);
641 if (IS_ERR(bio)) { 636 if (IS_ERR(bio)) {
642 wdata->pnfs_error = PTR_ERR(bio); 637 wdata->pnfs_error = PTR_ERR(bio);
638 bio = NULL;
643 goto out; 639 goto out;
644 } 640 }
645 isect += PAGE_CACHE_SECTORS; 641 isect += PAGE_CACHE_SECTORS;
@@ -805,7 +801,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
805 struct nfs4_deviceid *d_id) 801 struct nfs4_deviceid *d_id)
806{ 802{
807 struct pnfs_device *dev; 803 struct pnfs_device *dev;
808 struct pnfs_block_dev *rv = NULL; 804 struct pnfs_block_dev *rv;
809 u32 max_resp_sz; 805 u32 max_resp_sz;
810 int max_pages; 806 int max_pages;
811 struct page **pages = NULL; 807 struct page **pages = NULL;
@@ -823,18 +819,20 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
823 dev = kmalloc(sizeof(*dev), GFP_NOFS); 819 dev = kmalloc(sizeof(*dev), GFP_NOFS);
824 if (!dev) { 820 if (!dev) {
825 dprintk("%s kmalloc failed\n", __func__); 821 dprintk("%s kmalloc failed\n", __func__);
826 return NULL; 822 return ERR_PTR(-ENOMEM);
827 } 823 }
828 824
829 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); 825 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
830 if (pages == NULL) { 826 if (pages == NULL) {
831 kfree(dev); 827 kfree(dev);
832 return NULL; 828 return ERR_PTR(-ENOMEM);
833 } 829 }
834 for (i = 0; i < max_pages; i++) { 830 for (i = 0; i < max_pages; i++) {
835 pages[i] = alloc_page(GFP_NOFS); 831 pages[i] = alloc_page(GFP_NOFS);
836 if (!pages[i]) 832 if (!pages[i]) {
833 rv = ERR_PTR(-ENOMEM);
837 goto out_free; 834 goto out_free;
835 }
838 } 836 }
839 837
840 memcpy(&dev->dev_id, d_id, sizeof(*d_id)); 838 memcpy(&dev->dev_id, d_id, sizeof(*d_id));
@@ -847,8 +845,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
847 dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); 845 dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
848 rc = nfs4_proc_getdeviceinfo(server, dev); 846 rc = nfs4_proc_getdeviceinfo(server, dev);
849 dprintk("%s getdevice info returns %d\n", __func__, rc); 847 dprintk("%s getdevice info returns %d\n", __func__, rc);
850 if (rc) 848 if (rc) {
849 rv = ERR_PTR(rc);
851 goto out_free; 850 goto out_free;
851 }
852 852
853 rv = nfs4_blk_decode_device(server, dev); 853 rv = nfs4_blk_decode_device(server, dev);
854 out_free: 854 out_free:
@@ -866,7 +866,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
866 struct pnfs_devicelist *dlist = NULL; 866 struct pnfs_devicelist *dlist = NULL;
867 struct pnfs_block_dev *bdev; 867 struct pnfs_block_dev *bdev;
868 LIST_HEAD(block_disklist); 868 LIST_HEAD(block_disklist);
869 int status = 0, i; 869 int status, i;
870 870
871 dprintk("%s enter\n", __func__); 871 dprintk("%s enter\n", __func__);
872 872
@@ -898,8 +898,8 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
898 for (i = 0; i < dlist->num_devs; i++) { 898 for (i = 0; i < dlist->num_devs; i++) {
899 bdev = nfs4_blk_get_deviceinfo(server, fh, 899 bdev = nfs4_blk_get_deviceinfo(server, fh,
900 &dlist->dev_id[i]); 900 &dlist->dev_id[i]);
901 if (!bdev) { 901 if (IS_ERR(bdev)) {
902 status = -ENODEV; 902 status = PTR_ERR(bdev);
903 goto out_error; 903 goto out_error;
904 } 904 }
905 spin_lock(&b_mt_id->bm_lock); 905 spin_lock(&b_mt_id->bm_lock);
@@ -960,7 +960,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
960}; 960};
961 961
962static const struct rpc_pipe_ops bl_upcall_ops = { 962static const struct rpc_pipe_ops bl_upcall_ops = {
963 .upcall = bl_pipe_upcall, 963 .upcall = rpc_pipe_generic_upcall,
964 .downcall = bl_pipe_downcall, 964 .downcall = bl_pipe_downcall,
965 .destroy_msg = bl_pipe_destroy_msg, 965 .destroy_msg = bl_pipe_destroy_msg,
966}; 966};
@@ -989,17 +989,20 @@ static int __init nfs4blocklayout_init(void)
989 mnt, 989 mnt,
990 NFS_PIPE_DIRNAME, 0, &path); 990 NFS_PIPE_DIRNAME, 0, &path);
991 if (ret) 991 if (ret)
992 goto out_remove; 992 goto out_putrpc;
993 993
994 bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, 994 bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
995 &bl_upcall_ops, 0); 995 &bl_upcall_ops, 0);
996 path_put(&path);
996 if (IS_ERR(bl_device_pipe)) { 997 if (IS_ERR(bl_device_pipe)) {
997 ret = PTR_ERR(bl_device_pipe); 998 ret = PTR_ERR(bl_device_pipe);
998 goto out_remove; 999 goto out_putrpc;
999 } 1000 }
1000out: 1001out:
1001 return ret; 1002 return ret;
1002 1003
1004out_putrpc:
1005 rpc_put_mount();
1003out_remove: 1006out_remove:
1004 pnfs_unregister_layoutdriver(&blocklayout_type); 1007 pnfs_unregister_layoutdriver(&blocklayout_type);
1005 return ret; 1008 return ret;
@@ -1012,6 +1015,7 @@ static void __exit nfs4blocklayout_exit(void)
1012 1015
1013 pnfs_unregister_layoutdriver(&blocklayout_type); 1016 pnfs_unregister_layoutdriver(&blocklayout_type);
1014 rpc_unlink(bl_device_pipe); 1017 rpc_unlink(bl_device_pipe);
1018 rpc_put_mount();
1015} 1019}
1016 1020
1017MODULE_ALIAS("nfs-layouttype4-3"); 1021MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f27d827960a3..42acf7ef5992 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -150,7 +150,7 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
150} 150}
151 151
152struct bl_dev_msg { 152struct bl_dev_msg {
153 int status; 153 int32_t status;
154 uint32_t major, minor; 154 uint32_t major, minor;
155}; 155};
156 156
@@ -169,8 +169,6 @@ extern wait_queue_head_t bl_wq;
169#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ 169#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
170 170
171/* blocklayoutdev.c */ 171/* blocklayoutdev.c */
172ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
173 char __user *, size_t);
174ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); 172ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
175void bl_pipe_destroy_msg(struct rpc_pipe_msg *); 173void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
176struct block_device *nfs4_blkdev_get(dev_t dev); 174struct block_device *nfs4_blkdev_get(dev_t dev);
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a83b393fb01c..d08ba9107fde 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -79,28 +79,6 @@ int nfs4_blkdev_put(struct block_device *bdev)
79 return blkdev_put(bdev, FMODE_READ); 79 return blkdev_put(bdev, FMODE_READ);
80} 80}
81 81
82/*
83 * Shouldn't there be a rpc_generic_upcall() to do this for us?
84 */
85ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
86 char __user *dst, size_t buflen)
87{
88 char *data = (char *)msg->data + msg->copied;
89 size_t mlen = min(msg->len - msg->copied, buflen);
90 unsigned long left;
91
92 left = copy_to_user(dst, data, mlen);
93 if (left == mlen) {
94 msg->errno = -EFAULT;
95 return -EFAULT;
96 }
97
98 mlen -= left;
99 msg->copied += mlen;
100 msg->errno = 0;
101 return mlen;
102}
103
104static struct bl_dev_msg bl_mount_reply; 82static struct bl_dev_msg bl_mount_reply;
105 83
106ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, 84ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
@@ -131,7 +109,7 @@ struct pnfs_block_dev *
131nfs4_blk_decode_device(struct nfs_server *server, 109nfs4_blk_decode_device(struct nfs_server *server,
132 struct pnfs_device *dev) 110 struct pnfs_device *dev)
133{ 111{
134 struct pnfs_block_dev *rv = NULL; 112 struct pnfs_block_dev *rv;
135 struct block_device *bd = NULL; 113 struct block_device *bd = NULL;
136 struct rpc_pipe_msg msg; 114 struct rpc_pipe_msg msg;
137 struct bl_msg_hdr bl_msg = { 115 struct bl_msg_hdr bl_msg = {
@@ -141,7 +119,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
141 uint8_t *dataptr; 119 uint8_t *dataptr;
142 DECLARE_WAITQUEUE(wq, current); 120 DECLARE_WAITQUEUE(wq, current);
143 struct bl_dev_msg *reply = &bl_mount_reply; 121 struct bl_dev_msg *reply = &bl_mount_reply;
144 int offset, len, i; 122 int offset, len, i, rc;
145 123
146 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); 124 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
147 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, 125 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
@@ -168,8 +146,10 @@ nfs4_blk_decode_device(struct nfs_server *server,
168 146
169 dprintk("%s CALLING USERSPACE DAEMON\n", __func__); 147 dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
170 add_wait_queue(&bl_wq, &wq); 148 add_wait_queue(&bl_wq, &wq);
171 if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { 149 rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg);
150 if (rc < 0) {
172 remove_wait_queue(&bl_wq, &wq); 151 remove_wait_queue(&bl_wq, &wq);
152 rv = ERR_PTR(rc);
173 goto out; 153 goto out;
174 } 154 }
175 155
@@ -187,8 +167,9 @@ nfs4_blk_decode_device(struct nfs_server *server,
187 167
188 bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); 168 bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor));
189 if (IS_ERR(bd)) { 169 if (IS_ERR(bd)) {
190 dprintk("%s failed to open device : %ld\n", 170 rc = PTR_ERR(bd);
191 __func__, PTR_ERR(bd)); 171 dprintk("%s failed to open device : %d\n", __func__, rc);
172 rv = ERR_PTR(rc);
192 goto out; 173 goto out;
193 } 174 }
194 175
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e3d294269058..516f3375e067 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -125,7 +125,7 @@ nfs4_callback_up(struct svc_serv *serv)
125 else 125 else
126 goto out_err; 126 goto out_err;
127 127
128 return svc_prepare_thread(serv, &serv->sv_pools[0]); 128 return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
129 129
130out_err: 130out_err:
131 if (ret == 0) 131 if (ret == 0)
@@ -199,7 +199,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
199 INIT_LIST_HEAD(&serv->sv_cb_list); 199 INIT_LIST_HEAD(&serv->sv_cb_list);
200 spin_lock_init(&serv->sv_cb_lock); 200 spin_lock_init(&serv->sv_cb_lock);
201 init_waitqueue_head(&serv->sv_cb_waitq); 201 init_waitqueue_head(&serv->sv_cb_waitq);
202 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); 202 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
203 if (IS_ERR(rqstp)) { 203 if (IS_ERR(rqstp)) {
204 svc_xprt_put(serv->sv_bc_xprt); 204 svc_xprt_put(serv->sv_bc_xprt);
205 serv->sv_bc_xprt = NULL; 205 serv->sv_bc_xprt = NULL;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5833fbbf59b0..873bf00d51a2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -336,11 +336,12 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
336 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; 336 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
337 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; 337 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
338 338
339 if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && 339 if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
340 sin1->sin6_scope_id != sin2->sin6_scope_id)
341 return 0; 340 return 0;
341 else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
342 return sin1->sin6_scope_id == sin2->sin6_scope_id;
342 343
343 return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); 344 return 1;
344} 345}
345#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ 346#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
346static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, 347static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
@@ -1867,6 +1868,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1867 /* display one transport per line on subsequent lines */ 1868 /* display one transport per line on subsequent lines */
1868 clp = list_entry(v, struct nfs_client, cl_share_link); 1869 clp = list_entry(v, struct nfs_client, cl_share_link);
1869 1870
1871 /* Check if the client is initialized */
1872 if (clp->cl_cons_state != NFS_CS_READY)
1873 return 0;
1874
1870 seq_printf(m, "v%u %s %s %3d %s\n", 1875 seq_printf(m, "v%u %s %s %3d %s\n",
1871 clp->rpc_ops->version, 1876 clp->rpc_ops->version,
1872 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), 1877 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 321a66bc3846..7f2654069806 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -240,7 +240,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
240 sizeof(delegation->stateid.data)); 240 sizeof(delegation->stateid.data));
241 delegation->type = res->delegation_type; 241 delegation->type = res->delegation_type;
242 delegation->maxsize = res->maxsize; 242 delegation->maxsize = res->maxsize;
243 delegation->change_attr = nfsi->change_attr; 243 delegation->change_attr = inode->i_version;
244 delegation->cred = get_rpccred(cred); 244 delegation->cred = get_rpccred(cred);
245 delegation->inode = inode; 245 delegation->inode = inode;
246 delegation->flags = 1<<NFS_DELEGATION_REFERENCED; 246 delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
index 5b1006480bc2..7cf2c4699b08 100644
--- a/fs/nfs/fscache-index.c
+++ b/fs/nfs/fscache-index.c
@@ -212,7 +212,7 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
212 auxdata.ctime = nfsi->vfs_inode.i_ctime; 212 auxdata.ctime = nfsi->vfs_inode.i_ctime;
213 213
214 if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) 214 if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
215 auxdata.change_attr = nfsi->change_attr; 215 auxdata.change_attr = nfsi->vfs_inode.i_version;
216 216
217 if (bufmax > sizeof(auxdata)) 217 if (bufmax > sizeof(auxdata))
218 bufmax = sizeof(auxdata); 218 bufmax = sizeof(auxdata);
@@ -244,7 +244,7 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
244 auxdata.ctime = nfsi->vfs_inode.i_ctime; 244 auxdata.ctime = nfsi->vfs_inode.i_ctime;
245 245
246 if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) 246 if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
247 auxdata.change_attr = nfsi->change_attr; 247 auxdata.change_attr = nfsi->vfs_inode.i_version;
248 248
249 if (memcmp(data, &auxdata, datalen) != 0) 249 if (memcmp(data, &auxdata, datalen) != 0)
250 return FSCACHE_CHECKAUX_OBSOLETE; 250 return FSCACHE_CHECKAUX_OBSOLETE;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f20801ae0a16..47d1c6ff2d8e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -336,8 +336,6 @@ struct idmap {
336 struct idmap_hashtable idmap_group_hash; 336 struct idmap_hashtable idmap_group_hash;
337}; 337};
338 338
339static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
340 char __user *, size_t);
341static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 339static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
342 size_t); 340 size_t);
343static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); 341static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
@@ -345,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
345static unsigned int fnvhash32(const void *, size_t); 343static unsigned int fnvhash32(const void *, size_t);
346 344
347static const struct rpc_pipe_ops idmap_upcall_ops = { 345static const struct rpc_pipe_ops idmap_upcall_ops = {
348 .upcall = idmap_pipe_upcall, 346 .upcall = rpc_pipe_generic_upcall,
349 .downcall = idmap_pipe_downcall, 347 .downcall = idmap_pipe_downcall,
350 .destroy_msg = idmap_pipe_destroy_msg, 348 .destroy_msg = idmap_pipe_destroy_msg,
351}; 349};
@@ -595,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
595 return ret; 593 return ret;
596} 594}
597 595
598/* RPC pipefs upcall/downcall routines */
599static ssize_t
600idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
601 char __user *dst, size_t buflen)
602{
603 char *data = (char *)msg->data + msg->copied;
604 size_t mlen = min(msg->len, buflen);
605 unsigned long left;
606
607 left = copy_to_user(dst, data, mlen);
608 if (left == mlen) {
609 msg->errno = -EFAULT;
610 return -EFAULT;
611 }
612
613 mlen -= left;
614 msg->copied += mlen;
615 msg->errno = 0;
616 return mlen;
617}
618
619static ssize_t 596static ssize_t
620idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) 597idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
621{ 598{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index fe1203797b2b..4dc6d078f108 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -318,7 +318,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
318 memset(&inode->i_atime, 0, sizeof(inode->i_atime)); 318 memset(&inode->i_atime, 0, sizeof(inode->i_atime));
319 memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); 319 memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); 320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
321 nfsi->change_attr = 0; 321 inode->i_version = 0;
322 inode->i_size = 0; 322 inode->i_size = 0;
323 inode->i_nlink = 0; 323 inode->i_nlink = 0;
324 inode->i_uid = -2; 324 inode->i_uid = -2;
@@ -344,7 +344,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
344 | NFS_INO_INVALID_ACCESS 344 | NFS_INO_INVALID_ACCESS
345 | NFS_INO_INVALID_ACL; 345 | NFS_INO_INVALID_ACL;
346 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 346 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
347 nfsi->change_attr = fattr->change_attr; 347 inode->i_version = fattr->change_attr;
348 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 348 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
349 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 349 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
350 | NFS_INO_INVALID_DATA; 350 | NFS_INO_INVALID_DATA;
@@ -897,8 +897,8 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
897 897
898 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) 898 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
899 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) 899 && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
900 && nfsi->change_attr == fattr->pre_change_attr) { 900 && inode->i_version == fattr->pre_change_attr) {
901 nfsi->change_attr = fattr->change_attr; 901 inode->i_version = fattr->change_attr;
902 if (S_ISDIR(inode->i_mode)) 902 if (S_ISDIR(inode->i_mode))
903 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 903 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
904 ret |= NFS_INO_INVALID_ATTR; 904 ret |= NFS_INO_INVALID_ATTR;
@@ -952,7 +952,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
952 return -EIO; 952 return -EIO;
953 953
954 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && 954 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
955 nfsi->change_attr != fattr->change_attr) 955 inode->i_version != fattr->change_attr)
956 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 956 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
957 957
958 /* Verify a few of the more important attributes */ 958 /* Verify a few of the more important attributes */
@@ -1163,7 +1163,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
1163 } 1163 }
1164 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && 1164 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
1165 (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { 1165 (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
1166 fattr->pre_change_attr = NFS_I(inode)->change_attr; 1166 fattr->pre_change_attr = inode->i_version;
1167 fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; 1167 fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
1168 } 1168 }
1169 if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && 1169 if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
@@ -1244,13 +1244,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1244 1244
1245 /* More cache consistency checks */ 1245 /* More cache consistency checks */
1246 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { 1246 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
1247 if (nfsi->change_attr != fattr->change_attr) { 1247 if (inode->i_version != fattr->change_attr) {
1248 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1248 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1249 inode->i_sb->s_id, inode->i_ino); 1249 inode->i_sb->s_id, inode->i_ino);
1250 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1250 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1251 if (S_ISDIR(inode->i_mode)) 1251 if (S_ISDIR(inode->i_mode))
1252 nfs_force_lookup_revalidate(inode); 1252 nfs_force_lookup_revalidate(inode);
1253 nfsi->change_attr = fattr->change_attr; 1253 inode->i_version = fattr->change_attr;
1254 } 1254 }
1255 } else if (server->caps & NFS_CAP_CHANGE_ATTR) 1255 } else if (server->caps & NFS_CAP_CHANGE_ATTR)
1256 invalid |= save_cache_validity; 1256 invalid |= save_cache_validity;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ab12913dd473..c1a1bd8ddf1c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -457,13 +457,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
457 PAGE_SIZE - 1) >> PAGE_SHIFT; 457 PAGE_SIZE - 1) >> PAGE_SHIFT;
458} 458}
459 459
460/*
461 * Helper for restarting RPC calls in the possible presence of NFSv4.1
462 * sessions.
463 */
464static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp)
465{
466 if (nfs4_has_session(clp))
467 return rpc_restart_call_prepare(task);
468 return rpc_restart_call(task);
469}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 1ec1a85fa71c..693ae22f8731 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,30 +13,6 @@
13 13
14struct idmap; 14struct idmap;
15 15
16/*
17 * In a seqid-mutating op, this macro controls which error return
18 * values trigger incrementation of the seqid.
19 *
20 * from rfc 3010:
21 * The client MUST monotonically increment the sequence number for the
22 * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
23 * operations. This is true even in the event that the previous
24 * operation that used the sequence number received an error. The only
25 * exception to this rule is if the previous operation received one of
26 * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
27 * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
28 * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
29 *
30 */
31#define seqid_mutating_err(err) \
32(((err) != NFSERR_STALE_CLIENTID) && \
33 ((err) != NFSERR_STALE_STATEID) && \
34 ((err) != NFSERR_BAD_STATEID) && \
35 ((err) != NFSERR_BAD_SEQID) && \
36 ((err) != NFSERR_BAD_XDR) && \
37 ((err) != NFSERR_RESOURCE) && \
38 ((err) != NFSERR_NOFILEHANDLE))
39
40enum nfs4_client_state { 16enum nfs4_client_state {
41 NFS4CLNT_MANAGER_RUNNING = 0, 17 NFS4CLNT_MANAGER_RUNNING = 0,
42 NFS4CLNT_CHECK_LEASE, 18 NFS4CLNT_CHECK_LEASE,
@@ -56,6 +32,9 @@ enum nfs4_session_state {
56 NFS4_SESSION_DRAINING, 32 NFS4_SESSION_DRAINING,
57}; 33};
58 34
35#define NFS4_RENEW_TIMEOUT 0x01
36#define NFS4_RENEW_DELEGATION_CB 0x02
37
59struct nfs4_minor_version_ops { 38struct nfs4_minor_version_ops {
60 u32 minor_version; 39 u32 minor_version;
61 40
@@ -225,7 +204,7 @@ struct nfs4_state_recovery_ops {
225}; 204};
226 205
227struct nfs4_state_maintenance_ops { 206struct nfs4_state_maintenance_ops {
228 int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); 207 int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned);
229 struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); 208 struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
230 int (*renew_lease)(struct nfs_client *, struct rpc_cred *); 209 int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
231}; 210};
@@ -237,8 +216,6 @@ extern const struct inode_operations nfs4_dir_inode_operations;
237extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 216extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
238extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 217extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
239extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 218extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
240extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
241extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 219extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 220extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
244extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 221extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -349,6 +326,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
349extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 326extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
350extern void nfs4_schedule_lease_recovery(struct nfs_client *); 327extern void nfs4_schedule_lease_recovery(struct nfs_client *);
351extern void nfs4_schedule_state_manager(struct nfs_client *); 328extern void nfs4_schedule_state_manager(struct nfs_client *);
329extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
352extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); 330extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
353extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 331extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
354extern void nfs41_handle_recall_slot(struct nfs_client *clp); 332extern void nfs41_handle_recall_slot(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e8915d4840ad..09119418402f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -77,19 +77,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
77 BUG(); 77 BUG();
78} 78}
79 79
80/* For data server errors we don't recover from */
81static void
82filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
83{
84 if (lseg->pls_range.iomode == IOMODE_RW) {
85 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
86 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
87 } else {
88 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
89 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
90 }
91}
92
93static int filelayout_async_handle_error(struct rpc_task *task, 80static int filelayout_async_handle_error(struct rpc_task *task,
94 struct nfs4_state *state, 81 struct nfs4_state *state,
95 struct nfs_client *clp, 82 struct nfs_client *clp,
@@ -135,7 +122,6 @@ static int filelayout_async_handle_error(struct rpc_task *task,
135static int filelayout_read_done_cb(struct rpc_task *task, 122static int filelayout_read_done_cb(struct rpc_task *task,
136 struct nfs_read_data *data) 123 struct nfs_read_data *data)
137{ 124{
138 struct nfs_client *clp = data->ds_clp;
139 int reset = 0; 125 int reset = 0;
140 126
141 dprintk("%s DS read\n", __func__); 127 dprintk("%s DS read\n", __func__);
@@ -145,11 +131,10 @@ static int filelayout_read_done_cb(struct rpc_task *task,
145 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 131 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
146 __func__, data->ds_clp, data->ds_clp->cl_session); 132 __func__, data->ds_clp, data->ds_clp->cl_session);
147 if (reset) { 133 if (reset) {
148 filelayout_set_lo_fail(data->lseg); 134 pnfs_set_lo_fail(data->lseg);
149 nfs4_reset_read(task, data); 135 nfs4_reset_read(task, data);
150 clp = NFS_SERVER(data->inode)->nfs_client;
151 } 136 }
152 nfs_restart_rpc(task, clp); 137 rpc_restart_call_prepare(task);
153 return -EAGAIN; 138 return -EAGAIN;
154 } 139 }
155 140
@@ -216,17 +201,13 @@ static int filelayout_write_done_cb(struct rpc_task *task,
216 201
217 if (filelayout_async_handle_error(task, data->args.context->state, 202 if (filelayout_async_handle_error(task, data->args.context->state,
218 data->ds_clp, &reset) == -EAGAIN) { 203 data->ds_clp, &reset) == -EAGAIN) {
219 struct nfs_client *clp;
220
221 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 204 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
222 __func__, data->ds_clp, data->ds_clp->cl_session); 205 __func__, data->ds_clp, data->ds_clp->cl_session);
223 if (reset) { 206 if (reset) {
224 filelayout_set_lo_fail(data->lseg); 207 pnfs_set_lo_fail(data->lseg);
225 nfs4_reset_write(task, data); 208 nfs4_reset_write(task, data);
226 clp = NFS_SERVER(data->inode)->nfs_client; 209 }
227 } else 210 rpc_restart_call_prepare(task);
228 clp = data->ds_clp;
229 nfs_restart_rpc(task, clp);
230 return -EAGAIN; 211 return -EAGAIN;
231 } 212 }
232 213
@@ -256,9 +237,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
256 __func__, data->ds_clp, data->ds_clp->cl_session); 237 __func__, data->ds_clp, data->ds_clp->cl_session);
257 if (reset) { 238 if (reset) {
258 prepare_to_resend_writes(data); 239 prepare_to_resend_writes(data);
259 filelayout_set_lo_fail(data->lseg); 240 pnfs_set_lo_fail(data->lseg);
260 } else 241 } else
261 nfs_restart_rpc(task, data->ds_clp); 242 rpc_restart_call_prepare(task);
262 return -EAGAIN; 243 return -EAGAIN;
263 } 244 }
264 245
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8c77039e7a81..d2ae413c986a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -73,9 +73,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data);
73static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 73static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
74static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 74static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
75static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 75static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
76static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
77 const struct qstr *name, struct nfs_fh *fhandle,
78 struct nfs_fattr *fattr);
79static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 76static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
80static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 77static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
81 struct nfs_fattr *fattr, struct iattr *sattr, 78 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -753,9 +750,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
753 750
754 spin_lock(&dir->i_lock); 751 spin_lock(&dir->i_lock);
755 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; 752 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
756 if (!cinfo->atomic || cinfo->before != nfsi->change_attr) 753 if (!cinfo->atomic || cinfo->before != dir->i_version)
757 nfs_force_lookup_revalidate(dir); 754 nfs_force_lookup_revalidate(dir);
758 nfsi->change_attr = cinfo->after; 755 dir->i_version = cinfo->after;
759 spin_unlock(&dir->i_lock); 756 spin_unlock(&dir->i_lock);
760} 757}
761 758
@@ -1596,8 +1593,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1596 int status; 1593 int status;
1597 1594
1598 status = nfs4_run_open_task(data, 0); 1595 status = nfs4_run_open_task(data, 0);
1599 if (status != 0 || !data->rpc_done) 1596 if (!data->rpc_done)
1597 return status;
1598 if (status != 0) {
1599 if (status == -NFS4ERR_BADNAME &&
1600 !(o_arg->open_flags & O_CREAT))
1601 return -ENOENT;
1600 return status; 1602 return status;
1603 }
1601 1604
1602 if (o_arg->open_flags & O_CREAT) { 1605 if (o_arg->open_flags & O_CREAT) {
1603 update_changeattr(dir, &o_res->cinfo); 1606 update_changeattr(dir, &o_res->cinfo);
@@ -2408,14 +2411,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2408 return status; 2411 return status;
2409} 2412}
2410 2413
2411static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, 2414static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2412 const struct nfs_fh *dirfh, const struct qstr *name, 2415 const struct qstr *name, struct nfs_fh *fhandle,
2413 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2416 struct nfs_fattr *fattr)
2414{ 2417{
2418 struct nfs_server *server = NFS_SERVER(dir);
2415 int status; 2419 int status;
2416 struct nfs4_lookup_arg args = { 2420 struct nfs4_lookup_arg args = {
2417 .bitmask = server->attr_bitmask, 2421 .bitmask = server->attr_bitmask,
2418 .dir_fh = dirfh, 2422 .dir_fh = NFS_FH(dir),
2419 .name = name, 2423 .name = name,
2420 }; 2424 };
2421 struct nfs4_lookup_res res = { 2425 struct nfs4_lookup_res res = {
@@ -2431,40 +2435,8 @@ static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
2431 2435
2432 nfs_fattr_init(fattr); 2436 nfs_fattr_init(fattr);
2433 2437
2434 dprintk("NFS call lookupfh %s\n", name->name);
2435 status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
2436 dprintk("NFS reply lookupfh: %d\n", status);
2437 return status;
2438}
2439
2440static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2441 struct qstr *name, struct nfs_fh *fhandle,
2442 struct nfs_fattr *fattr)
2443{
2444 struct nfs4_exception exception = { };
2445 int err;
2446 do {
2447 err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
2448 /* FIXME: !!!! */
2449 if (err == -NFS4ERR_MOVED) {
2450 err = -EREMOTE;
2451 break;
2452 }
2453 err = nfs4_handle_exception(server, err, &exception);
2454 } while (exception.retry);
2455 return err;
2456}
2457
2458static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2459 const struct qstr *name, struct nfs_fh *fhandle,
2460 struct nfs_fattr *fattr)
2461{
2462 int status;
2463
2464 dprintk("NFS call lookup %s\n", name->name); 2438 dprintk("NFS call lookup %s\n", name->name);
2465 status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); 2439 status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
2466 if (status == -NFS4ERR_MOVED)
2467 status = nfs4_get_referral(dir, name, fattr, fhandle);
2468 dprintk("NFS reply lookup: %d\n", status); 2440 dprintk("NFS reply lookup: %d\n", status);
2469 return status; 2441 return status;
2470} 2442}
@@ -2485,11 +2457,20 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst
2485 struct nfs4_exception exception = { }; 2457 struct nfs4_exception exception = { };
2486 int err; 2458 int err;
2487 do { 2459 do {
2488 err = nfs4_handle_exception(NFS_SERVER(dir), 2460 int status;
2489 _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr), 2461
2490 &exception); 2462 status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr);
2491 if (err == -EPERM) 2463 switch (status) {
2464 case -NFS4ERR_BADNAME:
2465 return -ENOENT;
2466 case -NFS4ERR_MOVED:
2467 err = nfs4_get_referral(dir, name, fattr, fhandle);
2468 break;
2469 case -NFS4ERR_WRONGSEC:
2492 nfs_fixup_secinfo_attributes(fattr, fhandle); 2470 nfs_fixup_secinfo_attributes(fattr, fhandle);
2471 }
2472 err = nfs4_handle_exception(NFS_SERVER(dir),
2473 status, &exception);
2493 } while (exception.retry); 2474 } while (exception.retry);
2494 return err; 2475 return err;
2495} 2476}
@@ -3210,7 +3191,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3210 struct nfs_server *server = NFS_SERVER(data->inode); 3191 struct nfs_server *server = NFS_SERVER(data->inode);
3211 3192
3212 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3193 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3213 nfs_restart_rpc(task, server->nfs_client); 3194 rpc_restart_call_prepare(task);
3214 return -EAGAIN; 3195 return -EAGAIN;
3215 } 3196 }
3216 3197
@@ -3260,7 +3241,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
3260 struct inode *inode = data->inode; 3241 struct inode *inode = data->inode;
3261 3242
3262 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3243 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3263 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3244 rpc_restart_call_prepare(task);
3264 return -EAGAIN; 3245 return -EAGAIN;
3265 } 3246 }
3266 if (task->tk_status >= 0) { 3247 if (task->tk_status >= 0) {
@@ -3317,7 +3298,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
3317 struct inode *inode = data->inode; 3298 struct inode *inode = data->inode;
3318 3299
3319 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3300 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3320 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3301 rpc_restart_call_prepare(task);
3321 return -EAGAIN; 3302 return -EAGAIN;
3322 } 3303 }
3323 nfs_refresh_inode(inode, data->res.fattr); 3304 nfs_refresh_inode(inode, data->res.fattr);
@@ -3374,9 +3355,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3374 3355
3375 if (task->tk_status < 0) { 3356 if (task->tk_status < 0) {
3376 /* Unless we're shutting down, schedule state recovery! */ 3357 /* Unless we're shutting down, schedule state recovery! */
3377 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3358 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
3359 return;
3360 if (task->tk_status != NFS4ERR_CB_PATH_DOWN) {
3378 nfs4_schedule_lease_recovery(clp); 3361 nfs4_schedule_lease_recovery(clp);
3379 return; 3362 return;
3363 }
3364 nfs4_schedule_path_down_recovery(clp);
3380 } 3365 }
3381 do_renew_lease(clp, timestamp); 3366 do_renew_lease(clp, timestamp);
3382} 3367}
@@ -3386,7 +3371,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
3386 .rpc_release = nfs4_renew_release, 3371 .rpc_release = nfs4_renew_release,
3387}; 3372};
3388 3373
3389int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) 3374static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
3390{ 3375{
3391 struct rpc_message msg = { 3376 struct rpc_message msg = {
3392 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], 3377 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -3395,9 +3380,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
3395 }; 3380 };
3396 struct nfs4_renewdata *data; 3381 struct nfs4_renewdata *data;
3397 3382
3383 if (renew_flags == 0)
3384 return 0;
3398 if (!atomic_inc_not_zero(&clp->cl_count)) 3385 if (!atomic_inc_not_zero(&clp->cl_count))
3399 return -EIO; 3386 return -EIO;
3400 data = kmalloc(sizeof(*data), GFP_KERNEL); 3387 data = kmalloc(sizeof(*data), GFP_NOFS);
3401 if (data == NULL) 3388 if (data == NULL)
3402 return -ENOMEM; 3389 return -ENOMEM;
3403 data->client = clp; 3390 data->client = clp;
@@ -3406,7 +3393,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
3406 &nfs4_renew_ops, data); 3393 &nfs4_renew_ops, data);
3407} 3394}
3408 3395
3409int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) 3396static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
3410{ 3397{
3411 struct rpc_message msg = { 3398 struct rpc_message msg = {
3412 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], 3399 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -3851,7 +3838,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
3851 default: 3838 default:
3852 if (nfs4_async_handle_error(task, data->res.server, NULL) == 3839 if (nfs4_async_handle_error(task, data->res.server, NULL) ==
3853 -EAGAIN) { 3840 -EAGAIN) {
3854 nfs_restart_rpc(task, data->res.server->nfs_client); 3841 rpc_restart_call_prepare(task);
3855 return; 3842 return;
3856 } 3843 }
3857 } 3844 }
@@ -4105,8 +4092,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
4105 break; 4092 break;
4106 default: 4093 default:
4107 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) 4094 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
4108 nfs_restart_rpc(task, 4095 rpc_restart_call_prepare(task);
4109 calldata->server->nfs_client);
4110 } 4096 }
4111} 4097}
4112 4098
@@ -4939,7 +4925,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4939 task->tk_status = 0; 4925 task->tk_status = 0;
4940 /* fall through */ 4926 /* fall through */
4941 case -NFS4ERR_RETRY_UNCACHED_REP: 4927 case -NFS4ERR_RETRY_UNCACHED_REP:
4942 nfs_restart_rpc(task, data->clp); 4928 rpc_restart_call_prepare(task);
4943 return; 4929 return;
4944 } 4930 }
4945 dprintk("<-- %s\n", __func__); 4931 dprintk("<-- %s\n", __func__);
@@ -5504,11 +5490,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
5504 return rpc_run_task(&task_setup_data); 5490 return rpc_run_task(&task_setup_data);
5505} 5491}
5506 5492
5507static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) 5493static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
5508{ 5494{
5509 struct rpc_task *task; 5495 struct rpc_task *task;
5510 int ret = 0; 5496 int ret = 0;
5511 5497
5498 if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
5499 return 0;
5512 task = _nfs41_proc_sequence(clp, cred); 5500 task = _nfs41_proc_sequence(clp, cred);
5513 if (IS_ERR(task)) 5501 if (IS_ERR(task))
5514 ret = PTR_ERR(task); 5502 ret = PTR_ERR(task);
@@ -5778,7 +5766,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
5778 5766
5779 server = NFS_SERVER(lrp->args.inode); 5767 server = NFS_SERVER(lrp->args.inode);
5780 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { 5768 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5781 nfs_restart_rpc(task, lrp->clp); 5769 rpc_restart_call_prepare(task);
5782 return; 5770 return;
5783 } 5771 }
5784 spin_lock(&lo->plh_inode->i_lock); 5772 spin_lock(&lo->plh_inode->i_lock);
@@ -5949,7 +5937,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5949 } 5937 }
5950 5938
5951 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { 5939 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5952 nfs_restart_rpc(task, server->nfs_client); 5940 rpc_restart_call_prepare(task);
5953 return; 5941 return;
5954 } 5942 }
5955 5943
@@ -6262,7 +6250,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6262 .getroot = nfs4_proc_get_root, 6250 .getroot = nfs4_proc_get_root,
6263 .getattr = nfs4_proc_getattr, 6251 .getattr = nfs4_proc_getattr,
6264 .setattr = nfs4_proc_setattr, 6252 .setattr = nfs4_proc_setattr,
6265 .lookupfh = nfs4_proc_lookupfh,
6266 .lookup = nfs4_proc_lookup, 6253 .lookup = nfs4_proc_lookup,
6267 .access = nfs4_proc_access, 6254 .access = nfs4_proc_access,
6268 .readlink = nfs4_proc_readlink, 6255 .readlink = nfs4_proc_readlink,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index df8e7f3ca56d..dc484c0eae7f 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work)
60 struct rpc_cred *cred; 60 struct rpc_cred *cred;
61 long lease; 61 long lease;
62 unsigned long last, now; 62 unsigned long last, now;
63 unsigned renew_flags = 0;
63 64
64 ops = clp->cl_mvops->state_renewal_ops; 65 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 66 dprintk("%s: start\n", __func__);
@@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work)
72 last = clp->cl_last_renewal; 73 last = clp->cl_last_renewal;
73 now = jiffies; 74 now = jiffies;
74 /* Are we close to a lease timeout? */ 75 /* Are we close to a lease timeout? */
75 if (time_after(now, last + lease/3)) { 76 if (time_after(now, last + lease/3))
77 renew_flags |= NFS4_RENEW_TIMEOUT;
78 if (nfs_delegations_present(clp))
79 renew_flags |= NFS4_RENEW_DELEGATION_CB;
80
81 if (renew_flags != 0) {
76 cred = ops->get_state_renewal_cred_locked(clp); 82 cred = ops->get_state_renewal_cred_locked(clp);
77 spin_unlock(&clp->cl_lock); 83 spin_unlock(&clp->cl_lock);
78 if (cred == NULL) { 84 if (cred == NULL) {
79 if (!nfs_delegations_present(clp)) { 85 if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) {
80 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 86 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
81 goto out; 87 goto out;
82 } 88 }
83 nfs_expire_all_delegations(clp); 89 nfs_expire_all_delegations(clp);
84 } else { 90 } else {
85 /* Queue an asynchronous RENEW. */ 91 /* Queue an asynchronous RENEW. */
86 ops->sched_state_renewal(clp, cred); 92 ops->sched_state_renewal(clp, cred, renew_flags);
87 put_rpccred(cred); 93 put_rpccred(cred);
88 goto out_exp; 94 goto out_exp;
89 } 95 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 72ab97ef3d61..39914be40b03 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1038 nfs4_schedule_state_manager(clp); 1038 nfs4_schedule_state_manager(clp);
1039} 1039}
1040 1040
1041void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
1042{
1043 nfs_handle_cb_pathdown(clp);
1044 nfs4_schedule_state_manager(clp);
1045}
1046
1041static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1047static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1042{ 1048{
1043 1049
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e550e8836c37..ee73d9a4f700 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1168,23 +1168,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1168/* 1168/*
1169 * Called by non rpc-based layout drivers 1169 * Called by non rpc-based layout drivers
1170 */ 1170 */
1171int 1171void pnfs_ld_write_done(struct nfs_write_data *data)
1172pnfs_ld_write_done(struct nfs_write_data *data)
1173{ 1172{
1174 int status; 1173 if (likely(!data->pnfs_error)) {
1175
1176 if (!data->pnfs_error) {
1177 pnfs_set_layoutcommit(data); 1174 pnfs_set_layoutcommit(data);
1178 data->mds_ops->rpc_call_done(&data->task, data); 1175 data->mds_ops->rpc_call_done(&data->task, data);
1179 data->mds_ops->rpc_release(data); 1176 } else {
1180 return 0; 1177 put_lseg(data->lseg);
1178 data->lseg = NULL;
1179 dprintk("pnfs write error = %d\n", data->pnfs_error);
1181 } 1180 }
1182 1181 data->mds_ops->rpc_release(data);
1183 dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
1184 data->pnfs_error);
1185 status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
1186 data->mds_ops, NFS_FILE_SYNC);
1187 return status ? : -EAGAIN;
1188} 1182}
1189EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1183EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1190 1184
@@ -1268,23 +1262,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1268/* 1262/*
1269 * Called by non rpc-based layout drivers 1263 * Called by non rpc-based layout drivers
1270 */ 1264 */
1271int 1265void pnfs_ld_read_done(struct nfs_read_data *data)
1272pnfs_ld_read_done(struct nfs_read_data *data)
1273{ 1266{
1274 int status; 1267 if (likely(!data->pnfs_error)) {
1275
1276 if (!data->pnfs_error) {
1277 __nfs4_read_done_cb(data); 1268 __nfs4_read_done_cb(data);
1278 data->mds_ops->rpc_call_done(&data->task, data); 1269 data->mds_ops->rpc_call_done(&data->task, data);
1279 data->mds_ops->rpc_release(data); 1270 } else {
1280 return 0; 1271 put_lseg(data->lseg);
1272 data->lseg = NULL;
1273 dprintk("pnfs write error = %d\n", data->pnfs_error);
1281 } 1274 }
1282 1275 data->mds_ops->rpc_release(data);
1283 dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
1284 data->pnfs_error);
1285 status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
1286 data->mds_ops);
1287 return status ? : -EAGAIN;
1288} 1276}
1289EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1277EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1290 1278
@@ -1381,6 +1369,18 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1381 } 1369 }
1382} 1370}
1383 1371
1372void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1373{
1374 if (lseg->pls_range.iomode == IOMODE_RW) {
1375 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
1376 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
1377 } else {
1378 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
1379 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
1380 }
1381}
1382EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1383
1384void 1384void
1385pnfs_set_layoutcommit(struct nfs_write_data *wdata) 1385pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1386{ 1386{
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 01cbfd54f3cb..1509530cb111 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -178,6 +178,7 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
178void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); 178void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
179int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); 179int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
180bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 180bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
181void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
181int pnfs_layout_process(struct nfs4_layoutget *lgp); 182int pnfs_layout_process(struct nfs4_layoutget *lgp);
182void pnfs_free_lseg_list(struct list_head *tmp_list); 183void pnfs_free_lseg_list(struct list_head *tmp_list);
183void pnfs_destroy_layout(struct nfs_inode *); 184void pnfs_destroy_layout(struct nfs_inode *);
@@ -200,8 +201,8 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
200void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 201void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
201int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 202int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
202int _pnfs_return_layout(struct inode *); 203int _pnfs_return_layout(struct inode *);
203int pnfs_ld_write_done(struct nfs_write_data *); 204void pnfs_ld_write_done(struct nfs_write_data *);
204int pnfs_ld_read_done(struct nfs_read_data *); 205void pnfs_ld_read_done(struct nfs_read_data *);
205struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 206struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
206 struct nfs_open_context *ctx, 207 struct nfs_open_context *ctx,
207 loff_t pos, 208 loff_t pos,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2171c043ab08..8b48ec63f722 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,16 +35,13 @@ static const struct rpc_call_ops nfs_read_partial_ops;
35static const struct rpc_call_ops nfs_read_full_ops; 35static const struct rpc_call_ops nfs_read_full_ops;
36 36
37static struct kmem_cache *nfs_rdata_cachep; 37static struct kmem_cache *nfs_rdata_cachep;
38static mempool_t *nfs_rdata_mempool;
39
40#define MIN_POOL_READ (32)
41 38
42struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 39struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
43{ 40{
44 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL); 41 struct nfs_read_data *p;
45 42
43 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
46 if (p) { 44 if (p) {
47 memset(p, 0, sizeof(*p));
48 INIT_LIST_HEAD(&p->pages); 45 INIT_LIST_HEAD(&p->pages);
49 p->npages = pagecount; 46 p->npages = pagecount;
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 47 if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -52,7 +49,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
52 else { 49 else {
53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 50 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
54 if (!p->pagevec) { 51 if (!p->pagevec) {
55 mempool_free(p, nfs_rdata_mempool); 52 kmem_cache_free(nfs_rdata_cachep, p);
56 p = NULL; 53 p = NULL;
57 } 54 }
58 } 55 }
@@ -64,7 +61,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
64{ 61{
65 if (p && (p->pagevec != &p->page_array[0])) 62 if (p && (p->pagevec != &p->page_array[0]))
66 kfree(p->pagevec); 63 kfree(p->pagevec);
67 mempool_free(p, nfs_rdata_mempool); 64 kmem_cache_free(nfs_rdata_cachep, p);
68} 65}
69 66
70void nfs_readdata_release(struct nfs_read_data *rdata) 67void nfs_readdata_release(struct nfs_read_data *rdata)
@@ -276,7 +273,6 @@ nfs_async_read_error(struct list_head *head)
276 while (!list_empty(head)) { 273 while (!list_empty(head)) {
277 req = nfs_list_entry(head->next); 274 req = nfs_list_entry(head->next);
278 nfs_list_remove_request(req); 275 nfs_list_remove_request(req);
279 SetPageError(req->wb_page);
280 nfs_readpage_release(req); 276 nfs_readpage_release(req);
281 } 277 }
282} 278}
@@ -322,7 +318,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
322 offset += len; 318 offset += len;
323 } while(nbytes != 0); 319 } while(nbytes != 0);
324 atomic_set(&req->wb_complete, requests); 320 atomic_set(&req->wb_complete, requests);
325 ClearPageError(page);
326 desc->pg_rpc_callops = &nfs_read_partial_ops; 321 desc->pg_rpc_callops = &nfs_read_partial_ops;
327 return ret; 322 return ret;
328out_bad: 323out_bad:
@@ -331,7 +326,6 @@ out_bad:
331 list_del(&data->list); 326 list_del(&data->list);
332 nfs_readdata_free(data); 327 nfs_readdata_free(data);
333 } 328 }
334 SetPageError(page);
335 nfs_readpage_release(req); 329 nfs_readpage_release(req);
336 return -ENOMEM; 330 return -ENOMEM;
337} 331}
@@ -357,7 +351,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
357 req = nfs_list_entry(head->next); 351 req = nfs_list_entry(head->next);
358 nfs_list_remove_request(req); 352 nfs_list_remove_request(req);
359 nfs_list_add_request(req, &data->pages); 353 nfs_list_add_request(req, &data->pages);
360 ClearPageError(req->wb_page);
361 *pages++ = req->wb_page; 354 *pages++ = req->wb_page;
362 } 355 }
363 req = nfs_list_entry(data->pages.next); 356 req = nfs_list_entry(data->pages.next);
@@ -435,7 +428,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
435 argp->offset += resp->count; 428 argp->offset += resp->count;
436 argp->pgbase += resp->count; 429 argp->pgbase += resp->count;
437 argp->count -= resp->count; 430 argp->count -= resp->count;
438 nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); 431 rpc_restart_call_prepare(task);
439} 432}
440 433
441/* 434/*
@@ -462,10 +455,10 @@ static void nfs_readpage_release_partial(void *calldata)
462 int status = data->task.tk_status; 455 int status = data->task.tk_status;
463 456
464 if (status < 0) 457 if (status < 0)
465 SetPageError(page); 458 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
466 459
467 if (atomic_dec_and_test(&req->wb_complete)) { 460 if (atomic_dec_and_test(&req->wb_complete)) {
468 if (!PageError(page)) 461 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
469 SetPageUptodate(page); 462 SetPageUptodate(page);
470 nfs_readpage_release(req); 463 nfs_readpage_release(req);
471 } 464 }
@@ -541,13 +534,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
541static void nfs_readpage_release_full(void *calldata) 534static void nfs_readpage_release_full(void *calldata)
542{ 535{
543 struct nfs_read_data *data = calldata; 536 struct nfs_read_data *data = calldata;
537 struct nfs_pageio_descriptor pgio;
544 538
539 if (data->pnfs_error) {
540 nfs_pageio_init_read_mds(&pgio, data->inode);
541 pgio.pg_recoalesce = 1;
542 }
545 while (!list_empty(&data->pages)) { 543 while (!list_empty(&data->pages)) {
546 struct nfs_page *req = nfs_list_entry(data->pages.next); 544 struct nfs_page *req = nfs_list_entry(data->pages.next);
547 545
548 nfs_list_remove_request(req); 546 nfs_list_remove_request(req);
549 nfs_readpage_release(req); 547 if (!data->pnfs_error)
548 nfs_readpage_release(req);
549 else
550 nfs_pageio_add_request(&pgio, req);
550 } 551 }
552 if (data->pnfs_error)
553 nfs_pageio_complete(&pgio);
551 nfs_readdata_release(calldata); 554 nfs_readdata_release(calldata);
552} 555}
553 556
@@ -648,7 +651,6 @@ readpage_async_filler(void *data, struct page *page)
648 return 0; 651 return 0;
649out_error: 652out_error:
650 error = PTR_ERR(new); 653 error = PTR_ERR(new);
651 SetPageError(page);
652out_unlock: 654out_unlock:
653 unlock_page(page); 655 unlock_page(page);
654 return error; 656 return error;
@@ -711,16 +713,10 @@ int __init nfs_init_readpagecache(void)
711 if (nfs_rdata_cachep == NULL) 713 if (nfs_rdata_cachep == NULL)
712 return -ENOMEM; 714 return -ENOMEM;
713 715
714 nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
715 nfs_rdata_cachep);
716 if (nfs_rdata_mempool == NULL)
717 return -ENOMEM;
718
719 return 0; 716 return 0;
720} 717}
721 718
722void nfs_destroy_readpagecache(void) 719void nfs_destroy_readpagecache(void)
723{ 720{
724 mempool_destroy(nfs_rdata_mempool);
725 kmem_cache_destroy(nfs_rdata_cachep); 721 kmem_cache_destroy(nfs_rdata_cachep);
726} 722}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b961ceac66b4..480b3b6bf71e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -733,18 +733,22 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
733 733
734 return 0; 734 return 0;
735} 735}
736
737#ifdef CONFIG_NFS_V4
736#ifdef CONFIG_NFS_V4_1 738#ifdef CONFIG_NFS_V4_1
737void show_sessions(struct seq_file *m, struct nfs_server *server) 739static void show_sessions(struct seq_file *m, struct nfs_server *server)
738{ 740{
739 if (nfs4_has_session(server->nfs_client)) 741 if (nfs4_has_session(server->nfs_client))
740 seq_printf(m, ",sessions"); 742 seq_printf(m, ",sessions");
741} 743}
742#else 744#else
743void show_sessions(struct seq_file *m, struct nfs_server *server) {} 745static void show_sessions(struct seq_file *m, struct nfs_server *server) {}
746#endif
744#endif 747#endif
745 748
749#ifdef CONFIG_NFS_V4
746#ifdef CONFIG_NFS_V4_1 750#ifdef CONFIG_NFS_V4_1
747void show_pnfs(struct seq_file *m, struct nfs_server *server) 751static void show_pnfs(struct seq_file *m, struct nfs_server *server)
748{ 752{
749 seq_printf(m, ",pnfs="); 753 seq_printf(m, ",pnfs=");
750 if (server->pnfs_curr_ld) 754 if (server->pnfs_curr_ld)
@@ -752,9 +756,10 @@ void show_pnfs(struct seq_file *m, struct nfs_server *server)
752 else 756 else
753 seq_printf(m, "not configured"); 757 seq_printf(m, "not configured");
754} 758}
755#else /* CONFIG_NFS_V4_1 */ 759#else
756void show_pnfs(struct seq_file *m, struct nfs_server *server) {} 760static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
757#endif /* CONFIG_NFS_V4_1 */ 761#endif
762#endif
758 763
759static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) 764static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
760{ 765{
@@ -2035,9 +2040,6 @@ static inline void nfs_initialise_sb(struct super_block *sb)
2035 sb->s_blocksize = nfs_block_bits(server->wsize, 2040 sb->s_blocksize = nfs_block_bits(server->wsize,
2036 &sb->s_blocksize_bits); 2041 &sb->s_blocksize_bits);
2037 2042
2038 if (server->flags & NFS_MOUNT_NOAC)
2039 sb->s_flags |= MS_SYNCHRONOUS;
2040
2041 sb->s_bdi = &server->backing_dev_info; 2043 sb->s_bdi = &server->backing_dev_info;
2042 2044
2043 nfs_super_set_maxbytes(sb, server->maxfilesize); 2045 nfs_super_set_maxbytes(sb, server->maxfilesize);
@@ -2249,6 +2251,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2249 if (server->flags & NFS_MOUNT_UNSHARED) 2251 if (server->flags & NFS_MOUNT_UNSHARED)
2250 compare_super = NULL; 2252 compare_super = NULL;
2251 2253
2254 /* -o noac implies -o sync */
2255 if (server->flags & NFS_MOUNT_NOAC)
2256 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2257
2252 /* Get a superblock - note that we may end up sharing one that already exists */ 2258 /* Get a superblock - note that we may end up sharing one that already exists */
2253 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2259 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
2254 if (IS_ERR(s)) { 2260 if (IS_ERR(s)) {
@@ -2361,6 +2367,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2361 if (server->flags & NFS_MOUNT_UNSHARED) 2367 if (server->flags & NFS_MOUNT_UNSHARED)
2362 compare_super = NULL; 2368 compare_super = NULL;
2363 2369
2370 /* -o noac implies -o sync */
2371 if (server->flags & NFS_MOUNT_NOAC)
2372 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2373
2364 /* Get a superblock - note that we may end up sharing one that already exists */ 2374 /* Get a superblock - note that we may end up sharing one that already exists */
2365 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2375 s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2366 if (IS_ERR(s)) { 2376 if (IS_ERR(s)) {
@@ -2628,6 +2638,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2628 if (server->flags & NFS4_MOUNT_UNSHARED) 2638 if (server->flags & NFS4_MOUNT_UNSHARED)
2629 compare_super = NULL; 2639 compare_super = NULL;
2630 2640
2641 /* -o noac implies -o sync */
2642 if (server->flags & NFS_MOUNT_NOAC)
2643 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2644
2631 /* Get a superblock - note that we may end up sharing one that already exists */ 2645 /* Get a superblock - note that we may end up sharing one that already exists */
2632 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2646 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2633 if (IS_ERR(s)) { 2647 if (IS_ERR(s)) {
@@ -2789,7 +2803,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2789 goto out_put_mnt_ns; 2803 goto out_put_mnt_ns;
2790 2804
2791 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2805 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2792 export_path, LOOKUP_FOLLOW, &path); 2806 export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
2793 2807
2794 nfs_referral_loop_unprotect(); 2808 nfs_referral_loop_unprotect();
2795 put_mnt_ns(ns_private); 2809 put_mnt_ns(ns_private);
@@ -2916,6 +2930,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
2916 if (server->flags & NFS4_MOUNT_UNSHARED) 2930 if (server->flags & NFS4_MOUNT_UNSHARED)
2917 compare_super = NULL; 2931 compare_super = NULL;
2918 2932
2933 /* -o noac implies -o sync */
2934 if (server->flags & NFS_MOUNT_NOAC)
2935 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2936
2919 /* Get a superblock - note that we may end up sharing one that already exists */ 2937 /* Get a superblock - note that we may end up sharing one that already exists */
2920 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); 2938 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
2921 if (IS_ERR(s)) { 2939 if (IS_ERR(s)) {
@@ -3003,6 +3021,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3003 if (server->flags & NFS4_MOUNT_UNSHARED) 3021 if (server->flags & NFS4_MOUNT_UNSHARED)
3004 compare_super = NULL; 3022 compare_super = NULL;
3005 3023
3024 /* -o noac implies -o sync */
3025 if (server->flags & NFS_MOUNT_NOAC)
3026 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
3027
3006 /* Get a superblock - note that we may end up sharing one that already exists */ 3028 /* Get a superblock - note that we may end up sharing one that already exists */
3007 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); 3029 s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
3008 if (IS_ERR(s)) { 3030 if (IS_ERR(s)) {
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index b2fbbde58e44..4f9319a2e567 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -87,7 +87,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
87 struct inode *dir = data->dir; 87 struct inode *dir = data->dir;
88 88
89 if (!NFS_PROTO(dir)->unlink_done(task, dir)) 89 if (!NFS_PROTO(dir)->unlink_done(task, dir))
90 nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client); 90 rpc_restart_call_prepare(task);
91} 91}
92 92
93/** 93/**
@@ -369,7 +369,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
369 struct dentry *new_dentry = data->new_dentry; 369 struct dentry *new_dentry = data->new_dentry;
370 370
371 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { 371 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
372 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); 372 rpc_restart_call_prepare(task);
373 return; 373 return;
374 } 374 }
375 375
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b39b37f80913..2219c88d96b2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -390,7 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
390 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); 390 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
391 BUG_ON(error); 391 BUG_ON(error);
392 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 392 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
393 nfsi->change_attr++; 393 inode->i_version++;
394 set_bit(PG_MAPPED, &req->wb_flags); 394 set_bit(PG_MAPPED, &req->wb_flags);
395 SetPagePrivate(req->wb_page); 395 SetPagePrivate(req->wb_page);
396 set_page_private(req->wb_page, (unsigned long)req); 396 set_page_private(req->wb_page, (unsigned long)req);
@@ -428,7 +428,6 @@ static void
428nfs_mark_request_dirty(struct nfs_page *req) 428nfs_mark_request_dirty(struct nfs_page *req)
429{ 429{
430 __set_page_dirty_nobuffers(req->wb_page); 430 __set_page_dirty_nobuffers(req->wb_page);
431 __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
432} 431}
433 432
434#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 433#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -762,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page,
762 status = nfs_writepage_setup(ctx, page, offset, count); 761 status = nfs_writepage_setup(ctx, page, offset, count);
763 if (status < 0) 762 if (status < 0)
764 nfs_set_pageerror(page); 763 nfs_set_pageerror(page);
764 else
765 __set_page_dirty_nobuffers(page);
765 766
766 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", 767 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
767 status, (long long)i_size_read(inode)); 768 status, (long long)i_size_read(inode));
@@ -958,7 +959,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
958 if (!data) 959 if (!data)
959 goto out_bad; 960 goto out_bad;
960 data->pagevec[0] = page; 961 data->pagevec[0] = page;
961 nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); 962 nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
962 list_add(&data->list, res); 963 list_add(&data->list, res);
963 requests++; 964 requests++;
964 nbytes -= len; 965 nbytes -= len;
@@ -1010,7 +1011,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
1010 req = nfs_list_entry(head->next); 1011 req = nfs_list_entry(head->next);
1011 nfs_list_remove_request(req); 1012 nfs_list_remove_request(req);
1012 nfs_list_add_request(req, &data->pages); 1013 nfs_list_add_request(req, &data->pages);
1013 ClearPageError(req->wb_page);
1014 *pages++ = req->wb_page; 1014 *pages++ = req->wb_page;
1015 } 1015 }
1016 req = nfs_list_entry(data->pages.next); 1016 req = nfs_list_entry(data->pages.next);
@@ -1165,7 +1165,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1165static void nfs_writeback_release_full(void *calldata) 1165static void nfs_writeback_release_full(void *calldata)
1166{ 1166{
1167 struct nfs_write_data *data = calldata; 1167 struct nfs_write_data *data = calldata;
1168 int status = data->task.tk_status; 1168 int ret, status = data->task.tk_status;
1169 struct nfs_pageio_descriptor pgio;
1170
1171 if (data->pnfs_error) {
1172 nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
1173 pgio.pg_recoalesce = 1;
1174 }
1169 1175
1170 /* Update attributes as result of writeback. */ 1176 /* Update attributes as result of writeback. */
1171 while (!list_empty(&data->pages)) { 1177 while (!list_empty(&data->pages)) {
@@ -1181,6 +1187,11 @@ static void nfs_writeback_release_full(void *calldata)
1181 req->wb_bytes, 1187 req->wb_bytes,
1182 (long long)req_offset(req)); 1188 (long long)req_offset(req));
1183 1189
1190 if (data->pnfs_error) {
1191 dprintk(", pnfs error = %d\n", data->pnfs_error);
1192 goto next;
1193 }
1194
1184 if (status < 0) { 1195 if (status < 0) {
1185 nfs_set_pageerror(page); 1196 nfs_set_pageerror(page);
1186 nfs_context_set_write_error(req->wb_context, status); 1197 nfs_context_set_write_error(req->wb_context, status);
@@ -1200,7 +1211,19 @@ remove_request:
1200 next: 1211 next:
1201 nfs_clear_page_tag_locked(req); 1212 nfs_clear_page_tag_locked(req);
1202 nfs_end_page_writeback(page); 1213 nfs_end_page_writeback(page);
1214 if (data->pnfs_error) {
1215 lock_page(page);
1216 nfs_pageio_cond_complete(&pgio, page->index);
1217 ret = nfs_page_async_flush(&pgio, page, 0);
1218 if (ret) {
1219 nfs_set_pageerror(page);
1220 dprintk("rewrite to MDS error = %d\n", ret);
1221 }
1222 unlock_page(page);
1223 }
1203 } 1224 }
1225 if (data->pnfs_error)
1226 nfs_pageio_complete(&pgio);
1204 nfs_writedata_release(calldata); 1227 nfs_writedata_release(calldata);
1205} 1228}
1206 1229
@@ -1281,7 +1304,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1281 */ 1304 */
1282 argp->stable = NFS_FILE_SYNC; 1305 argp->stable = NFS_FILE_SYNC;
1283 } 1306 }
1284 nfs_restart_rpc(task, server->nfs_client); 1307 rpc_restart_call_prepare(task);
1285 return; 1308 return;
1286 } 1309 }
1287 if (time_before(complain, jiffies)) { 1310 if (time_before(complain, jiffies)) {
@@ -1553,6 +1576,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1553 int flags = FLUSH_SYNC; 1576 int flags = FLUSH_SYNC;
1554 int ret = 0; 1577 int ret = 0;
1555 1578
1579 /* no commits means nothing needs to be done */
1580 if (!nfsi->ncommit)
1581 return ret;
1582
1556 if (wbc->sync_mode == WB_SYNC_NONE) { 1583 if (wbc->sync_mode == WB_SYNC_NONE) {
1557 /* Don't commit yet if this is a non-blocking flush and there 1584 /* Don't commit yet if this is a non-blocking flush and there
1558 * are a lot of outstanding writes for this mapping. 1585 * are a lot of outstanding writes for this mapping.
@@ -1686,34 +1713,20 @@ out_error:
1686int nfs_migrate_page(struct address_space *mapping, struct page *newpage, 1713int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1687 struct page *page) 1714 struct page *page)
1688{ 1715{
1689 struct nfs_page *req; 1716 /*
1690 int ret; 1717 * If PagePrivate is set, then the page is currently associated with
1718 * an in-progress read or write request. Don't try to migrate it.
1719 *
1720 * FIXME: we could do this in principle, but we'll need a way to ensure
1721 * that we can safely release the inode reference while holding
1722 * the page lock.
1723 */
1724 if (PagePrivate(page))
1725 return -EBUSY;
1691 1726
1692 nfs_fscache_release_page(page, GFP_KERNEL); 1727 nfs_fscache_release_page(page, GFP_KERNEL);
1693 1728
1694 req = nfs_find_and_lock_request(page, false); 1729 return migrate_page(mapping, newpage, page);
1695 ret = PTR_ERR(req);
1696 if (IS_ERR(req))
1697 goto out;
1698
1699 ret = migrate_page(mapping, newpage, page);
1700 if (!req)
1701 goto out;
1702 if (ret)
1703 goto out_unlock;
1704 page_cache_get(newpage);
1705 spin_lock(&mapping->host->i_lock);
1706 req->wb_page = newpage;
1707 SetPagePrivate(newpage);
1708 set_page_private(newpage, (unsigned long)req);
1709 ClearPagePrivate(page);
1710 set_page_private(page, 0);
1711 spin_unlock(&mapping->host->i_lock);
1712 page_cache_release(page);
1713out_unlock:
1714 nfs_clear_page_tag_locked(req);
1715out:
1716 return ret;
1717} 1730}
1718#endif 1731#endif
1719 1732
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index f4cc1e2bfc54..62f3b9074e84 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/exportfs.h> 17#include <linux/exportfs.h>
18 18
19#include <linux/nfsd/syscall.h>
20#include <net/ipv6.h> 19#include <net/ipv6.h>
21 20
22#include "nfsd.h" 21#include "nfsd.h"
@@ -318,7 +317,6 @@ static void svc_export_put(struct kref *ref)
318 struct svc_export *exp = container_of(ref, struct svc_export, h.ref); 317 struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
319 path_put(&exp->ex_path); 318 path_put(&exp->ex_path);
320 auth_domain_put(exp->ex_client); 319 auth_domain_put(exp->ex_client);
321 kfree(exp->ex_pathname);
322 nfsd4_fslocs_free(&exp->ex_fslocs); 320 nfsd4_fslocs_free(&exp->ex_fslocs);
323 kfree(exp); 321 kfree(exp);
324} 322}
@@ -528,11 +526,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
528 526
529 exp.ex_client = dom; 527 exp.ex_client = dom;
530 528
531 err = -ENOMEM;
532 exp.ex_pathname = kstrdup(buf, GFP_KERNEL);
533 if (!exp.ex_pathname)
534 goto out2;
535
536 /* expiry */ 529 /* expiry */
537 err = -EINVAL; 530 err = -EINVAL;
538 exp.h.expiry_time = get_expiry(&mesg); 531 exp.h.expiry_time = get_expiry(&mesg);
@@ -613,8 +606,6 @@ out4:
613 nfsd4_fslocs_free(&exp.ex_fslocs); 606 nfsd4_fslocs_free(&exp.ex_fslocs);
614 kfree(exp.ex_uuid); 607 kfree(exp.ex_uuid);
615out3: 608out3:
616 kfree(exp.ex_pathname);
617out2:
618 path_put(&exp.ex_path); 609 path_put(&exp.ex_path);
619out1: 610out1:
620 auth_domain_put(dom); 611 auth_domain_put(dom);
@@ -678,7 +669,6 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
678 new->ex_client = item->ex_client; 669 new->ex_client = item->ex_client;
679 new->ex_path.dentry = dget(item->ex_path.dentry); 670 new->ex_path.dentry = dget(item->ex_path.dentry);
680 new->ex_path.mnt = mntget(item->ex_path.mnt); 671 new->ex_path.mnt = mntget(item->ex_path.mnt);
681 new->ex_pathname = NULL;
682 new->ex_fslocs.locations = NULL; 672 new->ex_fslocs.locations = NULL;
683 new->ex_fslocs.locations_count = 0; 673 new->ex_fslocs.locations_count = 0;
684 new->ex_fslocs.migrated = 0; 674 new->ex_fslocs.migrated = 0;
@@ -696,8 +686,6 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
696 new->ex_fsid = item->ex_fsid; 686 new->ex_fsid = item->ex_fsid;
697 new->ex_uuid = item->ex_uuid; 687 new->ex_uuid = item->ex_uuid;
698 item->ex_uuid = NULL; 688 item->ex_uuid = NULL;
699 new->ex_pathname = item->ex_pathname;
700 item->ex_pathname = NULL;
701 new->ex_fslocs.locations = item->ex_fslocs.locations; 689 new->ex_fslocs.locations = item->ex_fslocs.locations;
702 item->ex_fslocs.locations = NULL; 690 item->ex_fslocs.locations = NULL;
703 new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; 691 new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
@@ -1010,7 +998,7 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
1010 return exp; 998 return exp;
1011} 999}
1012 1000
1013static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp) 1001struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp)
1014{ 1002{
1015 u32 fsidv[2]; 1003 u32 fsidv[2];
1016 1004
@@ -1030,7 +1018,7 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1030 struct svc_export *exp; 1018 struct svc_export *exp;
1031 __be32 rv; 1019 __be32 rv;
1032 1020
1033 exp = find_fsidzero_export(rqstp); 1021 exp = rqst_find_fsidzero_export(rqstp);
1034 if (IS_ERR(exp)) 1022 if (IS_ERR(exp))
1035 return nfserrno(PTR_ERR(exp)); 1023 return nfserrno(PTR_ERR(exp));
1036 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1024 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 02eb4edf0ece..7748d6a18d97 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -39,6 +39,8 @@
39 39
40#define NFSDDBG_FACILITY NFSDDBG_PROC 40#define NFSDDBG_FACILITY NFSDDBG_PROC
41 41
42static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
43
42#define NFSPROC4_CB_NULL 0 44#define NFSPROC4_CB_NULL 0
43#define NFSPROC4_CB_COMPOUND 1 45#define NFSPROC4_CB_COMPOUND 1
44 46
@@ -351,7 +353,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
351 __be32 *p; 353 __be32 *p;
352 354
353 encode_nfs_cb_opnum4(xdr, OP_CB_RECALL); 355 encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
354 encode_stateid4(xdr, &dp->dl_stateid); 356 encode_stateid4(xdr, &dp->dl_stid.sc_stateid);
355 357
356 p = xdr_reserve_space(xdr, 4); 358 p = xdr_reserve_space(xdr, 4);
357 *p++ = xdr_zero; /* truncate */ 359 *p++ = xdr_zero; /* truncate */
@@ -460,6 +462,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
460 */ 462 */
461 status = 0; 463 status = 0;
462out: 464out:
465 if (status)
466 nfsd4_mark_cb_fault(cb->cb_clp, status);
463 return status; 467 return status;
464out_overflow: 468out_overflow:
465 print_overflow_msg(__func__, xdr); 469 print_overflow_msg(__func__, xdr);
@@ -686,6 +690,12 @@ static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
686 warn_no_callback_path(clp, reason); 690 warn_no_callback_path(clp, reason);
687} 691}
688 692
693static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
694{
695 clp->cl_cb_state = NFSD4_CB_FAULT;
696 warn_no_callback_path(clp, reason);
697}
698
689static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) 699static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
690{ 700{
691 struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); 701 struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
@@ -787,7 +797,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
787{ 797{
788 struct nfsd4_callback *cb = calldata; 798 struct nfsd4_callback *cb = calldata;
789 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); 799 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
790 struct nfs4_client *clp = dp->dl_client; 800 struct nfs4_client *clp = dp->dl_stid.sc_client;
791 u32 minorversion = clp->cl_minorversion; 801 u32 minorversion = clp->cl_minorversion;
792 802
793 cb->cb_minorversion = minorversion; 803 cb->cb_minorversion = minorversion;
@@ -809,7 +819,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
809{ 819{
810 struct nfsd4_callback *cb = calldata; 820 struct nfsd4_callback *cb = calldata;
811 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); 821 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
812 struct nfs4_client *clp = dp->dl_client; 822 struct nfs4_client *clp = dp->dl_stid.sc_client;
813 823
814 dprintk("%s: minorversion=%d\n", __func__, 824 dprintk("%s: minorversion=%d\n", __func__,
815 clp->cl_minorversion); 825 clp->cl_minorversion);
@@ -832,7 +842,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
832{ 842{
833 struct nfsd4_callback *cb = calldata; 843 struct nfsd4_callback *cb = calldata;
834 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); 844 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
835 struct nfs4_client *clp = dp->dl_client; 845 struct nfs4_client *clp = dp->dl_stid.sc_client;
836 struct rpc_clnt *current_rpc_client = clp->cl_cb_client; 846 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
837 847
838 nfsd4_cb_done(task, calldata); 848 nfsd4_cb_done(task, calldata);
@@ -1006,7 +1016,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w)
1006void nfsd4_cb_recall(struct nfs4_delegation *dp) 1016void nfsd4_cb_recall(struct nfs4_delegation *dp)
1007{ 1017{
1008 struct nfsd4_callback *cb = &dp->dl_recall; 1018 struct nfsd4_callback *cb = &dp->dl_recall;
1009 struct nfs4_client *clp = dp->dl_client; 1019 struct nfs4_client *clp = dp->dl_stid.sc_client;
1010 1020
1011 dp->dl_retries = 1; 1021 dp->dl_retries = 1;
1012 cb->cb_op = dp; 1022 cb->cb_op = dp;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e80777666618..fa383361bc61 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -35,6 +35,7 @@
35#include <linux/file.h> 35#include <linux/file.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37 37
38#include "idmap.h"
38#include "cache.h" 39#include "cache.h"
39#include "xdr4.h" 40#include "xdr4.h"
40#include "vfs.h" 41#include "vfs.h"
@@ -156,6 +157,8 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
156 !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) 157 !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
157 return nfserr_inval; 158 return nfserr_inval;
158 159
160 accmode |= NFSD_MAY_READ_IF_EXEC;
161
159 if (open->op_share_access & NFS4_SHARE_ACCESS_READ) 162 if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
160 accmode |= NFSD_MAY_READ; 163 accmode |= NFSD_MAY_READ;
161 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 164 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
@@ -168,12 +171,29 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
168 return status; 171 return status;
169} 172}
170 173
174static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
175{
176 umode_t mode = fh->fh_dentry->d_inode->i_mode;
177
178 if (S_ISREG(mode))
179 return nfs_ok;
180 if (S_ISDIR(mode))
181 return nfserr_isdir;
182 /*
183 * Using err_symlink as our catch-all case may look odd; but
184 * there's no other obvious error for this case in 4.0, and we
185 * happen to know that it will cause the linux v4 client to do
186 * the right thing on attempts to open something other than a
187 * regular file.
188 */
189 return nfserr_symlink;
190}
191
171static __be32 192static __be32
172do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) 193do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
173{ 194{
174 struct svc_fh resfh; 195 struct svc_fh resfh;
175 __be32 status; 196 __be32 status;
176 int created = 0;
177 197
178 fh_init(&resfh, NFS4_FHSIZE); 198 fh_init(&resfh, NFS4_FHSIZE);
179 open->op_truncate = 0; 199 open->op_truncate = 0;
@@ -202,7 +222,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
202 open->op_fname.len, &open->op_iattr, 222 open->op_fname.len, &open->op_iattr,
203 &resfh, open->op_createmode, 223 &resfh, open->op_createmode,
204 (u32 *)open->op_verf.data, 224 (u32 *)open->op_verf.data,
205 &open->op_truncate, &created); 225 &open->op_truncate, &open->op_created);
206 226
207 /* 227 /*
208 * Following rfc 3530 14.2.16, use the returned bitmask 228 * Following rfc 3530 14.2.16, use the returned bitmask
@@ -216,6 +236,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
216 status = nfsd_lookup(rqstp, current_fh, 236 status = nfsd_lookup(rqstp, current_fh,
217 open->op_fname.data, open->op_fname.len, &resfh); 237 open->op_fname.data, open->op_fname.len, &resfh);
218 fh_unlock(current_fh); 238 fh_unlock(current_fh);
239 if (status)
240 goto out;
241 status = nfsd_check_obj_isreg(&resfh);
219 } 242 }
220 if (status) 243 if (status)
221 goto out; 244 goto out;
@@ -227,9 +250,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
227 fh_dup2(current_fh, &resfh); 250 fh_dup2(current_fh, &resfh);
228 251
229 /* set reply cache */ 252 /* set reply cache */
230 fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, 253 fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
231 &resfh.fh_handle); 254 &resfh.fh_handle);
232 if (!created) 255 if (!open->op_created)
233 status = do_open_permission(rqstp, current_fh, open, 256 status = do_open_permission(rqstp, current_fh, open,
234 NFSD_MAY_NOP); 257 NFSD_MAY_NOP);
235 258
@@ -254,7 +277,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
254 memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); 277 memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
255 278
256 /* set replay cache */ 279 /* set replay cache */
257 fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, 280 fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
258 &current_fh->fh_handle); 281 &current_fh->fh_handle);
259 282
260 open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && 283 open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
@@ -283,14 +306,18 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
283 __be32 status; 306 __be32 status;
284 struct nfsd4_compoundres *resp; 307 struct nfsd4_compoundres *resp;
285 308
286 dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", 309 dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
287 (int)open->op_fname.len, open->op_fname.data, 310 (int)open->op_fname.len, open->op_fname.data,
288 open->op_stateowner); 311 open->op_openowner);
289 312
290 /* This check required by spec. */ 313 /* This check required by spec. */
291 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) 314 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
292 return nfserr_inval; 315 return nfserr_inval;
293 316
317 /* We don't yet support WANT bits: */
318 open->op_share_access &= NFS4_SHARE_ACCESS_MASK;
319
320 open->op_created = 0;
294 /* 321 /*
295 * RFC5661 18.51.3 322 * RFC5661 18.51.3
296 * Before RECLAIM_COMPLETE done, server should deny new lock 323 * Before RECLAIM_COMPLETE done, server should deny new lock
@@ -309,7 +336,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
309 resp = rqstp->rq_resp; 336 resp = rqstp->rq_resp;
310 status = nfsd4_process_open1(&resp->cstate, open); 337 status = nfsd4_process_open1(&resp->cstate, open);
311 if (status == nfserr_replay_me) { 338 if (status == nfserr_replay_me) {
312 struct nfs4_replay *rp = &open->op_stateowner->so_replay; 339 struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay;
313 fh_put(&cstate->current_fh); 340 fh_put(&cstate->current_fh);
314 fh_copy_shallow(&cstate->current_fh.fh_handle, 341 fh_copy_shallow(&cstate->current_fh.fh_handle,
315 &rp->rp_openfh); 342 &rp->rp_openfh);
@@ -339,32 +366,23 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
339 switch (open->op_claim_type) { 366 switch (open->op_claim_type) {
340 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 367 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
341 case NFS4_OPEN_CLAIM_NULL: 368 case NFS4_OPEN_CLAIM_NULL:
342 /*
343 * (1) set CURRENT_FH to the file being opened,
344 * creating it if necessary, (2) set open->op_cinfo,
345 * (3) set open->op_truncate if the file is to be
346 * truncated after opening, (4) do permission checking.
347 */
348 status = do_open_lookup(rqstp, &cstate->current_fh, 369 status = do_open_lookup(rqstp, &cstate->current_fh,
349 open); 370 open);
350 if (status) 371 if (status)
351 goto out; 372 goto out;
352 break; 373 break;
353 case NFS4_OPEN_CLAIM_PREVIOUS: 374 case NFS4_OPEN_CLAIM_PREVIOUS:
354 open->op_stateowner->so_confirmed = 1; 375 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
355 /* 376 case NFS4_OPEN_CLAIM_FH:
356 * The CURRENT_FH is already set to the file being 377 case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
357 * opened. (1) set open->op_cinfo, (2) set
358 * open->op_truncate if the file is to be truncated
359 * after opening, (3) do permission checking.
360 */
361 status = do_open_fhandle(rqstp, &cstate->current_fh, 378 status = do_open_fhandle(rqstp, &cstate->current_fh,
362 open); 379 open);
363 if (status) 380 if (status)
364 goto out; 381 goto out;
365 break; 382 break;
383 case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
366 case NFS4_OPEN_CLAIM_DELEGATE_PREV: 384 case NFS4_OPEN_CLAIM_DELEGATE_PREV:
367 open->op_stateowner->so_confirmed = 1; 385 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
368 dprintk("NFSD: unsupported OPEN claim type %d\n", 386 dprintk("NFSD: unsupported OPEN claim type %d\n",
369 open->op_claim_type); 387 open->op_claim_type);
370 status = nfserr_notsupp; 388 status = nfserr_notsupp;
@@ -381,12 +399,13 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
381 * set, (2) sets open->op_stateid, (3) sets open->op_delegation. 399 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
382 */ 400 */
383 status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); 401 status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
402 WARN_ON(status && open->op_created);
384out: 403out:
385 if (open->op_stateowner) { 404 nfsd4_cleanup_open_state(open, status);
386 nfs4_get_stateowner(open->op_stateowner); 405 if (open->op_openowner)
387 cstate->replay_owner = open->op_stateowner; 406 cstate->replay_owner = &open->op_openowner->oo_owner;
388 } 407 else
389 nfs4_unlock_state(); 408 nfs4_unlock_state();
390 return status; 409 return status;
391} 410}
392 411
@@ -467,17 +486,12 @@ static __be32
467nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 486nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
468 struct nfsd4_commit *commit) 487 struct nfsd4_commit *commit)
469{ 488{
470 __be32 status;
471
472 u32 *p = (u32 *)commit->co_verf.data; 489 u32 *p = (u32 *)commit->co_verf.data;
473 *p++ = nfssvc_boot.tv_sec; 490 *p++ = nfssvc_boot.tv_sec;
474 *p++ = nfssvc_boot.tv_usec; 491 *p++ = nfssvc_boot.tv_usec;
475 492
476 status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, 493 return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
477 commit->co_count); 494 commit->co_count);
478 if (status == nfserr_symlink)
479 status = nfserr_inval;
480 return status;
481} 495}
482 496
483static __be32 497static __be32
@@ -492,8 +506,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
492 506
493 status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, 507 status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
494 NFSD_MAY_CREATE); 508 NFSD_MAY_CREATE);
495 if (status == nfserr_symlink)
496 status = nfserr_notdir;
497 if (status) 509 if (status)
498 return status; 510 return status;
499 511
@@ -691,7 +703,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
691 readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); 703 readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
692 readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); 704 readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
693 705
694 if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || 706 if ((cookie == 1) || (cookie == 2) ||
695 (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) 707 (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
696 return nfserr_bad_cookie; 708 return nfserr_bad_cookie;
697 709
@@ -719,8 +731,6 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
719 return nfserr_grace; 731 return nfserr_grace;
720 status = nfsd_unlink(rqstp, &cstate->current_fh, 0, 732 status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
721 remove->rm_name, remove->rm_namelen); 733 remove->rm_name, remove->rm_namelen);
722 if (status == nfserr_symlink)
723 return nfserr_notdir;
724 if (!status) { 734 if (!status) {
725 fh_unlock(&cstate->current_fh); 735 fh_unlock(&cstate->current_fh);
726 set_change_info(&remove->rm_cinfo, &cstate->current_fh); 736 set_change_info(&remove->rm_cinfo, &cstate->current_fh);
@@ -751,8 +761,6 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
751 (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && 761 (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) &&
752 S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) 762 S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode)))
753 status = nfserr_exist; 763 status = nfserr_exist;
754 else if (status == nfserr_symlink)
755 status = nfserr_notdir;
756 764
757 if (!status) { 765 if (!status) {
758 set_change_info(&rename->rn_sinfo, &cstate->current_fh); 766 set_change_info(&rename->rn_sinfo, &cstate->current_fh);
@@ -892,8 +900,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
892 900
893 write->wr_bytes_written = cnt; 901 write->wr_bytes_written = cnt;
894 902
895 if (status == nfserr_symlink)
896 status = nfserr_inval;
897 return status; 903 return status;
898} 904}
899 905
@@ -930,7 +936,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
930 count = 4 + (verify->ve_attrlen >> 2); 936 count = 4 + (verify->ve_attrlen >> 2);
931 buf = kmalloc(count << 2, GFP_KERNEL); 937 buf = kmalloc(count << 2, GFP_KERNEL);
932 if (!buf) 938 if (!buf)
933 return nfserr_resource; 939 return nfserr_jukebox;
934 940
935 status = nfsd4_encode_fattr(&cstate->current_fh, 941 status = nfsd4_encode_fattr(&cstate->current_fh,
936 cstate->current_fh.fh_export, 942 cstate->current_fh.fh_export,
@@ -994,6 +1000,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
994 1000
995typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, 1001typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
996 void *); 1002 void *);
1003typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
1004
997enum nfsd4_op_flags { 1005enum nfsd4_op_flags {
998 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ 1006 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
999 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ 1007 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
@@ -1001,13 +1009,15 @@ enum nfsd4_op_flags {
1001 /* For rfc 5661 section 2.6.3.1.1: */ 1009 /* For rfc 5661 section 2.6.3.1.1: */
1002 OP_HANDLES_WRONGSEC = 1 << 3, 1010 OP_HANDLES_WRONGSEC = 1 << 3,
1003 OP_IS_PUTFH_LIKE = 1 << 4, 1011 OP_IS_PUTFH_LIKE = 1 << 4,
1004};
1005
1006struct nfsd4_operation {
1007 nfsd4op_func op_func;
1008 u32 op_flags;
1009 char *op_name;
1010 /* 1012 /*
1013 * These are the ops whose result size we estimate before
1014 * encoding, to avoid performing an op then not being able to
1015 * respond or cache a response. This includes writes and setattrs
1016 * as well as the operations usually called "nonidempotent":
1017 */
1018 OP_MODIFIES_SOMETHING = 1 << 5,
1019 /*
1020 * Cache compounds containing these ops in the xid-based drc:
1011 * We use the DRC for compounds containing non-idempotent 1021 * We use the DRC for compounds containing non-idempotent
1012 * operations, *except* those that are 4.1-specific (since 1022 * operations, *except* those that are 4.1-specific (since
1013 * sessions provide their own EOS), and except for stateful 1023 * sessions provide their own EOS), and except for stateful
@@ -1015,7 +1025,15 @@ struct nfsd4_operation {
1015 * (since sequence numbers provide EOS for open, lock, etc in 1025 * (since sequence numbers provide EOS for open, lock, etc in
1016 * the v4.0 case). 1026 * the v4.0 case).
1017 */ 1027 */
1018 bool op_cacheresult; 1028 OP_CACHEME = 1 << 6,
1029};
1030
1031struct nfsd4_operation {
1032 nfsd4op_func op_func;
1033 u32 op_flags;
1034 char *op_name;
1035 /* Try to get response size before operation */
1036 nfsd4op_rsize op_rsize_bop;
1019}; 1037};
1020 1038
1021static struct nfsd4_operation nfsd4_ops[]; 1039static struct nfsd4_operation nfsd4_ops[];
@@ -1062,7 +1080,7 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
1062 1080
1063bool nfsd4_cache_this_op(struct nfsd4_op *op) 1081bool nfsd4_cache_this_op(struct nfsd4_op *op)
1064{ 1082{
1065 return OPDESC(op)->op_cacheresult; 1083 return OPDESC(op)->op_flags & OP_CACHEME;
1066} 1084}
1067 1085
1068static bool need_wrongsec_check(struct svc_rqst *rqstp) 1086static bool need_wrongsec_check(struct svc_rqst *rqstp)
@@ -1110,6 +1128,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1110 struct nfsd4_operation *opdesc; 1128 struct nfsd4_operation *opdesc;
1111 struct nfsd4_compound_state *cstate = &resp->cstate; 1129 struct nfsd4_compound_state *cstate = &resp->cstate;
1112 int slack_bytes; 1130 int slack_bytes;
1131 u32 plen = 0;
1113 __be32 status; 1132 __be32 status;
1114 1133
1115 resp->xbuf = &rqstp->rq_res; 1134 resp->xbuf = &rqstp->rq_res;
@@ -1188,6 +1207,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1188 goto encode_op; 1207 goto encode_op;
1189 } 1208 }
1190 1209
1210 /* If op is non-idempotent */
1211 if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
1212 plen = opdesc->op_rsize_bop(rqstp, op);
1213 op->status = nfsd4_check_resp_size(resp, plen);
1214 }
1215
1216 if (op->status)
1217 goto encode_op;
1218
1191 if (opdesc->op_func) 1219 if (opdesc->op_func)
1192 op->status = opdesc->op_func(rqstp, cstate, &op->u); 1220 op->status = opdesc->op_func(rqstp, cstate, &op->u);
1193 else 1221 else
@@ -1217,7 +1245,7 @@ encode_op:
1217 be32_to_cpu(status)); 1245 be32_to_cpu(status));
1218 1246
1219 if (cstate->replay_owner) { 1247 if (cstate->replay_owner) {
1220 nfs4_put_stateowner(cstate->replay_owner); 1248 nfs4_unlock_state();
1221 cstate->replay_owner = NULL; 1249 cstate->replay_owner = NULL;
1222 } 1250 }
1223 /* XXX Ugh, we need to get rid of this kind of special case: */ 1251 /* XXX Ugh, we need to get rid of this kind of special case: */
@@ -1238,6 +1266,144 @@ out:
1238 return status; 1266 return status;
1239} 1267}
1240 1268
1269#define op_encode_hdr_size (2)
1270#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE))
1271#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
1272#define op_encode_change_info_maxsz (5)
1273#define nfs4_fattr_bitmap_maxsz (4)
1274
1275#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
1276#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz)
1277
1278#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
1279
1280#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz)
1281#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \
1282 op_encode_ace_maxsz)
1283
1284#define op_encode_channel_attrs_maxsz (6 + 1 + 1)
1285
1286static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1287{
1288 return (op_encode_hdr_size) * sizeof(__be32);
1289}
1290
1291static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1292{
1293 return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
1294}
1295
1296static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1297{
1298 return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
1299}
1300
1301static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1302{
1303 return (op_encode_hdr_size + op_encode_change_info_maxsz
1304 + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
1305}
1306
1307static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1308{
1309 return (op_encode_hdr_size + op_encode_change_info_maxsz)
1310 * sizeof(__be32);
1311}
1312
1313static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1314{
1315 return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
1316 * sizeof(__be32);
1317}
1318
1319static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1320{
1321 return (op_encode_hdr_size + op_encode_stateid_maxsz
1322 + op_encode_change_info_maxsz + 1
1323 + nfs4_fattr_bitmap_maxsz
1324 + op_encode_delegation_maxsz) * sizeof(__be32);
1325}
1326
1327static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1328{
1329 u32 maxcount = 0, rlen = 0;
1330
1331 maxcount = svc_max_payload(rqstp);
1332 rlen = op->u.read.rd_length;
1333
1334 if (rlen > maxcount)
1335 rlen = maxcount;
1336
1337 return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen;
1338}
1339
1340static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1341{
1342 u32 rlen = op->u.readdir.rd_maxcount;
1343
1344 if (rlen > PAGE_SIZE)
1345 rlen = PAGE_SIZE;
1346
1347 return (op_encode_hdr_size + op_encode_verifier_maxsz)
1348 * sizeof(__be32) + rlen;
1349}
1350
1351static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1352{
1353 return (op_encode_hdr_size + op_encode_change_info_maxsz)
1354 * sizeof(__be32);
1355}
1356
1357static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1358{
1359 return (op_encode_hdr_size + op_encode_change_info_maxsz
1360 + op_encode_change_info_maxsz) * sizeof(__be32);
1361}
1362
1363static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1364{
1365 return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
1366}
1367
1368static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1369{
1370 return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
1371}
1372
1373static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1374{
1375 return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
1376}
1377
1378static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1379{
1380 return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
1381 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
1382 2 + /*eir_server_owner.so_minor_id */\
1383 /* eir_server_owner.so_major_id<> */\
1384 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
1385 /* eir_server_scope<> */\
1386 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
1387 1 + /* eir_server_impl_id array length */\
1388 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
1389}
1390
1391static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1392{
1393 return (op_encode_hdr_size + \
1394 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
1395 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
1396}
1397
1398static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1399{
1400 return (op_encode_hdr_size + \
1401 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
1402 2 + /* csr_sequence, csr_flags */\
1403 op_encode_channel_attrs_maxsz + \
1404 op_encode_channel_attrs_maxsz) * sizeof(__be32);
1405}
1406
1241static struct nfsd4_operation nfsd4_ops[] = { 1407static struct nfsd4_operation nfsd4_ops[] = {
1242 [OP_ACCESS] = { 1408 [OP_ACCESS] = {
1243 .op_func = (nfsd4op_func)nfsd4_access, 1409 .op_func = (nfsd4op_func)nfsd4_access,
@@ -1245,20 +1411,27 @@ static struct nfsd4_operation nfsd4_ops[] = {
1245 }, 1411 },
1246 [OP_CLOSE] = { 1412 [OP_CLOSE] = {
1247 .op_func = (nfsd4op_func)nfsd4_close, 1413 .op_func = (nfsd4op_func)nfsd4_close,
1414 .op_flags = OP_MODIFIES_SOMETHING,
1248 .op_name = "OP_CLOSE", 1415 .op_name = "OP_CLOSE",
1416 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1249 }, 1417 },
1250 [OP_COMMIT] = { 1418 [OP_COMMIT] = {
1251 .op_func = (nfsd4op_func)nfsd4_commit, 1419 .op_func = (nfsd4op_func)nfsd4_commit,
1420 .op_flags = OP_MODIFIES_SOMETHING,
1252 .op_name = "OP_COMMIT", 1421 .op_name = "OP_COMMIT",
1422 .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
1253 }, 1423 },
1254 [OP_CREATE] = { 1424 [OP_CREATE] = {
1255 .op_func = (nfsd4op_func)nfsd4_create, 1425 .op_func = (nfsd4op_func)nfsd4_create,
1426 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1256 .op_name = "OP_CREATE", 1427 .op_name = "OP_CREATE",
1257 .op_cacheresult = true, 1428 .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
1258 }, 1429 },
1259 [OP_DELEGRETURN] = { 1430 [OP_DELEGRETURN] = {
1260 .op_func = (nfsd4op_func)nfsd4_delegreturn, 1431 .op_func = (nfsd4op_func)nfsd4_delegreturn,
1432 .op_flags = OP_MODIFIES_SOMETHING,
1261 .op_name = "OP_DELEGRETURN", 1433 .op_name = "OP_DELEGRETURN",
1434 .op_rsize_bop = nfsd4_only_status_rsize,
1262 }, 1435 },
1263 [OP_GETATTR] = { 1436 [OP_GETATTR] = {
1264 .op_func = (nfsd4op_func)nfsd4_getattr, 1437 .op_func = (nfsd4op_func)nfsd4_getattr,
@@ -1271,12 +1444,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
1271 }, 1444 },
1272 [OP_LINK] = { 1445 [OP_LINK] = {
1273 .op_func = (nfsd4op_func)nfsd4_link, 1446 .op_func = (nfsd4op_func)nfsd4_link,
1447 .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
1448 | OP_CACHEME,
1274 .op_name = "OP_LINK", 1449 .op_name = "OP_LINK",
1275 .op_cacheresult = true, 1450 .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
1276 }, 1451 },
1277 [OP_LOCK] = { 1452 [OP_LOCK] = {
1278 .op_func = (nfsd4op_func)nfsd4_lock, 1453 .op_func = (nfsd4op_func)nfsd4_lock,
1454 .op_flags = OP_MODIFIES_SOMETHING,
1279 .op_name = "OP_LOCK", 1455 .op_name = "OP_LOCK",
1456 .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
1280 }, 1457 },
1281 [OP_LOCKT] = { 1458 [OP_LOCKT] = {
1282 .op_func = (nfsd4op_func)nfsd4_lockt, 1459 .op_func = (nfsd4op_func)nfsd4_lockt,
@@ -1284,7 +1461,9 @@ static struct nfsd4_operation nfsd4_ops[] = {
1284 }, 1461 },
1285 [OP_LOCKU] = { 1462 [OP_LOCKU] = {
1286 .op_func = (nfsd4op_func)nfsd4_locku, 1463 .op_func = (nfsd4op_func)nfsd4_locku,
1464 .op_flags = OP_MODIFIES_SOMETHING,
1287 .op_name = "OP_LOCKU", 1465 .op_name = "OP_LOCKU",
1466 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1288 }, 1467 },
1289 [OP_LOOKUP] = { 1468 [OP_LOOKUP] = {
1290 .op_func = (nfsd4op_func)nfsd4_lookup, 1469 .op_func = (nfsd4op_func)nfsd4_lookup,
@@ -1302,42 +1481,54 @@ static struct nfsd4_operation nfsd4_ops[] = {
1302 }, 1481 },
1303 [OP_OPEN] = { 1482 [OP_OPEN] = {
1304 .op_func = (nfsd4op_func)nfsd4_open, 1483 .op_func = (nfsd4op_func)nfsd4_open,
1305 .op_flags = OP_HANDLES_WRONGSEC, 1484 .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
1306 .op_name = "OP_OPEN", 1485 .op_name = "OP_OPEN",
1486 .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
1307 }, 1487 },
1308 [OP_OPEN_CONFIRM] = { 1488 [OP_OPEN_CONFIRM] = {
1309 .op_func = (nfsd4op_func)nfsd4_open_confirm, 1489 .op_func = (nfsd4op_func)nfsd4_open_confirm,
1490 .op_flags = OP_MODIFIES_SOMETHING,
1310 .op_name = "OP_OPEN_CONFIRM", 1491 .op_name = "OP_OPEN_CONFIRM",
1492 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1311 }, 1493 },
1312 [OP_OPEN_DOWNGRADE] = { 1494 [OP_OPEN_DOWNGRADE] = {
1313 .op_func = (nfsd4op_func)nfsd4_open_downgrade, 1495 .op_func = (nfsd4op_func)nfsd4_open_downgrade,
1496 .op_flags = OP_MODIFIES_SOMETHING,
1314 .op_name = "OP_OPEN_DOWNGRADE", 1497 .op_name = "OP_OPEN_DOWNGRADE",
1498 .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
1315 }, 1499 },
1316 [OP_PUTFH] = { 1500 [OP_PUTFH] = {
1317 .op_func = (nfsd4op_func)nfsd4_putfh, 1501 .op_func = (nfsd4op_func)nfsd4_putfh,
1318 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1502 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1319 | OP_IS_PUTFH_LIKE, 1503 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
1320 .op_name = "OP_PUTFH", 1504 .op_name = "OP_PUTFH",
1505 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1321 }, 1506 },
1322 [OP_PUTPUBFH] = { 1507 [OP_PUTPUBFH] = {
1323 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1508 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1324 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1509 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1325 | OP_IS_PUTFH_LIKE, 1510 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
1326 .op_name = "OP_PUTPUBFH", 1511 .op_name = "OP_PUTPUBFH",
1512 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1327 }, 1513 },
1328 [OP_PUTROOTFH] = { 1514 [OP_PUTROOTFH] = {
1329 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1515 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1330 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1516 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1331 | OP_IS_PUTFH_LIKE, 1517 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
1332 .op_name = "OP_PUTROOTFH", 1518 .op_name = "OP_PUTROOTFH",
1519 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1333 }, 1520 },
1334 [OP_READ] = { 1521 [OP_READ] = {
1335 .op_func = (nfsd4op_func)nfsd4_read, 1522 .op_func = (nfsd4op_func)nfsd4_read,
1523 .op_flags = OP_MODIFIES_SOMETHING,
1336 .op_name = "OP_READ", 1524 .op_name = "OP_READ",
1525 .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
1337 }, 1526 },
1338 [OP_READDIR] = { 1527 [OP_READDIR] = {
1339 .op_func = (nfsd4op_func)nfsd4_readdir, 1528 .op_func = (nfsd4op_func)nfsd4_readdir,
1529 .op_flags = OP_MODIFIES_SOMETHING,
1340 .op_name = "OP_READDIR", 1530 .op_name = "OP_READDIR",
1531 .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
1341 }, 1532 },
1342 [OP_READLINK] = { 1533 [OP_READLINK] = {
1343 .op_func = (nfsd4op_func)nfsd4_readlink, 1534 .op_func = (nfsd4op_func)nfsd4_readlink,
@@ -1345,29 +1536,36 @@ static struct nfsd4_operation nfsd4_ops[] = {
1345 }, 1536 },
1346 [OP_REMOVE] = { 1537 [OP_REMOVE] = {
1347 .op_func = (nfsd4op_func)nfsd4_remove, 1538 .op_func = (nfsd4op_func)nfsd4_remove,
1539 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1348 .op_name = "OP_REMOVE", 1540 .op_name = "OP_REMOVE",
1349 .op_cacheresult = true, 1541 .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
1350 }, 1542 },
1351 [OP_RENAME] = { 1543 [OP_RENAME] = {
1352 .op_name = "OP_RENAME",
1353 .op_func = (nfsd4op_func)nfsd4_rename, 1544 .op_func = (nfsd4op_func)nfsd4_rename,
1354 .op_cacheresult = true, 1545 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1546 .op_name = "OP_RENAME",
1547 .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
1355 }, 1548 },
1356 [OP_RENEW] = { 1549 [OP_RENEW] = {
1357 .op_func = (nfsd4op_func)nfsd4_renew, 1550 .op_func = (nfsd4op_func)nfsd4_renew,
1358 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1551 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1552 | OP_MODIFIES_SOMETHING,
1359 .op_name = "OP_RENEW", 1553 .op_name = "OP_RENEW",
1554 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1555
1360 }, 1556 },
1361 [OP_RESTOREFH] = { 1557 [OP_RESTOREFH] = {
1362 .op_func = (nfsd4op_func)nfsd4_restorefh, 1558 .op_func = (nfsd4op_func)nfsd4_restorefh,
1363 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1559 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1364 | OP_IS_PUTFH_LIKE, 1560 | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
1365 .op_name = "OP_RESTOREFH", 1561 .op_name = "OP_RESTOREFH",
1562 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1366 }, 1563 },
1367 [OP_SAVEFH] = { 1564 [OP_SAVEFH] = {
1368 .op_func = (nfsd4op_func)nfsd4_savefh, 1565 .op_func = (nfsd4op_func)nfsd4_savefh,
1369 .op_flags = OP_HANDLES_WRONGSEC, 1566 .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
1370 .op_name = "OP_SAVEFH", 1567 .op_name = "OP_SAVEFH",
1568 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1371 }, 1569 },
1372 [OP_SECINFO] = { 1570 [OP_SECINFO] = {
1373 .op_func = (nfsd4op_func)nfsd4_secinfo, 1571 .op_func = (nfsd4op_func)nfsd4_secinfo,
@@ -1377,19 +1575,22 @@ static struct nfsd4_operation nfsd4_ops[] = {
1377 [OP_SETATTR] = { 1575 [OP_SETATTR] = {
1378 .op_func = (nfsd4op_func)nfsd4_setattr, 1576 .op_func = (nfsd4op_func)nfsd4_setattr,
1379 .op_name = "OP_SETATTR", 1577 .op_name = "OP_SETATTR",
1380 .op_cacheresult = true, 1578 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1579 .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
1381 }, 1580 },
1382 [OP_SETCLIENTID] = { 1581 [OP_SETCLIENTID] = {
1383 .op_func = (nfsd4op_func)nfsd4_setclientid, 1582 .op_func = (nfsd4op_func)nfsd4_setclientid,
1384 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1583 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1584 | OP_MODIFIES_SOMETHING | OP_CACHEME,
1385 .op_name = "OP_SETCLIENTID", 1585 .op_name = "OP_SETCLIENTID",
1386 .op_cacheresult = true, 1586 .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
1387 }, 1587 },
1388 [OP_SETCLIENTID_CONFIRM] = { 1588 [OP_SETCLIENTID_CONFIRM] = {
1389 .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, 1589 .op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
1390 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1590 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1591 | OP_MODIFIES_SOMETHING | OP_CACHEME,
1391 .op_name = "OP_SETCLIENTID_CONFIRM", 1592 .op_name = "OP_SETCLIENTID_CONFIRM",
1392 .op_cacheresult = true, 1593 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1393 }, 1594 },
1394 [OP_VERIFY] = { 1595 [OP_VERIFY] = {
1395 .op_func = (nfsd4op_func)nfsd4_verify, 1596 .op_func = (nfsd4op_func)nfsd4_verify,
@@ -1397,35 +1598,46 @@ static struct nfsd4_operation nfsd4_ops[] = {
1397 }, 1598 },
1398 [OP_WRITE] = { 1599 [OP_WRITE] = {
1399 .op_func = (nfsd4op_func)nfsd4_write, 1600 .op_func = (nfsd4op_func)nfsd4_write,
1601 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
1400 .op_name = "OP_WRITE", 1602 .op_name = "OP_WRITE",
1401 .op_cacheresult = true, 1603 .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
1402 }, 1604 },
1403 [OP_RELEASE_LOCKOWNER] = { 1605 [OP_RELEASE_LOCKOWNER] = {
1404 .op_func = (nfsd4op_func)nfsd4_release_lockowner, 1606 .op_func = (nfsd4op_func)nfsd4_release_lockowner,
1405 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1607 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
1608 | OP_MODIFIES_SOMETHING,
1406 .op_name = "OP_RELEASE_LOCKOWNER", 1609 .op_name = "OP_RELEASE_LOCKOWNER",
1610 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1407 }, 1611 },
1408 1612
1409 /* NFSv4.1 operations */ 1613 /* NFSv4.1 operations */
1410 [OP_EXCHANGE_ID] = { 1614 [OP_EXCHANGE_ID] = {
1411 .op_func = (nfsd4op_func)nfsd4_exchange_id, 1615 .op_func = (nfsd4op_func)nfsd4_exchange_id,
1412 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1616 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
1617 | OP_MODIFIES_SOMETHING,
1413 .op_name = "OP_EXCHANGE_ID", 1618 .op_name = "OP_EXCHANGE_ID",
1619 .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
1414 }, 1620 },
1415 [OP_BIND_CONN_TO_SESSION] = { 1621 [OP_BIND_CONN_TO_SESSION] = {
1416 .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, 1622 .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
1417 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1623 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
1624 | OP_MODIFIES_SOMETHING,
1418 .op_name = "OP_BIND_CONN_TO_SESSION", 1625 .op_name = "OP_BIND_CONN_TO_SESSION",
1626 .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
1419 }, 1627 },
1420 [OP_CREATE_SESSION] = { 1628 [OP_CREATE_SESSION] = {
1421 .op_func = (nfsd4op_func)nfsd4_create_session, 1629 .op_func = (nfsd4op_func)nfsd4_create_session,
1422 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1630 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
1631 | OP_MODIFIES_SOMETHING,
1423 .op_name = "OP_CREATE_SESSION", 1632 .op_name = "OP_CREATE_SESSION",
1633 .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
1424 }, 1634 },
1425 [OP_DESTROY_SESSION] = { 1635 [OP_DESTROY_SESSION] = {
1426 .op_func = (nfsd4op_func)nfsd4_destroy_session, 1636 .op_func = (nfsd4op_func)nfsd4_destroy_session,
1427 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1637 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
1638 | OP_MODIFIES_SOMETHING,
1428 .op_name = "OP_DESTROY_SESSION", 1639 .op_name = "OP_DESTROY_SESSION",
1640 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1429 }, 1641 },
1430 [OP_SEQUENCE] = { 1642 [OP_SEQUENCE] = {
1431 .op_func = (nfsd4op_func)nfsd4_sequence, 1643 .op_func = (nfsd4op_func)nfsd4_sequence,
@@ -1433,14 +1645,17 @@ static struct nfsd4_operation nfsd4_ops[] = {
1433 .op_name = "OP_SEQUENCE", 1645 .op_name = "OP_SEQUENCE",
1434 }, 1646 },
1435 [OP_DESTROY_CLIENTID] = { 1647 [OP_DESTROY_CLIENTID] = {
1436 .op_func = NULL, 1648 .op_func = (nfsd4op_func)nfsd4_destroy_clientid,
1437 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1649 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
1650 | OP_MODIFIES_SOMETHING,
1438 .op_name = "OP_DESTROY_CLIENTID", 1651 .op_name = "OP_DESTROY_CLIENTID",
1652 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1439 }, 1653 },
1440 [OP_RECLAIM_COMPLETE] = { 1654 [OP_RECLAIM_COMPLETE] = {
1441 .op_func = (nfsd4op_func)nfsd4_reclaim_complete, 1655 .op_func = (nfsd4op_func)nfsd4_reclaim_complete,
1442 .op_flags = ALLOWED_WITHOUT_FH, 1656 .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
1443 .op_name = "OP_RECLAIM_COMPLETE", 1657 .op_name = "OP_RECLAIM_COMPLETE",
1658 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1444 }, 1659 },
1445 [OP_SECINFO_NO_NAME] = { 1660 [OP_SECINFO_NO_NAME] = {
1446 .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, 1661 .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
@@ -1454,8 +1669,9 @@ static struct nfsd4_operation nfsd4_ops[] = {
1454 }, 1669 },
1455 [OP_FREE_STATEID] = { 1670 [OP_FREE_STATEID] = {
1456 .op_func = (nfsd4op_func)nfsd4_free_stateid, 1671 .op_func = (nfsd4op_func)nfsd4_free_stateid,
1457 .op_flags = ALLOWED_WITHOUT_FH, 1672 .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
1458 .op_name = "OP_FREE_STATEID", 1673 .op_name = "OP_FREE_STATEID",
1674 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1459 }, 1675 },
1460}; 1676};
1461 1677
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 29d77f60585b..ed083b9a731b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -45,6 +45,7 @@
45 45
46/* Globals */ 46/* Globals */
47static struct file *rec_file; 47static struct file *rec_file;
48static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
48 49
49static int 50static int
50nfs4_save_creds(const struct cred **original_creds) 51nfs4_save_creds(const struct cred **original_creds)
@@ -88,7 +89,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
88 struct xdr_netobj cksum; 89 struct xdr_netobj cksum;
89 struct hash_desc desc; 90 struct hash_desc desc;
90 struct scatterlist sg; 91 struct scatterlist sg;
91 __be32 status = nfserr_resource; 92 __be32 status = nfserr_jukebox;
92 93
93 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", 94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
94 clname->len, clname->data); 95 clname->len, clname->data);
@@ -129,6 +130,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
129 if (!rec_file || clp->cl_firststate) 130 if (!rec_file || clp->cl_firststate)
130 return 0; 131 return 0;
131 132
133 clp->cl_firststate = 1;
132 status = nfs4_save_creds(&original_cred); 134 status = nfs4_save_creds(&original_cred);
133 if (status < 0) 135 if (status < 0)
134 return status; 136 return status;
@@ -143,10 +145,8 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
143 goto out_unlock; 145 goto out_unlock;
144 } 146 }
145 status = -EEXIST; 147 status = -EEXIST;
146 if (dentry->d_inode) { 148 if (dentry->d_inode)
147 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
148 goto out_put; 149 goto out_put;
149 }
150 status = mnt_want_write(rec_file->f_path.mnt); 150 status = mnt_want_write(rec_file->f_path.mnt);
151 if (status) 151 if (status)
152 goto out_put; 152 goto out_put;
@@ -156,12 +156,14 @@ out_put:
156 dput(dentry); 156 dput(dentry);
157out_unlock: 157out_unlock:
158 mutex_unlock(&dir->d_inode->i_mutex); 158 mutex_unlock(&dir->d_inode->i_mutex);
159 if (status == 0) { 159 if (status == 0)
160 clp->cl_firststate = 1;
161 vfs_fsync(rec_file, 0); 160 vfs_fsync(rec_file, 0);
162 } 161 else
162 printk(KERN_ERR "NFSD: failed to write recovery record"
163 " (err %d); please check that %s exists"
164 " and is writeable", status,
165 user_recovery_dirname);
163 nfs4_reset_creds(original_cred); 166 nfs4_reset_creds(original_cred);
164 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
165 return status; 167 return status;
166} 168}
167 169
@@ -354,13 +356,13 @@ nfsd4_recdir_load(void) {
354 */ 356 */
355 357
356void 358void
357nfsd4_init_recdir(char *rec_dirname) 359nfsd4_init_recdir()
358{ 360{
359 const struct cred *original_cred; 361 const struct cred *original_cred;
360 int status; 362 int status;
361 363
362 printk("NFSD: Using %s as the NFSv4 state recovery directory\n", 364 printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
363 rec_dirname); 365 user_recovery_dirname);
364 366
365 BUG_ON(rec_file); 367 BUG_ON(rec_file);
366 368
@@ -372,10 +374,10 @@ nfsd4_init_recdir(char *rec_dirname)
372 return; 374 return;
373 } 375 }
374 376
375 rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0); 377 rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
376 if (IS_ERR(rec_file)) { 378 if (IS_ERR(rec_file)) {
377 printk("NFSD: unable to find recovery directory %s\n", 379 printk("NFSD: unable to find recovery directory %s\n",
378 rec_dirname); 380 user_recovery_dirname);
379 rec_file = NULL; 381 rec_file = NULL;
380 } 382 }
381 383
@@ -390,3 +392,30 @@ nfsd4_shutdown_recdir(void)
390 fput(rec_file); 392 fput(rec_file);
391 rec_file = NULL; 393 rec_file = NULL;
392} 394}
395
396/*
397 * Change the NFSv4 recovery directory to recdir.
398 */
399int
400nfs4_reset_recoverydir(char *recdir)
401{
402 int status;
403 struct path path;
404
405 status = kern_path(recdir, LOOKUP_FOLLOW, &path);
406 if (status)
407 return status;
408 status = -ENOTDIR;
409 if (S_ISDIR(path.dentry->d_inode->i_mode)) {
410 strcpy(user_recovery_dirname, recdir);
411 status = 0;
412 }
413 path_put(&path);
414 return status;
415}
416
417char *
418nfs4_recoverydir(void)
419{
420 return user_recovery_dirname;
421}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3787ec117400..47e94e33a975 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -49,9 +49,6 @@
49time_t nfsd4_lease = 90; /* default lease time */ 49time_t nfsd4_lease = 90; /* default lease time */
50time_t nfsd4_grace = 90; 50time_t nfsd4_grace = 90;
51static time_t boot_time; 51static time_t boot_time;
52static u32 current_ownerid = 1;
53static u32 current_fileid = 1;
54static u32 current_delegid = 1;
55static stateid_t zerostateid; /* bits all 0 */ 52static stateid_t zerostateid; /* bits all 0 */
56static stateid_t onestateid; /* bits all 1 */ 53static stateid_t onestateid; /* bits all 1 */
57static u64 current_sessionid = 1; 54static u64 current_sessionid = 1;
@@ -60,13 +57,7 @@ static u64 current_sessionid = 1;
60#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) 57#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
61 58
62/* forward declarations */ 59/* forward declarations */
63static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); 60static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
64static struct nfs4_stateid * search_for_stateid(stateid_t *stid);
65static struct nfs4_delegation * search_for_delegation(stateid_t *stid);
66static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
67static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
68static void nfs4_set_recdir(char *recdir);
69static int check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner);
70 61
71/* Locking: */ 62/* Locking: */
72 63
@@ -80,7 +71,8 @@ static DEFINE_MUTEX(client_mutex);
80 */ 71 */
81static DEFINE_SPINLOCK(recall_lock); 72static DEFINE_SPINLOCK(recall_lock);
82 73
83static struct kmem_cache *stateowner_slab = NULL; 74static struct kmem_cache *openowner_slab = NULL;
75static struct kmem_cache *lockowner_slab = NULL;
84static struct kmem_cache *file_slab = NULL; 76static struct kmem_cache *file_slab = NULL;
85static struct kmem_cache *stateid_slab = NULL; 77static struct kmem_cache *stateid_slab = NULL;
86static struct kmem_cache *deleg_slab = NULL; 78static struct kmem_cache *deleg_slab = NULL;
@@ -112,6 +104,11 @@ opaque_hashval(const void *ptr, int nbytes)
112 104
113static struct list_head del_recall_lru; 105static struct list_head del_recall_lru;
114 106
107static void nfsd4_free_file(struct nfs4_file *f)
108{
109 kmem_cache_free(file_slab, f);
110}
111
115static inline void 112static inline void
116put_nfs4_file(struct nfs4_file *fi) 113put_nfs4_file(struct nfs4_file *fi)
117{ 114{
@@ -119,7 +116,7 @@ put_nfs4_file(struct nfs4_file *fi)
119 list_del(&fi->fi_hash); 116 list_del(&fi->fi_hash);
120 spin_unlock(&recall_lock); 117 spin_unlock(&recall_lock);
121 iput(fi->fi_inode); 118 iput(fi->fi_inode);
122 kmem_cache_free(file_slab, fi); 119 nfsd4_free_file(fi);
123 } 120 }
124} 121}
125 122
@@ -136,35 +133,33 @@ unsigned int max_delegations;
136 * Open owner state (share locks) 133 * Open owner state (share locks)
137 */ 134 */
138 135
139/* hash tables for nfs4_stateowner */ 136/* hash tables for open owners */
140#define OWNER_HASH_BITS 8 137#define OPEN_OWNER_HASH_BITS 8
141#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) 138#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS)
142#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) 139#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1)
143 140
144#define ownerid_hashval(id) \ 141static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
145 ((id) & OWNER_HASH_MASK) 142{
146#define ownerstr_hashval(clientid, ownername) \ 143 unsigned int ret;
147 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
148 144
149static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE]; 145 ret = opaque_hashval(ownername->data, ownername->len);
150static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; 146 ret += clientid;
147 return ret & OPEN_OWNER_HASH_MASK;
148}
149
150static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE];
151 151
152/* hash table for nfs4_file */ 152/* hash table for nfs4_file */
153#define FILE_HASH_BITS 8 153#define FILE_HASH_BITS 8
154#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) 154#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
155 155
156/* hash table for (open)nfs4_stateid */ 156static unsigned int file_hashval(struct inode *ino)
157#define STATEID_HASH_BITS 10 157{
158#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) 158 /* XXX: why are we hashing on inode pointer, anyway? */
159#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1) 159 return hash_ptr(ino, FILE_HASH_BITS);
160 160}
161#define file_hashval(x) \
162 hash_ptr(x, FILE_HASH_BITS)
163#define stateid_hashval(owner_id, file_id) \
164 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
165 161
166static struct list_head file_hashtbl[FILE_HASH_SIZE]; 162static struct list_head file_hashtbl[FILE_HASH_SIZE];
167static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
168 163
169static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) 164static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
170{ 165{
@@ -192,8 +187,15 @@ static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
192static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) 187static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
193{ 188{
194 if (atomic_dec_and_test(&fp->fi_access[oflag])) { 189 if (atomic_dec_and_test(&fp->fi_access[oflag])) {
195 nfs4_file_put_fd(fp, O_RDWR);
196 nfs4_file_put_fd(fp, oflag); 190 nfs4_file_put_fd(fp, oflag);
191 /*
192 * It's also safe to get rid of the RDWR open *if*
193 * we no longer have need of the other kind of access
194 * or if we already have the other kind of open:
195 */
196 if (fp->fi_fds[1-oflag]
197 || atomic_read(&fp->fi_access[1 - oflag]) == 0)
198 nfs4_file_put_fd(fp, O_RDWR);
197 } 199 }
198} 200}
199 201
@@ -206,8 +208,73 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
206 __nfs4_file_put_access(fp, oflag); 208 __nfs4_file_put_access(fp, oflag);
207} 209}
208 210
211static inline int get_new_stid(struct nfs4_stid *stid)
212{
213 static int min_stateid = 0;
214 struct idr *stateids = &stid->sc_client->cl_stateids;
215 int new_stid;
216 int error;
217
218 error = idr_get_new_above(stateids, stid, min_stateid, &new_stid);
219 /*
220 * Note: the necessary preallocation was done in
221 * nfs4_alloc_stateid(). The idr code caps the number of
222 * preallocations that can exist at a time, but the state lock
223 * prevents anyone from using ours before we get here:
224 */
225 BUG_ON(error);
226 /*
227 * It shouldn't be a problem to reuse an opaque stateid value.
228 * I don't think it is for 4.1. But with 4.0 I worry that, for
229 * example, a stray write retransmission could be accepted by
230 * the server when it should have been rejected. Therefore,
231 * adopt a trick from the sctp code to attempt to maximize the
232 * amount of time until an id is reused, by ensuring they always
233 * "increase" (mod INT_MAX):
234 */
235
236 min_stateid = new_stid+1;
237 if (min_stateid == INT_MAX)
238 min_stateid = 0;
239 return new_stid;
240}
241
242static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type)
243{
244 stateid_t *s = &stid->sc_stateid;
245 int new_id;
246
247 stid->sc_type = type;
248 stid->sc_client = cl;
249 s->si_opaque.so_clid = cl->cl_clientid;
250 new_id = get_new_stid(stid);
251 s->si_opaque.so_id = (u32)new_id;
252 /* Will be incremented before return to client: */
253 s->si_generation = 0;
254}
255
256static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab)
257{
258 struct idr *stateids = &cl->cl_stateids;
259
260 if (!idr_pre_get(stateids, GFP_KERNEL))
261 return NULL;
262 /*
263 * Note: if we fail here (or any time between now and the time
264 * we actually get the new idr), we won't need to undo the idr
265 * preallocation, since the idr code caps the number of
266 * preallocated entries.
267 */
268 return kmem_cache_alloc(slab, GFP_KERNEL);
269}
270
271static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
272{
273 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
274}
275
209static struct nfs4_delegation * 276static struct nfs4_delegation *
210alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 277alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type)
211{ 278{
212 struct nfs4_delegation *dp; 279 struct nfs4_delegation *dp;
213 struct nfs4_file *fp = stp->st_file; 280 struct nfs4_file *fp = stp->st_file;
@@ -224,21 +291,23 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
224 return NULL; 291 return NULL;
225 if (num_delegations > max_delegations) 292 if (num_delegations > max_delegations)
226 return NULL; 293 return NULL;
227 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); 294 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
228 if (dp == NULL) 295 if (dp == NULL)
229 return dp; 296 return dp;
297 init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID);
298 /*
299 * delegation seqid's are never incremented. The 4.1 special
300 * meaning of seqid 0 isn't meaningful, really, but let's avoid
301 * 0 anyway just for consistency and use 1:
302 */
303 dp->dl_stid.sc_stateid.si_generation = 1;
230 num_delegations++; 304 num_delegations++;
231 INIT_LIST_HEAD(&dp->dl_perfile); 305 INIT_LIST_HEAD(&dp->dl_perfile);
232 INIT_LIST_HEAD(&dp->dl_perclnt); 306 INIT_LIST_HEAD(&dp->dl_perclnt);
233 INIT_LIST_HEAD(&dp->dl_recall_lru); 307 INIT_LIST_HEAD(&dp->dl_recall_lru);
234 dp->dl_client = clp;
235 get_nfs4_file(fp); 308 get_nfs4_file(fp);
236 dp->dl_file = fp; 309 dp->dl_file = fp;
237 dp->dl_type = type; 310 dp->dl_type = type;
238 dp->dl_stateid.si_boot = boot_time;
239 dp->dl_stateid.si_stateownerid = current_delegid++;
240 dp->dl_stateid.si_fileid = 0;
241 dp->dl_stateid.si_generation = 0;
242 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 311 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
243 dp->dl_time = 0; 312 dp->dl_time = 0;
244 atomic_set(&dp->dl_count, 1); 313 atomic_set(&dp->dl_count, 1);
@@ -267,10 +336,18 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
267 } 336 }
268} 337}
269 338
339static void unhash_stid(struct nfs4_stid *s)
340{
341 struct idr *stateids = &s->sc_client->cl_stateids;
342
343 idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
344}
345
270/* Called under the state lock. */ 346/* Called under the state lock. */
271static void 347static void
272unhash_delegation(struct nfs4_delegation *dp) 348unhash_delegation(struct nfs4_delegation *dp)
273{ 349{
350 unhash_stid(&dp->dl_stid);
274 list_del_init(&dp->dl_perclnt); 351 list_del_init(&dp->dl_perclnt);
275 spin_lock(&recall_lock); 352 spin_lock(&recall_lock);
276 list_del_init(&dp->dl_perfile); 353 list_del_init(&dp->dl_perfile);
@@ -292,10 +369,16 @@ static DEFINE_SPINLOCK(client_lock);
292#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) 369#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
293#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) 370#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
294 371
295#define clientid_hashval(id) \ 372static unsigned int clientid_hashval(u32 id)
296 ((id) & CLIENT_HASH_MASK) 373{
297#define clientstr_hashval(name) \ 374 return id & CLIENT_HASH_MASK;
298 (opaque_hashval((name), 8) & CLIENT_HASH_MASK) 375}
376
377static unsigned int clientstr_hashval(const char *name)
378{
379 return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
380}
381
299/* 382/*
300 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot 383 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
301 * used in reboot/reset lease grace period processing 384 * used in reboot/reset lease grace period processing
@@ -362,7 +445,7 @@ set_deny(unsigned int *deny, unsigned long bmap) {
362} 445}
363 446
364static int 447static int
365test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { 448test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
366 unsigned int access, deny; 449 unsigned int access, deny;
367 450
368 set_access(&access, stp->st_access_bmap); 451 set_access(&access, stp->st_access_bmap);
@@ -385,14 +468,13 @@ static int nfs4_access_to_omode(u32 access)
385 BUG(); 468 BUG();
386} 469}
387 470
388static void unhash_generic_stateid(struct nfs4_stateid *stp) 471static void unhash_generic_stateid(struct nfs4_ol_stateid *stp)
389{ 472{
390 list_del(&stp->st_hash);
391 list_del(&stp->st_perfile); 473 list_del(&stp->st_perfile);
392 list_del(&stp->st_perstateowner); 474 list_del(&stp->st_perstateowner);
393} 475}
394 476
395static void free_generic_stateid(struct nfs4_stateid *stp) 477static void close_generic_stateid(struct nfs4_ol_stateid *stp)
396{ 478{
397 int i; 479 int i;
398 480
@@ -401,84 +483,106 @@ static void free_generic_stateid(struct nfs4_stateid *stp)
401 if (test_bit(i, &stp->st_access_bmap)) 483 if (test_bit(i, &stp->st_access_bmap))
402 nfs4_file_put_access(stp->st_file, 484 nfs4_file_put_access(stp->st_file,
403 nfs4_access_to_omode(i)); 485 nfs4_access_to_omode(i));
486 __clear_bit(i, &stp->st_access_bmap);
404 } 487 }
405 } 488 }
406 put_nfs4_file(stp->st_file); 489 put_nfs4_file(stp->st_file);
490 stp->st_file = NULL;
491}
492
493static void free_generic_stateid(struct nfs4_ol_stateid *stp)
494{
407 kmem_cache_free(stateid_slab, stp); 495 kmem_cache_free(stateid_slab, stp);
408} 496}
409 497
410static void release_lock_stateid(struct nfs4_stateid *stp) 498static void release_lock_stateid(struct nfs4_ol_stateid *stp)
411{ 499{
412 struct file *file; 500 struct file *file;
413 501
414 unhash_generic_stateid(stp); 502 unhash_generic_stateid(stp);
503 unhash_stid(&stp->st_stid);
415 file = find_any_file(stp->st_file); 504 file = find_any_file(stp->st_file);
416 if (file) 505 if (file)
417 locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); 506 locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
507 close_generic_stateid(stp);
418 free_generic_stateid(stp); 508 free_generic_stateid(stp);
419} 509}
420 510
421static void unhash_lockowner(struct nfs4_stateowner *sop) 511static void unhash_lockowner(struct nfs4_lockowner *lo)
422{ 512{
423 struct nfs4_stateid *stp; 513 struct nfs4_ol_stateid *stp;
424 514
425 list_del(&sop->so_idhash); 515 list_del(&lo->lo_owner.so_strhash);
426 list_del(&sop->so_strhash); 516 list_del(&lo->lo_perstateid);
427 list_del(&sop->so_perstateid); 517 while (!list_empty(&lo->lo_owner.so_stateids)) {
428 while (!list_empty(&sop->so_stateids)) { 518 stp = list_first_entry(&lo->lo_owner.so_stateids,
429 stp = list_first_entry(&sop->so_stateids, 519 struct nfs4_ol_stateid, st_perstateowner);
430 struct nfs4_stateid, st_perstateowner);
431 release_lock_stateid(stp); 520 release_lock_stateid(stp);
432 } 521 }
433} 522}
434 523
435static void release_lockowner(struct nfs4_stateowner *sop) 524static void release_lockowner(struct nfs4_lockowner *lo)
436{ 525{
437 unhash_lockowner(sop); 526 unhash_lockowner(lo);
438 nfs4_put_stateowner(sop); 527 nfs4_free_lockowner(lo);
439} 528}
440 529
441static void 530static void
442release_stateid_lockowners(struct nfs4_stateid *open_stp) 531release_stateid_lockowners(struct nfs4_ol_stateid *open_stp)
443{ 532{
444 struct nfs4_stateowner *lock_sop; 533 struct nfs4_lockowner *lo;
445 534
446 while (!list_empty(&open_stp->st_lockowners)) { 535 while (!list_empty(&open_stp->st_lockowners)) {
447 lock_sop = list_entry(open_stp->st_lockowners.next, 536 lo = list_entry(open_stp->st_lockowners.next,
448 struct nfs4_stateowner, so_perstateid); 537 struct nfs4_lockowner, lo_perstateid);
449 /* list_del(&open_stp->st_lockowners); */ 538 release_lockowner(lo);
450 BUG_ON(lock_sop->so_is_open_owner);
451 release_lockowner(lock_sop);
452 } 539 }
453} 540}
454 541
455static void release_open_stateid(struct nfs4_stateid *stp) 542static void unhash_open_stateid(struct nfs4_ol_stateid *stp)
456{ 543{
457 unhash_generic_stateid(stp); 544 unhash_generic_stateid(stp);
458 release_stateid_lockowners(stp); 545 release_stateid_lockowners(stp);
546 close_generic_stateid(stp);
547}
548
549static void release_open_stateid(struct nfs4_ol_stateid *stp)
550{
551 unhash_open_stateid(stp);
552 unhash_stid(&stp->st_stid);
459 free_generic_stateid(stp); 553 free_generic_stateid(stp);
460} 554}
461 555
462static void unhash_openowner(struct nfs4_stateowner *sop) 556static void unhash_openowner(struct nfs4_openowner *oo)
463{ 557{
464 struct nfs4_stateid *stp; 558 struct nfs4_ol_stateid *stp;
465 559
466 list_del(&sop->so_idhash); 560 list_del(&oo->oo_owner.so_strhash);
467 list_del(&sop->so_strhash); 561 list_del(&oo->oo_perclient);
468 list_del(&sop->so_perclient); 562 while (!list_empty(&oo->oo_owner.so_stateids)) {
469 list_del(&sop->so_perstateid); /* XXX: necessary? */ 563 stp = list_first_entry(&oo->oo_owner.so_stateids,
470 while (!list_empty(&sop->so_stateids)) { 564 struct nfs4_ol_stateid, st_perstateowner);
471 stp = list_first_entry(&sop->so_stateids,
472 struct nfs4_stateid, st_perstateowner);
473 release_open_stateid(stp); 565 release_open_stateid(stp);
474 } 566 }
475} 567}
476 568
477static void release_openowner(struct nfs4_stateowner *sop) 569static void release_last_closed_stateid(struct nfs4_openowner *oo)
478{ 570{
479 unhash_openowner(sop); 571 struct nfs4_ol_stateid *s = oo->oo_last_closed_stid;
480 list_del(&sop->so_close_lru); 572
481 nfs4_put_stateowner(sop); 573 if (s) {
574 unhash_stid(&s->st_stid);
575 free_generic_stateid(s);
576 oo->oo_last_closed_stid = NULL;
577 }
578}
579
580static void release_openowner(struct nfs4_openowner *oo)
581{
582 unhash_openowner(oo);
583 list_del(&oo->oo_close_lru);
584 release_last_closed_stateid(oo);
585 nfs4_free_openowner(oo);
482} 586}
483 587
484#define SESSION_HASH_SIZE 512 588#define SESSION_HASH_SIZE 512
@@ -843,9 +947,6 @@ renew_client_locked(struct nfs4_client *clp)
843 return; 947 return;
844 } 948 }
845 949
846 /*
847 * Move client to the end to the LRU list.
848 */
849 dprintk("renewing client (clientid %08x/%08x)\n", 950 dprintk("renewing client (clientid %08x/%08x)\n",
850 clp->cl_clientid.cl_boot, 951 clp->cl_clientid.cl_boot,
851 clp->cl_clientid.cl_id); 952 clp->cl_clientid.cl_id);
@@ -943,7 +1044,7 @@ unhash_client_locked(struct nfs4_client *clp)
943static void 1044static void
944expire_client(struct nfs4_client *clp) 1045expire_client(struct nfs4_client *clp)
945{ 1046{
946 struct nfs4_stateowner *sop; 1047 struct nfs4_openowner *oo;
947 struct nfs4_delegation *dp; 1048 struct nfs4_delegation *dp;
948 struct list_head reaplist; 1049 struct list_head reaplist;
949 1050
@@ -961,8 +1062,8 @@ expire_client(struct nfs4_client *clp)
961 unhash_delegation(dp); 1062 unhash_delegation(dp);
962 } 1063 }
963 while (!list_empty(&clp->cl_openowners)) { 1064 while (!list_empty(&clp->cl_openowners)) {
964 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 1065 oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
965 release_openowner(sop); 1066 release_openowner(oo);
966 } 1067 }
967 nfsd4_shutdown_callback(clp); 1068 nfsd4_shutdown_callback(clp);
968 if (clp->cl_cb_conn.cb_xprt) 1069 if (clp->cl_cb_conn.cb_xprt)
@@ -1038,6 +1139,23 @@ static void gen_confirm(struct nfs4_client *clp)
1038 *p++ = i++; 1139 *p++ = i++;
1039} 1140}
1040 1141
1142static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
1143{
1144 return idr_find(&cl->cl_stateids, t->si_opaque.so_id);
1145}
1146
1147static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
1148{
1149 struct nfs4_stid *s;
1150
1151 s = find_stateid(cl, t);
1152 if (!s)
1153 return NULL;
1154 if (typemask & s->sc_type)
1155 return s;
1156 return NULL;
1157}
1158
1041static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, 1159static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
1042 struct svc_rqst *rqstp, nfs4_verifier *verf) 1160 struct svc_rqst *rqstp, nfs4_verifier *verf)
1043{ 1161{
@@ -1060,6 +1178,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
1060 } 1178 }
1061 } 1179 }
1062 1180
1181 idr_init(&clp->cl_stateids);
1063 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); 1182 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
1064 atomic_set(&clp->cl_refcount, 0); 1183 atomic_set(&clp->cl_refcount, 0);
1065 clp->cl_cb_state = NFSD4_CB_UNKNOWN; 1184 clp->cl_cb_state = NFSD4_CB_UNKNOWN;
@@ -1083,17 +1202,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
1083 return clp; 1202 return clp;
1084} 1203}
1085 1204
1086static int check_name(struct xdr_netobj name)
1087{
1088 if (name.len == 0)
1089 return 0;
1090 if (name.len > NFS4_OPAQUE_LIMIT) {
1091 dprintk("NFSD: check_name: name too long(%d)!\n", name.len);
1092 return 0;
1093 }
1094 return 1;
1095}
1096
1097static void 1205static void
1098add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) 1206add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
1099{ 1207{
@@ -1125,8 +1233,10 @@ find_confirmed_client(clientid_t *clid)
1125 unsigned int idhashval = clientid_hashval(clid->cl_id); 1233 unsigned int idhashval = clientid_hashval(clid->cl_id);
1126 1234
1127 list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { 1235 list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
1128 if (same_clid(&clp->cl_clientid, clid)) 1236 if (same_clid(&clp->cl_clientid, clid)) {
1237 renew_client(clp);
1129 return clp; 1238 return clp;
1239 }
1130 } 1240 }
1131 return NULL; 1241 return NULL;
1132} 1242}
@@ -1173,20 +1283,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
1173 return NULL; 1283 return NULL;
1174} 1284}
1175 1285
1176static void rpc_svcaddr2sockaddr(struct sockaddr *sa, unsigned short family, union svc_addr_u *svcaddr)
1177{
1178 switch (family) {
1179 case AF_INET:
1180 ((struct sockaddr_in *)sa)->sin_family = AF_INET;
1181 ((struct sockaddr_in *)sa)->sin_addr = svcaddr->addr;
1182 return;
1183 case AF_INET6:
1184 ((struct sockaddr_in6 *)sa)->sin6_family = AF_INET6;
1185 ((struct sockaddr_in6 *)sa)->sin6_addr = svcaddr->addr6;
1186 return;
1187 }
1188}
1189
1190static void 1286static void
1191gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) 1287gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
1192{ 1288{
@@ -1218,7 +1314,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
1218 1314
1219 conn->cb_prog = se->se_callback_prog; 1315 conn->cb_prog = se->se_callback_prog;
1220 conn->cb_ident = se->se_callback_ident; 1316 conn->cb_ident = se->se_callback_ident;
1221 rpc_svcaddr2sockaddr((struct sockaddr *)&conn->cb_saddr, expected_family, &rqstp->rq_daddr); 1317 memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
1222 return; 1318 return;
1223out_err: 1319out_err:
1224 conn->cb_addr.ss_family = AF_UNSPEC; 1320 conn->cb_addr.ss_family = AF_UNSPEC;
@@ -1350,7 +1446,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1350 __func__, rqstp, exid, exid->clname.len, exid->clname.data, 1446 __func__, rqstp, exid, exid->clname.len, exid->clname.data,
1351 addr_str, exid->flags, exid->spa_how); 1447 addr_str, exid->flags, exid->spa_how);
1352 1448
1353 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) 1449 if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
1354 return nfserr_inval; 1450 return nfserr_inval;
1355 1451
1356 /* Currently only support SP4_NONE */ 1452 /* Currently only support SP4_NONE */
@@ -1849,8 +1945,16 @@ out:
1849 1945
1850 nfsd4_get_session(cstate->session); 1946 nfsd4_get_session(cstate->session);
1851 atomic_inc(&clp->cl_refcount); 1947 atomic_inc(&clp->cl_refcount);
1852 if (clp->cl_cb_state == NFSD4_CB_DOWN) 1948 switch (clp->cl_cb_state) {
1853 seq->status_flags |= SEQ4_STATUS_CB_PATH_DOWN; 1949 case NFSD4_CB_DOWN:
1950 seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
1951 break;
1952 case NFSD4_CB_FAULT:
1953 seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
1954 break;
1955 default:
1956 seq->status_flags = 0;
1957 }
1854 } 1958 }
1855 kfree(conn); 1959 kfree(conn);
1856 spin_unlock(&client_lock); 1960 spin_unlock(&client_lock);
@@ -1858,6 +1962,50 @@ out:
1858 return status; 1962 return status;
1859} 1963}
1860 1964
1965static inline bool has_resources(struct nfs4_client *clp)
1966{
1967 return !list_empty(&clp->cl_openowners)
1968 || !list_empty(&clp->cl_delegations)
1969 || !list_empty(&clp->cl_sessions);
1970}
1971
1972__be32
1973nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
1974{
1975 struct nfs4_client *conf, *unconf, *clp;
1976 int status = 0;
1977
1978 nfs4_lock_state();
1979 unconf = find_unconfirmed_client(&dc->clientid);
1980 conf = find_confirmed_client(&dc->clientid);
1981
1982 if (conf) {
1983 clp = conf;
1984
1985 if (!is_client_expired(conf) && has_resources(conf)) {
1986 status = nfserr_clientid_busy;
1987 goto out;
1988 }
1989
1990 /* rfc5661 18.50.3 */
1991 if (cstate->session && conf == cstate->session->se_client) {
1992 status = nfserr_clientid_busy;
1993 goto out;
1994 }
1995 } else if (unconf)
1996 clp = unconf;
1997 else {
1998 status = nfserr_stale_clientid;
1999 goto out;
2000 }
2001
2002 expire_client(clp);
2003out:
2004 nfs4_unlock_state();
2005 dprintk("%s return %d\n", __func__, ntohl(status));
2006 return status;
2007}
2008
1861__be32 2009__be32
1862nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) 2010nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
1863{ 2011{
@@ -1900,19 +2048,13 @@ __be32
1900nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 2048nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1901 struct nfsd4_setclientid *setclid) 2049 struct nfsd4_setclientid *setclid)
1902{ 2050{
1903 struct xdr_netobj clname = { 2051 struct xdr_netobj clname = setclid->se_name;
1904 .len = setclid->se_namelen,
1905 .data = setclid->se_name,
1906 };
1907 nfs4_verifier clverifier = setclid->se_verf; 2052 nfs4_verifier clverifier = setclid->se_verf;
1908 unsigned int strhashval; 2053 unsigned int strhashval;
1909 struct nfs4_client *conf, *unconf, *new; 2054 struct nfs4_client *conf, *unconf, *new;
1910 __be32 status; 2055 __be32 status;
1911 char dname[HEXDIR_LEN]; 2056 char dname[HEXDIR_LEN];
1912 2057
1913 if (!check_name(clname))
1914 return nfserr_inval;
1915
1916 status = nfs4_make_rec_clidname(dname, &clname); 2058 status = nfs4_make_rec_clidname(dname, &clname);
1917 if (status) 2059 if (status)
1918 return status; 2060 return status;
@@ -1946,7 +2088,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1946 * of 5 bullet points, labeled as CASE0 - CASE4 below. 2088 * of 5 bullet points, labeled as CASE0 - CASE4 below.
1947 */ 2089 */
1948 unconf = find_unconfirmed_client_by_str(dname, strhashval); 2090 unconf = find_unconfirmed_client_by_str(dname, strhashval);
1949 status = nfserr_resource; 2091 status = nfserr_jukebox;
1950 if (!conf) { 2092 if (!conf) {
1951 /* 2093 /*
1952 * RFC 3530 14.2.33 CASE 4: 2094 * RFC 3530 14.2.33 CASE 4:
@@ -2116,31 +2258,28 @@ out:
2116 return status; 2258 return status;
2117} 2259}
2118 2260
2261static struct nfs4_file *nfsd4_alloc_file(void)
2262{
2263 return kmem_cache_alloc(file_slab, GFP_KERNEL);
2264}
2265
2119/* OPEN Share state helper functions */ 2266/* OPEN Share state helper functions */
2120static inline struct nfs4_file * 2267static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
2121alloc_init_file(struct inode *ino)
2122{ 2268{
2123 struct nfs4_file *fp;
2124 unsigned int hashval = file_hashval(ino); 2269 unsigned int hashval = file_hashval(ino);
2125 2270
2126 fp = kmem_cache_alloc(file_slab, GFP_KERNEL); 2271 atomic_set(&fp->fi_ref, 1);
2127 if (fp) { 2272 INIT_LIST_HEAD(&fp->fi_hash);
2128 atomic_set(&fp->fi_ref, 1); 2273 INIT_LIST_HEAD(&fp->fi_stateids);
2129 INIT_LIST_HEAD(&fp->fi_hash); 2274 INIT_LIST_HEAD(&fp->fi_delegations);
2130 INIT_LIST_HEAD(&fp->fi_stateids); 2275 fp->fi_inode = igrab(ino);
2131 INIT_LIST_HEAD(&fp->fi_delegations); 2276 fp->fi_had_conflict = false;
2132 fp->fi_inode = igrab(ino); 2277 fp->fi_lease = NULL;
2133 fp->fi_id = current_fileid++; 2278 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2134 fp->fi_had_conflict = false; 2279 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2135 fp->fi_lease = NULL; 2280 spin_lock(&recall_lock);
2136 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 2281 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
2137 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 2282 spin_unlock(&recall_lock);
2138 spin_lock(&recall_lock);
2139 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
2140 spin_unlock(&recall_lock);
2141 return fp;
2142 }
2143 return NULL;
2144} 2283}
2145 2284
2146static void 2285static void
@@ -2155,7 +2294,8 @@ nfsd4_free_slab(struct kmem_cache **slab)
2155void 2294void
2156nfsd4_free_slabs(void) 2295nfsd4_free_slabs(void)
2157{ 2296{
2158 nfsd4_free_slab(&stateowner_slab); 2297 nfsd4_free_slab(&openowner_slab);
2298 nfsd4_free_slab(&lockowner_slab);
2159 nfsd4_free_slab(&file_slab); 2299 nfsd4_free_slab(&file_slab);
2160 nfsd4_free_slab(&stateid_slab); 2300 nfsd4_free_slab(&stateid_slab);
2161 nfsd4_free_slab(&deleg_slab); 2301 nfsd4_free_slab(&deleg_slab);
@@ -2164,16 +2304,20 @@ nfsd4_free_slabs(void)
2164static int 2304static int
2165nfsd4_init_slabs(void) 2305nfsd4_init_slabs(void)
2166{ 2306{
2167 stateowner_slab = kmem_cache_create("nfsd4_stateowners", 2307 openowner_slab = kmem_cache_create("nfsd4_openowners",
2168 sizeof(struct nfs4_stateowner), 0, 0, NULL); 2308 sizeof(struct nfs4_openowner), 0, 0, NULL);
2169 if (stateowner_slab == NULL) 2309 if (openowner_slab == NULL)
2310 goto out_nomem;
2311 lockowner_slab = kmem_cache_create("nfsd4_lockowners",
2312 sizeof(struct nfs4_openowner), 0, 0, NULL);
2313 if (lockowner_slab == NULL)
2170 goto out_nomem; 2314 goto out_nomem;
2171 file_slab = kmem_cache_create("nfsd4_files", 2315 file_slab = kmem_cache_create("nfsd4_files",
2172 sizeof(struct nfs4_file), 0, 0, NULL); 2316 sizeof(struct nfs4_file), 0, 0, NULL);
2173 if (file_slab == NULL) 2317 if (file_slab == NULL)
2174 goto out_nomem; 2318 goto out_nomem;
2175 stateid_slab = kmem_cache_create("nfsd4_stateids", 2319 stateid_slab = kmem_cache_create("nfsd4_stateids",
2176 sizeof(struct nfs4_stateid), 0, 0, NULL); 2320 sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
2177 if (stateid_slab == NULL) 2321 if (stateid_slab == NULL)
2178 goto out_nomem; 2322 goto out_nomem;
2179 deleg_slab = kmem_cache_create("nfsd4_delegations", 2323 deleg_slab = kmem_cache_create("nfsd4_delegations",
@@ -2187,97 +2331,94 @@ out_nomem:
2187 return -ENOMEM; 2331 return -ENOMEM;
2188} 2332}
2189 2333
2190void 2334void nfs4_free_openowner(struct nfs4_openowner *oo)
2191nfs4_free_stateowner(struct kref *kref)
2192{ 2335{
2193 struct nfs4_stateowner *sop = 2336 kfree(oo->oo_owner.so_owner.data);
2194 container_of(kref, struct nfs4_stateowner, so_ref); 2337 kmem_cache_free(openowner_slab, oo);
2195 kfree(sop->so_owner.data);
2196 kmem_cache_free(stateowner_slab, sop);
2197} 2338}
2198 2339
2199static inline struct nfs4_stateowner * 2340void nfs4_free_lockowner(struct nfs4_lockowner *lo)
2200alloc_stateowner(struct xdr_netobj *owner)
2201{ 2341{
2202 struct nfs4_stateowner *sop; 2342 kfree(lo->lo_owner.so_owner.data);
2343 kmem_cache_free(lockowner_slab, lo);
2344}
2203 2345
2204 if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) { 2346static void init_nfs4_replay(struct nfs4_replay *rp)
2205 if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) { 2347{
2206 memcpy(sop->so_owner.data, owner->data, owner->len); 2348 rp->rp_status = nfserr_serverfault;
2207 sop->so_owner.len = owner->len; 2349 rp->rp_buflen = 0;
2208 kref_init(&sop->so_ref); 2350 rp->rp_buf = rp->rp_ibuf;
2209 return sop;
2210 }
2211 kmem_cache_free(stateowner_slab, sop);
2212 }
2213 return NULL;
2214} 2351}
2215 2352
2216static struct nfs4_stateowner * 2353static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
2217alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { 2354{
2218 struct nfs4_stateowner *sop; 2355 struct nfs4_stateowner *sop;
2219 struct nfs4_replay *rp;
2220 unsigned int idhashval;
2221 2356
2222 if (!(sop = alloc_stateowner(&open->op_owner))) 2357 sop = kmem_cache_alloc(slab, GFP_KERNEL);
2358 if (!sop)
2359 return NULL;
2360
2361 sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
2362 if (!sop->so_owner.data) {
2363 kmem_cache_free(slab, sop);
2223 return NULL; 2364 return NULL;
2224 idhashval = ownerid_hashval(current_ownerid); 2365 }
2225 INIT_LIST_HEAD(&sop->so_idhash); 2366 sop->so_owner.len = owner->len;
2226 INIT_LIST_HEAD(&sop->so_strhash); 2367
2227 INIT_LIST_HEAD(&sop->so_perclient);
2228 INIT_LIST_HEAD(&sop->so_stateids); 2368 INIT_LIST_HEAD(&sop->so_stateids);
2229 INIT_LIST_HEAD(&sop->so_perstateid); /* not used */
2230 INIT_LIST_HEAD(&sop->so_close_lru);
2231 sop->so_time = 0;
2232 list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
2233 list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
2234 list_add(&sop->so_perclient, &clp->cl_openowners);
2235 sop->so_is_open_owner = 1;
2236 sop->so_id = current_ownerid++;
2237 sop->so_client = clp; 2369 sop->so_client = clp;
2238 sop->so_seqid = open->op_seqid; 2370 init_nfs4_replay(&sop->so_replay);
2239 sop->so_confirmed = 0;
2240 rp = &sop->so_replay;
2241 rp->rp_status = nfserr_serverfault;
2242 rp->rp_buflen = 0;
2243 rp->rp_buf = rp->rp_ibuf;
2244 return sop; 2371 return sop;
2245} 2372}
2246 2373
2247static inline void 2374static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
2248init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 2375{
2249 struct nfs4_stateowner *sop = open->op_stateowner; 2376 list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]);
2250 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); 2377 list_add(&oo->oo_perclient, &clp->cl_openowners);
2378}
2251 2379
2252 INIT_LIST_HEAD(&stp->st_hash); 2380static struct nfs4_openowner *
2253 INIT_LIST_HEAD(&stp->st_perstateowner); 2381alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
2382 struct nfs4_openowner *oo;
2383
2384 oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
2385 if (!oo)
2386 return NULL;
2387 oo->oo_owner.so_is_open_owner = 1;
2388 oo->oo_owner.so_seqid = open->op_seqid;
2389 oo->oo_flags = NFS4_OO_NEW;
2390 oo->oo_time = 0;
2391 oo->oo_last_closed_stid = NULL;
2392 INIT_LIST_HEAD(&oo->oo_close_lru);
2393 hash_openowner(oo, clp, strhashval);
2394 return oo;
2395}
2396
2397static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
2398 struct nfs4_openowner *oo = open->op_openowner;
2399 struct nfs4_client *clp = oo->oo_owner.so_client;
2400
2401 init_stid(&stp->st_stid, clp, NFS4_OPEN_STID);
2254 INIT_LIST_HEAD(&stp->st_lockowners); 2402 INIT_LIST_HEAD(&stp->st_lockowners);
2255 INIT_LIST_HEAD(&stp->st_perfile); 2403 list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
2256 list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
2257 list_add(&stp->st_perstateowner, &sop->so_stateids);
2258 list_add(&stp->st_perfile, &fp->fi_stateids); 2404 list_add(&stp->st_perfile, &fp->fi_stateids);
2259 stp->st_stateowner = sop; 2405 stp->st_stateowner = &oo->oo_owner;
2260 get_nfs4_file(fp); 2406 get_nfs4_file(fp);
2261 stp->st_file = fp; 2407 stp->st_file = fp;
2262 stp->st_stateid.si_boot = boot_time;
2263 stp->st_stateid.si_stateownerid = sop->so_id;
2264 stp->st_stateid.si_fileid = fp->fi_id;
2265 stp->st_stateid.si_generation = 0;
2266 stp->st_access_bmap = 0; 2408 stp->st_access_bmap = 0;
2267 stp->st_deny_bmap = 0; 2409 stp->st_deny_bmap = 0;
2268 __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK, 2410 __set_bit(open->op_share_access, &stp->st_access_bmap);
2269 &stp->st_access_bmap);
2270 __set_bit(open->op_share_deny, &stp->st_deny_bmap); 2411 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
2271 stp->st_openstp = NULL; 2412 stp->st_openstp = NULL;
2272} 2413}
2273 2414
2274static void 2415static void
2275move_to_close_lru(struct nfs4_stateowner *sop) 2416move_to_close_lru(struct nfs4_openowner *oo)
2276{ 2417{
2277 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 2418 dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
2278 2419
2279 list_move_tail(&sop->so_close_lru, &close_lru); 2420 list_move_tail(&oo->oo_close_lru, &close_lru);
2280 sop->so_time = get_seconds(); 2421 oo->oo_time = get_seconds();
2281} 2422}
2282 2423
2283static int 2424static int
@@ -2289,14 +2430,18 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
2289 (sop->so_client->cl_clientid.cl_id == clid->cl_id); 2430 (sop->so_client->cl_clientid.cl_id == clid->cl_id);
2290} 2431}
2291 2432
2292static struct nfs4_stateowner * 2433static struct nfs4_openowner *
2293find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) 2434find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
2294{ 2435{
2295 struct nfs4_stateowner *so = NULL; 2436 struct nfs4_stateowner *so;
2437 struct nfs4_openowner *oo;
2296 2438
2297 list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { 2439 list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) {
2298 if (same_owner_str(so, &open->op_owner, &open->op_clientid)) 2440 if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
2299 return so; 2441 oo = openowner(so);
2442 renew_client(oo->oo_owner.so_client);
2443 return oo;
2444 }
2300 } 2445 }
2301 return NULL; 2446 return NULL;
2302} 2447}
@@ -2320,31 +2465,6 @@ find_file(struct inode *ino)
2320 return NULL; 2465 return NULL;
2321} 2466}
2322 2467
2323static inline int access_valid(u32 x, u32 minorversion)
2324{
2325 if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
2326 return 0;
2327 if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH)
2328 return 0;
2329 x &= ~NFS4_SHARE_ACCESS_MASK;
2330 if (minorversion && x) {
2331 if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL)
2332 return 0;
2333 if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED)
2334 return 0;
2335 x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK);
2336 }
2337 if (x)
2338 return 0;
2339 return 1;
2340}
2341
2342static inline int deny_valid(u32 x)
2343{
2344 /* Note: unlike access bits, deny bits may be zero. */
2345 return x <= NFS4_SHARE_DENY_BOTH;
2346}
2347
2348/* 2468/*
2349 * Called to check deny when READ with all zero stateid or 2469 * Called to check deny when READ with all zero stateid or
2350 * WRITE with all zero or all one stateid 2470 * WRITE with all zero or all one stateid
@@ -2354,7 +2474,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
2354{ 2474{
2355 struct inode *ino = current_fh->fh_dentry->d_inode; 2475 struct inode *ino = current_fh->fh_dentry->d_inode;
2356 struct nfs4_file *fp; 2476 struct nfs4_file *fp;
2357 struct nfs4_stateid *stp; 2477 struct nfs4_ol_stateid *stp;
2358 __be32 ret; 2478 __be32 ret;
2359 2479
2360 dprintk("NFSD: nfs4_share_conflict\n"); 2480 dprintk("NFSD: nfs4_share_conflict\n");
@@ -2429,6 +2549,16 @@ static const struct lock_manager_operations nfsd_lease_mng_ops = {
2429 .lm_change = nfsd_change_deleg_cb, 2549 .lm_change = nfsd_change_deleg_cb,
2430}; 2550};
2431 2551
2552static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid)
2553{
2554 if (nfsd4_has_session(cstate))
2555 return nfs_ok;
2556 if (seqid == so->so_seqid - 1)
2557 return nfserr_replay_me;
2558 if (seqid == so->so_seqid)
2559 return nfs_ok;
2560 return nfserr_bad_seqid;
2561}
2432 2562
2433__be32 2563__be32
2434nfsd4_process_open1(struct nfsd4_compound_state *cstate, 2564nfsd4_process_open1(struct nfsd4_compound_state *cstate,
@@ -2437,57 +2567,49 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2437 clientid_t *clientid = &open->op_clientid; 2567 clientid_t *clientid = &open->op_clientid;
2438 struct nfs4_client *clp = NULL; 2568 struct nfs4_client *clp = NULL;
2439 unsigned int strhashval; 2569 unsigned int strhashval;
2440 struct nfs4_stateowner *sop = NULL; 2570 struct nfs4_openowner *oo = NULL;
2441 2571 __be32 status;
2442 if (!check_name(open->op_owner))
2443 return nfserr_inval;
2444 2572
2445 if (STALE_CLIENTID(&open->op_clientid)) 2573 if (STALE_CLIENTID(&open->op_clientid))
2446 return nfserr_stale_clientid; 2574 return nfserr_stale_clientid;
2575 /*
2576 * In case we need it later, after we've already created the
2577 * file and don't want to risk a further failure:
2578 */
2579 open->op_file = nfsd4_alloc_file();
2580 if (open->op_file == NULL)
2581 return nfserr_jukebox;
2447 2582
2448 strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); 2583 strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner);
2449 sop = find_openstateowner_str(strhashval, open); 2584 oo = find_openstateowner_str(strhashval, open);
2450 open->op_stateowner = sop; 2585 open->op_openowner = oo;
2451 if (!sop) { 2586 if (!oo) {
2452 /* Make sure the client's lease hasn't expired. */
2453 clp = find_confirmed_client(clientid); 2587 clp = find_confirmed_client(clientid);
2454 if (clp == NULL) 2588 if (clp == NULL)
2455 return nfserr_expired; 2589 return nfserr_expired;
2456 goto renew; 2590 goto new_owner;
2457 } 2591 }
2458 /* When sessions are used, skip open sequenceid processing */ 2592 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
2459 if (nfsd4_has_session(cstate))
2460 goto renew;
2461 if (!sop->so_confirmed) {
2462 /* Replace unconfirmed owners without checking for replay. */ 2593 /* Replace unconfirmed owners without checking for replay. */
2463 clp = sop->so_client; 2594 clp = oo->oo_owner.so_client;
2464 release_openowner(sop); 2595 release_openowner(oo);
2465 open->op_stateowner = NULL; 2596 open->op_openowner = NULL;
2466 goto renew; 2597 goto new_owner;
2467 }
2468 if (open->op_seqid == sop->so_seqid - 1) {
2469 if (sop->so_replay.rp_buflen)
2470 return nfserr_replay_me;
2471 /* The original OPEN failed so spectacularly
2472 * that we don't even have replay data saved!
2473 * Therefore, we have no choice but to continue
2474 * processing this OPEN; presumably, we'll
2475 * fail again for the same reason.
2476 */
2477 dprintk("nfsd4_process_open1: replay with no replay cache\n");
2478 goto renew;
2479 }
2480 if (open->op_seqid != sop->so_seqid)
2481 return nfserr_bad_seqid;
2482renew:
2483 if (open->op_stateowner == NULL) {
2484 sop = alloc_init_open_stateowner(strhashval, clp, open);
2485 if (sop == NULL)
2486 return nfserr_resource;
2487 open->op_stateowner = sop;
2488 } 2598 }
2489 list_del_init(&sop->so_close_lru); 2599 status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
2490 renew_client(sop->so_client); 2600 if (status)
2601 return status;
2602 clp = oo->oo_owner.so_client;
2603 goto alloc_stateid;
2604new_owner:
2605 oo = alloc_init_open_stateowner(strhashval, clp, open);
2606 if (oo == NULL)
2607 return nfserr_jukebox;
2608 open->op_openowner = oo;
2609alloc_stateid:
2610 open->op_stp = nfs4_alloc_stateid(clp);
2611 if (!open->op_stp)
2612 return nfserr_jukebox;
2491 return nfs_ok; 2613 return nfs_ok;
2492} 2614}
2493 2615
@@ -2500,36 +2622,37 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2500 return nfs_ok; 2622 return nfs_ok;
2501} 2623}
2502 2624
2503static struct nfs4_delegation * 2625static int share_access_to_flags(u32 share_access)
2504find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2505{ 2626{
2506 struct nfs4_delegation *dp; 2627 share_access &= ~NFS4_SHARE_WANT_MASK;
2507 2628
2508 spin_lock(&recall_lock); 2629 return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
2509 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2510 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
2511 spin_unlock(&recall_lock);
2512 return dp;
2513 }
2514 spin_unlock(&recall_lock);
2515 return NULL;
2516} 2630}
2517 2631
2518static int share_access_to_flags(u32 share_access) 2632static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
2519{ 2633{
2520 share_access &= ~NFS4_SHARE_WANT_MASK; 2634 struct nfs4_stid *ret;
2521 2635
2522 return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; 2636 ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
2637 if (!ret)
2638 return NULL;
2639 return delegstateid(ret);
2640}
2641
2642static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
2643{
2644 return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
2645 open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH;
2523} 2646}
2524 2647
2525static __be32 2648static __be32
2526nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, 2649nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open,
2527 struct nfs4_delegation **dp) 2650 struct nfs4_delegation **dp)
2528{ 2651{
2529 int flags; 2652 int flags;
2530 __be32 status = nfserr_bad_stateid; 2653 __be32 status = nfserr_bad_stateid;
2531 2654
2532 *dp = find_delegation_file(fp, &open->op_delegate_stateid); 2655 *dp = find_deleg_stateid(cl, &open->op_delegate_stateid);
2533 if (*dp == NULL) 2656 if (*dp == NULL)
2534 goto out; 2657 goto out;
2535 flags = share_access_to_flags(open->op_share_access); 2658 flags = share_access_to_flags(open->op_share_access);
@@ -2537,41 +2660,37 @@ nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
2537 if (status) 2660 if (status)
2538 *dp = NULL; 2661 *dp = NULL;
2539out: 2662out:
2540 if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR) 2663 if (!nfsd4_is_deleg_cur(open))
2541 return nfs_ok; 2664 return nfs_ok;
2542 if (status) 2665 if (status)
2543 return status; 2666 return status;
2544 open->op_stateowner->so_confirmed = 1; 2667 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
2545 return nfs_ok; 2668 return nfs_ok;
2546} 2669}
2547 2670
2548static __be32 2671static __be32
2549nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) 2672nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp)
2550{ 2673{
2551 struct nfs4_stateid *local; 2674 struct nfs4_ol_stateid *local;
2552 __be32 status = nfserr_share_denied; 2675 struct nfs4_openowner *oo = open->op_openowner;
2553 struct nfs4_stateowner *sop = open->op_stateowner;
2554 2676
2555 list_for_each_entry(local, &fp->fi_stateids, st_perfile) { 2677 list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
2556 /* ignore lock owners */ 2678 /* ignore lock owners */
2557 if (local->st_stateowner->so_is_open_owner == 0) 2679 if (local->st_stateowner->so_is_open_owner == 0)
2558 continue; 2680 continue;
2559 /* remember if we have seen this open owner */ 2681 /* remember if we have seen this open owner */
2560 if (local->st_stateowner == sop) 2682 if (local->st_stateowner == &oo->oo_owner)
2561 *stpp = local; 2683 *stpp = local;
2562 /* check for conflicting share reservations */ 2684 /* check for conflicting share reservations */
2563 if (!test_share(local, open)) 2685 if (!test_share(local, open))
2564 goto out; 2686 return nfserr_share_denied;
2565 } 2687 }
2566 status = 0; 2688 return nfs_ok;
2567out:
2568 return status;
2569} 2689}
2570 2690
2571static inline struct nfs4_stateid * 2691static void nfs4_free_stateid(struct nfs4_ol_stateid *s)
2572nfs4_alloc_stateid(void)
2573{ 2692{
2574 return kmem_cache_alloc(stateid_slab, GFP_KERNEL); 2693 kmem_cache_free(stateid_slab, s);
2575} 2694}
2576 2695
2577static inline int nfs4_access_to_access(u32 nfs4_access) 2696static inline int nfs4_access_to_access(u32 nfs4_access)
@@ -2592,12 +2711,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
2592 int oflag = nfs4_access_to_omode(open->op_share_access); 2711 int oflag = nfs4_access_to_omode(open->op_share_access);
2593 int access = nfs4_access_to_access(open->op_share_access); 2712 int access = nfs4_access_to_access(open->op_share_access);
2594 2713
2595 /* CLAIM_DELEGATE_CUR is used in response to a broken lease;
2596 * allowing it to break the lease and return EAGAIN leaves the
2597 * client unable to make progress in returning the delegation */
2598 if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
2599 access |= NFSD_MAY_NOT_BREAK_LEASE;
2600
2601 if (!fp->fi_fds[oflag]) { 2714 if (!fp->fi_fds[oflag]) {
2602 status = nfsd_open(rqstp, cur_fh, S_IFREG, access, 2715 status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
2603 &fp->fi_fds[oflag]); 2716 &fp->fi_fds[oflag]);
@@ -2609,27 +2722,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
2609 return nfs_ok; 2722 return nfs_ok;
2610} 2723}
2611 2724
2612static __be32
2613nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
2614 struct nfs4_file *fp, struct svc_fh *cur_fh,
2615 struct nfsd4_open *open)
2616{
2617 struct nfs4_stateid *stp;
2618 __be32 status;
2619
2620 stp = nfs4_alloc_stateid();
2621 if (stp == NULL)
2622 return nfserr_resource;
2623
2624 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
2625 if (status) {
2626 kmem_cache_free(stateid_slab, stp);
2627 return status;
2628 }
2629 *stpp = stp;
2630 return 0;
2631}
2632
2633static inline __be32 2725static inline __be32
2634nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, 2726nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
2635 struct nfsd4_open *open) 2727 struct nfsd4_open *open)
@@ -2646,9 +2738,9 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
2646} 2738}
2647 2739
2648static __be32 2740static __be32
2649nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) 2741nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
2650{ 2742{
2651 u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; 2743 u32 op_share_access = open->op_share_access;
2652 bool new_access; 2744 bool new_access;
2653 __be32 status; 2745 __be32 status;
2654 2746
@@ -2677,8 +2769,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
2677static void 2769static void
2678nfs4_set_claim_prev(struct nfsd4_open *open) 2770nfs4_set_claim_prev(struct nfsd4_open *open)
2679{ 2771{
2680 open->op_stateowner->so_confirmed = 1; 2772 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
2681 open->op_stateowner->so_client->cl_firststate = 1; 2773 open->op_openowner->oo_owner.so_client->cl_firststate = 1;
2682} 2774}
2683 2775
2684/* Should we give out recallable state?: */ 2776/* Should we give out recallable state?: */
@@ -2721,7 +2813,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2721 if (!fl) 2813 if (!fl)
2722 return -ENOMEM; 2814 return -ENOMEM;
2723 fl->fl_file = find_readable_file(fp); 2815 fl->fl_file = find_readable_file(fp);
2724 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); 2816 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
2725 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); 2817 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
2726 if (status) { 2818 if (status) {
2727 list_del_init(&dp->dl_perclnt); 2819 list_del_init(&dp->dl_perclnt);
@@ -2750,7 +2842,7 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
2750 atomic_inc(&fp->fi_delegees); 2842 atomic_inc(&fp->fi_delegees);
2751 list_add(&dp->dl_perfile, &fp->fi_delegations); 2843 list_add(&dp->dl_perfile, &fp->fi_delegations);
2752 spin_unlock(&recall_lock); 2844 spin_unlock(&recall_lock);
2753 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); 2845 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
2754 return 0; 2846 return 0;
2755} 2847}
2756 2848
@@ -2758,14 +2850,14 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
2758 * Attempt to hand out a delegation. 2850 * Attempt to hand out a delegation.
2759 */ 2851 */
2760static void 2852static void
2761nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp) 2853nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
2762{ 2854{
2763 struct nfs4_delegation *dp; 2855 struct nfs4_delegation *dp;
2764 struct nfs4_stateowner *sop = stp->st_stateowner; 2856 struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
2765 int cb_up; 2857 int cb_up;
2766 int status, flag = 0; 2858 int status, flag = 0;
2767 2859
2768 cb_up = nfsd4_cb_channel_good(sop->so_client); 2860 cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
2769 flag = NFS4_OPEN_DELEGATE_NONE; 2861 flag = NFS4_OPEN_DELEGATE_NONE;
2770 open->op_recall = 0; 2862 open->op_recall = 0;
2771 switch (open->op_claim_type) { 2863 switch (open->op_claim_type) {
@@ -2781,7 +2873,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2781 * had the chance to reclaim theirs.... */ 2873 * had the chance to reclaim theirs.... */
2782 if (locks_in_grace()) 2874 if (locks_in_grace())
2783 goto out; 2875 goto out;
2784 if (!cb_up || !sop->so_confirmed) 2876 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
2785 goto out; 2877 goto out;
2786 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 2878 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2787 flag = NFS4_OPEN_DELEGATE_WRITE; 2879 flag = NFS4_OPEN_DELEGATE_WRITE;
@@ -2792,17 +2884,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2792 goto out; 2884 goto out;
2793 } 2885 }
2794 2886
2795 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 2887 dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag);
2796 if (dp == NULL) 2888 if (dp == NULL)
2797 goto out_no_deleg; 2889 goto out_no_deleg;
2798 status = nfs4_set_delegation(dp, flag); 2890 status = nfs4_set_delegation(dp, flag);
2799 if (status) 2891 if (status)
2800 goto out_free; 2892 goto out_free;
2801 2893
2802 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2894 memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
2803 2895
2804 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", 2896 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
2805 STATEID_VAL(&dp->dl_stateid)); 2897 STATEID_VAL(&dp->dl_stid.sc_stateid));
2806out: 2898out:
2807 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS 2899 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2808 && flag == NFS4_OPEN_DELEGATE_NONE 2900 && flag == NFS4_OPEN_DELEGATE_NONE
@@ -2824,16 +2916,13 @@ __be32
2824nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) 2916nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
2825{ 2917{
2826 struct nfsd4_compoundres *resp = rqstp->rq_resp; 2918 struct nfsd4_compoundres *resp = rqstp->rq_resp;
2919 struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
2827 struct nfs4_file *fp = NULL; 2920 struct nfs4_file *fp = NULL;
2828 struct inode *ino = current_fh->fh_dentry->d_inode; 2921 struct inode *ino = current_fh->fh_dentry->d_inode;
2829 struct nfs4_stateid *stp = NULL; 2922 struct nfs4_ol_stateid *stp = NULL;
2830 struct nfs4_delegation *dp = NULL; 2923 struct nfs4_delegation *dp = NULL;
2831 __be32 status; 2924 __be32 status;
2832 2925
2833 status = nfserr_inval;
2834 if (!access_valid(open->op_share_access, resp->cstate.minorversion)
2835 || !deny_valid(open->op_share_deny))
2836 goto out;
2837 /* 2926 /*
2838 * Lookup file; if found, lookup stateid and check open request, 2927 * Lookup file; if found, lookup stateid and check open request,
2839 * and check for delegations in the process of being recalled. 2928 * and check for delegations in the process of being recalled.
@@ -2843,17 +2932,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2843 if (fp) { 2932 if (fp) {
2844 if ((status = nfs4_check_open(fp, open, &stp))) 2933 if ((status = nfs4_check_open(fp, open, &stp)))
2845 goto out; 2934 goto out;
2846 status = nfs4_check_deleg(fp, open, &dp); 2935 status = nfs4_check_deleg(cl, fp, open, &dp);
2847 if (status) 2936 if (status)
2848 goto out; 2937 goto out;
2849 } else { 2938 } else {
2850 status = nfserr_bad_stateid; 2939 status = nfserr_bad_stateid;
2851 if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) 2940 if (nfsd4_is_deleg_cur(open))
2852 goto out;
2853 status = nfserr_resource;
2854 fp = alloc_init_file(ino);
2855 if (fp == NULL)
2856 goto out; 2941 goto out;
2942 status = nfserr_jukebox;
2943 fp = open->op_file;
2944 open->op_file = NULL;
2945 nfsd4_init_file(fp, ino);
2857 } 2946 }
2858 2947
2859 /* 2948 /*
@@ -2865,24 +2954,24 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2865 status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); 2954 status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
2866 if (status) 2955 if (status)
2867 goto out; 2956 goto out;
2868 update_stateid(&stp->st_stateid);
2869 } else { 2957 } else {
2870 status = nfs4_new_open(rqstp, &stp, fp, current_fh, open); 2958 status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
2871 if (status) 2959 if (status)
2872 goto out; 2960 goto out;
2873 init_stateid(stp, fp, open); 2961 stp = open->op_stp;
2962 open->op_stp = NULL;
2963 init_open_stateid(stp, fp, open);
2874 status = nfsd4_truncate(rqstp, current_fh, open); 2964 status = nfsd4_truncate(rqstp, current_fh, open);
2875 if (status) { 2965 if (status) {
2876 release_open_stateid(stp); 2966 release_open_stateid(stp);
2877 goto out; 2967 goto out;
2878 } 2968 }
2879 if (nfsd4_has_session(&resp->cstate))
2880 update_stateid(&stp->st_stateid);
2881 } 2969 }
2882 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); 2970 update_stateid(&stp->st_stid.sc_stateid);
2971 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
2883 2972
2884 if (nfsd4_has_session(&resp->cstate)) 2973 if (nfsd4_has_session(&resp->cstate))
2885 open->op_stateowner->so_confirmed = 1; 2974 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
2886 2975
2887 /* 2976 /*
2888 * Attempt to hand out a delegation. No error return, because the 2977 * Attempt to hand out a delegation. No error return, because the
@@ -2893,7 +2982,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2893 status = nfs_ok; 2982 status = nfs_ok;
2894 2983
2895 dprintk("%s: stateid=" STATEID_FMT "\n", __func__, 2984 dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
2896 STATEID_VAL(&stp->st_stateid)); 2985 STATEID_VAL(&stp->st_stid.sc_stateid));
2897out: 2986out:
2898 if (fp) 2987 if (fp)
2899 put_nfs4_file(fp); 2988 put_nfs4_file(fp);
@@ -2903,13 +2992,34 @@ out:
2903 * To finish the open response, we just need to set the rflags. 2992 * To finish the open response, we just need to set the rflags.
2904 */ 2993 */
2905 open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; 2994 open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
2906 if (!open->op_stateowner->so_confirmed && 2995 if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
2907 !nfsd4_has_session(&resp->cstate)) 2996 !nfsd4_has_session(&resp->cstate))
2908 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; 2997 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
2909 2998
2910 return status; 2999 return status;
2911} 3000}
2912 3001
3002void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status)
3003{
3004 if (open->op_openowner) {
3005 struct nfs4_openowner *oo = open->op_openowner;
3006
3007 if (!list_empty(&oo->oo_owner.so_stateids))
3008 list_del_init(&oo->oo_close_lru);
3009 if (oo->oo_flags & NFS4_OO_NEW) {
3010 if (status) {
3011 release_openowner(oo);
3012 open->op_openowner = NULL;
3013 } else
3014 oo->oo_flags &= ~NFS4_OO_NEW;
3015 }
3016 }
3017 if (open->op_file)
3018 nfsd4_free_file(open->op_file);
3019 if (open->op_stp)
3020 nfs4_free_stateid(open->op_stp);
3021}
3022
2913__be32 3023__be32
2914nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 3024nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2915 clientid_t *clid) 3025 clientid_t *clid)
@@ -2930,7 +3040,6 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2930 dprintk("nfsd4_renew: clientid not found!\n"); 3040 dprintk("nfsd4_renew: clientid not found!\n");
2931 goto out; 3041 goto out;
2932 } 3042 }
2933 renew_client(clp);
2934 status = nfserr_cb_path_down; 3043 status = nfserr_cb_path_down;
2935 if (!list_empty(&clp->cl_delegations) 3044 if (!list_empty(&clp->cl_delegations)
2936 && clp->cl_cb_state != NFSD4_CB_UP) 3045 && clp->cl_cb_state != NFSD4_CB_UP)
@@ -2962,7 +3071,7 @@ static time_t
2962nfs4_laundromat(void) 3071nfs4_laundromat(void)
2963{ 3072{
2964 struct nfs4_client *clp; 3073 struct nfs4_client *clp;
2965 struct nfs4_stateowner *sop; 3074 struct nfs4_openowner *oo;
2966 struct nfs4_delegation *dp; 3075 struct nfs4_delegation *dp;
2967 struct list_head *pos, *next, reaplist; 3076 struct list_head *pos, *next, reaplist;
2968 time_t cutoff = get_seconds() - nfsd4_lease; 3077 time_t cutoff = get_seconds() - nfsd4_lease;
@@ -3019,16 +3128,14 @@ nfs4_laundromat(void)
3019 } 3128 }
3020 test_val = nfsd4_lease; 3129 test_val = nfsd4_lease;
3021 list_for_each_safe(pos, next, &close_lru) { 3130 list_for_each_safe(pos, next, &close_lru) {
3022 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); 3131 oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
3023 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { 3132 if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
3024 u = sop->so_time - cutoff; 3133 u = oo->oo_time - cutoff;
3025 if (test_val > u) 3134 if (test_val > u)
3026 test_val = u; 3135 test_val = u;
3027 break; 3136 break;
3028 } 3137 }
3029 dprintk("NFSD: purging unused open stateowner (so_id %d)\n", 3138 release_openowner(oo);
3030 sop->so_id);
3031 release_openowner(sop);
3032 } 3139 }
3033 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) 3140 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
3034 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; 3141 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -3050,30 +3157,17 @@ laundromat_main(struct work_struct *not_used)
3050 queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); 3157 queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
3051} 3158}
3052 3159
3053static struct nfs4_stateowner * 3160static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
3054search_close_lru(u32 st_id, int flags)
3055{ 3161{
3056 struct nfs4_stateowner *local = NULL; 3162 if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode)
3057 3163 return nfserr_bad_stateid;
3058 if (flags & CLOSE_STATE) { 3164 return nfs_ok;
3059 list_for_each_entry(local, &close_lru, so_close_lru) {
3060 if (local->so_id == st_id)
3061 return local;
3062 }
3063 }
3064 return NULL;
3065}
3066
3067static inline int
3068nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
3069{
3070 return fhp->fh_dentry->d_inode != stp->st_file->fi_inode;
3071} 3165}
3072 3166
3073static int 3167static int
3074STALE_STATEID(stateid_t *stateid) 3168STALE_STATEID(stateid_t *stateid)
3075{ 3169{
3076 if (stateid->si_boot == boot_time) 3170 if (stateid->si_opaque.so_clid.cl_boot == boot_time)
3077 return 0; 3171 return 0;
3078 dprintk("NFSD: stale stateid " STATEID_FMT "!\n", 3172 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
3079 STATEID_VAL(stateid)); 3173 STATEID_VAL(stateid));
@@ -3096,7 +3190,7 @@ access_permit_write(unsigned long access_bmap)
3096} 3190}
3097 3191
3098static 3192static
3099__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) 3193__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
3100{ 3194{
3101 __be32 status = nfserr_openmode; 3195 __be32 status = nfserr_openmode;
3102 3196
@@ -3139,68 +3233,80 @@ grace_disallows_io(struct inode *inode)
3139 return locks_in_grace() && mandatory_lock(inode); 3233 return locks_in_grace() && mandatory_lock(inode);
3140} 3234}
3141 3235
3142static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags) 3236/* Returns true iff a is later than b: */
3237static bool stateid_generation_after(stateid_t *a, stateid_t *b)
3238{
3239 return (s32)a->si_generation - (s32)b->si_generation > 0;
3240}
3241
3242static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
3143{ 3243{
3144 /* 3244 /*
3145 * When sessions are used the stateid generation number is ignored 3245 * When sessions are used the stateid generation number is ignored
3146 * when it is zero. 3246 * when it is zero.
3147 */ 3247 */
3148 if ((flags & HAS_SESSION) && in->si_generation == 0) 3248 if (has_session && in->si_generation == 0)
3149 goto out; 3249 return nfs_ok;
3250
3251 if (in->si_generation == ref->si_generation)
3252 return nfs_ok;
3150 3253
3151 /* If the client sends us a stateid from the future, it's buggy: */ 3254 /* If the client sends us a stateid from the future, it's buggy: */
3152 if (in->si_generation > ref->si_generation) 3255 if (stateid_generation_after(in, ref))
3153 return nfserr_bad_stateid; 3256 return nfserr_bad_stateid;
3154 /* 3257 /*
3155 * The following, however, can happen. For example, if the 3258 * However, we could see a stateid from the past, even from a
3156 * client sends an open and some IO at the same time, the open 3259 * non-buggy client. For example, if the client sends a lock
3157 * may bump si_generation while the IO is still in flight. 3260 * while some IO is outstanding, the lock may bump si_generation
3158 * Thanks to hard links and renames, the client never knows what 3261 * while the IO is still in flight. The client could avoid that
3159 * file an open will affect. So it could avoid that situation 3262 * situation by waiting for responses on all the IO requests,
3160 * only by serializing all opens and IO from the same open 3263 * but better performance may result in retrying IO that
3161 * owner. To recover from the old_stateid error, the client 3264 * receives an old_stateid error if requests are rarely
3162 * will just have to retry the IO: 3265 * reordered in flight:
3163 */ 3266 */
3164 if (in->si_generation < ref->si_generation) 3267 return nfserr_old_stateid;
3165 return nfserr_old_stateid;
3166out:
3167 return nfs_ok;
3168} 3268}
3169 3269
3170static int is_delegation_stateid(stateid_t *stateid) 3270__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
3171{ 3271{
3172 return stateid->si_fileid == 0; 3272 struct nfs4_stid *s;
3173} 3273 struct nfs4_ol_stateid *ols;
3274 __be32 status;
3174 3275
3175static int is_open_stateid(struct nfs4_stateid *stateid) 3276 if (STALE_STATEID(stateid))
3176{ 3277 return nfserr_stale_stateid;
3177 return stateid->st_openstp == NULL; 3278
3279 s = find_stateid(cl, stateid);
3280 if (!s)
3281 return nfserr_stale_stateid;
3282 status = check_stateid_generation(stateid, &s->sc_stateid, 1);
3283 if (status)
3284 return status;
3285 if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID)))
3286 return nfs_ok;
3287 ols = openlockstateid(s);
3288 if (ols->st_stateowner->so_is_open_owner
3289 && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
3290 return nfserr_bad_stateid;
3291 return nfs_ok;
3178} 3292}
3179 3293
3180__be32 nfs4_validate_stateid(stateid_t *stateid, int flags) 3294static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s)
3181{ 3295{
3182 struct nfs4_stateid *stp = NULL; 3296 struct nfs4_client *cl;
3183 __be32 status = nfserr_stale_stateid;
3184 3297
3298 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3299 return nfserr_bad_stateid;
3185 if (STALE_STATEID(stateid)) 3300 if (STALE_STATEID(stateid))
3186 goto out; 3301 return nfserr_stale_stateid;
3187 3302 cl = find_confirmed_client(&stateid->si_opaque.so_clid);
3188 status = nfserr_expired; 3303 if (!cl)
3189 stp = search_for_stateid(stateid); 3304 return nfserr_expired;
3190 if (!stp) 3305 *s = find_stateid_by_type(cl, stateid, typemask);
3191 goto out; 3306 if (!*s)
3192 status = nfserr_bad_stateid; 3307 return nfserr_bad_stateid;
3193 3308 return nfs_ok;
3194 if (!stp->st_stateowner->so_confirmed)
3195 goto out;
3196
3197 status = check_stateid_generation(stateid, &stp->st_stateid, flags);
3198 if (status)
3199 goto out;
3200 3309
3201 status = nfs_ok;
3202out:
3203 return status;
3204} 3310}
3205 3311
3206/* 3312/*
@@ -3210,7 +3316,8 @@ __be32
3210nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, 3316nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
3211 stateid_t *stateid, int flags, struct file **filpp) 3317 stateid_t *stateid, int flags, struct file **filpp)
3212{ 3318{
3213 struct nfs4_stateid *stp = NULL; 3319 struct nfs4_stid *s;
3320 struct nfs4_ol_stateid *stp = NULL;
3214 struct nfs4_delegation *dp = NULL; 3321 struct nfs4_delegation *dp = NULL;
3215 struct svc_fh *current_fh = &cstate->current_fh; 3322 struct svc_fh *current_fh = &cstate->current_fh;
3216 struct inode *ino = current_fh->fh_dentry->d_inode; 3323 struct inode *ino = current_fh->fh_dentry->d_inode;
@@ -3222,60 +3329,47 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
3222 if (grace_disallows_io(ino)) 3329 if (grace_disallows_io(ino))
3223 return nfserr_grace; 3330 return nfserr_grace;
3224 3331
3225 if (nfsd4_has_session(cstate))
3226 flags |= HAS_SESSION;
3227
3228 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 3332 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3229 return check_special_stateids(current_fh, stateid, flags); 3333 return check_special_stateids(current_fh, stateid, flags);
3230 3334
3231 status = nfserr_stale_stateid; 3335 status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s);
3232 if (STALE_STATEID(stateid)) 3336 if (status)
3337 return status;
3338 status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
3339 if (status)
3233 goto out; 3340 goto out;
3234 3341 switch (s->sc_type) {
3235 /* 3342 case NFS4_DELEG_STID:
3236 * We assume that any stateid that has the current boot time, 3343 dp = delegstateid(s);
3237 * but that we can't find, is expired:
3238 */
3239 status = nfserr_expired;
3240 if (is_delegation_stateid(stateid)) {
3241 dp = find_delegation_stateid(ino, stateid);
3242 if (!dp)
3243 goto out;
3244 status = check_stateid_generation(stateid, &dp->dl_stateid,
3245 flags);
3246 if (status)
3247 goto out;
3248 status = nfs4_check_delegmode(dp, flags); 3344 status = nfs4_check_delegmode(dp, flags);
3249 if (status) 3345 if (status)
3250 goto out; 3346 goto out;
3251 renew_client(dp->dl_client);
3252 if (filpp) { 3347 if (filpp) {
3253 *filpp = dp->dl_file->fi_deleg_file; 3348 *filpp = dp->dl_file->fi_deleg_file;
3254 BUG_ON(!*filpp); 3349 BUG_ON(!*filpp);
3255 } 3350 }
3256 } else { /* open or lock stateid */ 3351 break;
3257 stp = find_stateid(stateid, flags); 3352 case NFS4_OPEN_STID:
3258 if (!stp) 3353 case NFS4_LOCK_STID:
3259 goto out; 3354 stp = openlockstateid(s);
3260 status = nfserr_bad_stateid; 3355 status = nfs4_check_fh(current_fh, stp);
3261 if (nfs4_check_fh(current_fh, stp))
3262 goto out;
3263 if (!stp->st_stateowner->so_confirmed)
3264 goto out;
3265 status = check_stateid_generation(stateid, &stp->st_stateid,
3266 flags);
3267 if (status) 3356 if (status)
3268 goto out; 3357 goto out;
3358 if (stp->st_stateowner->so_is_open_owner
3359 && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
3360 goto out;
3269 status = nfs4_check_openmode(stp, flags); 3361 status = nfs4_check_openmode(stp, flags);
3270 if (status) 3362 if (status)
3271 goto out; 3363 goto out;
3272 renew_client(stp->st_stateowner->so_client);
3273 if (filpp) { 3364 if (filpp) {
3274 if (flags & RD_STATE) 3365 if (flags & RD_STATE)
3275 *filpp = find_readable_file(stp->st_file); 3366 *filpp = find_readable_file(stp->st_file);
3276 else 3367 else
3277 *filpp = find_writeable_file(stp->st_file); 3368 *filpp = find_writeable_file(stp->st_file);
3278 } 3369 }
3370 break;
3371 default:
3372 return nfserr_bad_stateid;
3279 } 3373 }
3280 status = nfs_ok; 3374 status = nfs_ok;
3281out: 3375out:
@@ -3283,18 +3377,9 @@ out:
3283} 3377}
3284 3378
3285static __be32 3379static __be32
3286nfsd4_free_delegation_stateid(stateid_t *stateid) 3380nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
3287{ 3381{
3288 struct nfs4_delegation *dp = search_for_delegation(stateid); 3382 if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner)))
3289 if (dp)
3290 return nfserr_locks_held;
3291 return nfserr_bad_stateid;
3292}
3293
3294static __be32
3295nfsd4_free_lock_stateid(struct nfs4_stateid *stp)
3296{
3297 if (check_for_locks(stp->st_file, stp->st_stateowner))
3298 return nfserr_locks_held; 3383 return nfserr_locks_held;
3299 release_lock_stateid(stp); 3384 release_lock_stateid(stp);
3300 return nfs_ok; 3385 return nfs_ok;
@@ -3307,51 +3392,40 @@ __be32
3307nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 3392nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3308 struct nfsd4_test_stateid *test_stateid) 3393 struct nfsd4_test_stateid *test_stateid)
3309{ 3394{
3310 test_stateid->ts_has_session = nfsd4_has_session(cstate); 3395 /* real work is done during encoding */
3311 return nfs_ok; 3396 return nfs_ok;
3312} 3397}
3313 3398
3314/*
3315 * Free a state id
3316 */
3317__be32 3399__be32
3318nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 3400nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3319 struct nfsd4_free_stateid *free_stateid) 3401 struct nfsd4_free_stateid *free_stateid)
3320{ 3402{
3321 stateid_t *stateid = &free_stateid->fr_stateid; 3403 stateid_t *stateid = &free_stateid->fr_stateid;
3322 struct nfs4_stateid *stp; 3404 struct nfs4_stid *s;
3323 __be32 ret; 3405 struct nfs4_client *cl = cstate->session->se_client;
3406 __be32 ret = nfserr_bad_stateid;
3324 3407
3325 nfs4_lock_state(); 3408 nfs4_lock_state();
3326 if (is_delegation_stateid(stateid)) { 3409 s = find_stateid(cl, stateid);
3327 ret = nfsd4_free_delegation_stateid(stateid); 3410 if (!s)
3328 goto out;
3329 }
3330
3331 stp = search_for_stateid(stateid);
3332 if (!stp) {
3333 ret = nfserr_bad_stateid;
3334 goto out; 3411 goto out;
3335 } 3412 switch (s->sc_type) {
3336 if (stateid->si_generation != 0) { 3413 case NFS4_DELEG_STID:
3337 if (stateid->si_generation < stp->st_stateid.si_generation) {
3338 ret = nfserr_old_stateid;
3339 goto out;
3340 }
3341 if (stateid->si_generation > stp->st_stateid.si_generation) {
3342 ret = nfserr_bad_stateid;
3343 goto out;
3344 }
3345 }
3346
3347 if (is_open_stateid(stp)) {
3348 ret = nfserr_locks_held; 3414 ret = nfserr_locks_held;
3349 goto out; 3415 goto out;
3350 } else { 3416 case NFS4_OPEN_STID:
3351 ret = nfsd4_free_lock_stateid(stp); 3417 case NFS4_LOCK_STID:
3352 goto out; 3418 ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
3419 if (ret)
3420 goto out;
3421 if (s->sc_type == NFS4_LOCK_STID)
3422 ret = nfsd4_free_lock_stateid(openlockstateid(s));
3423 else
3424 ret = nfserr_locks_held;
3425 break;
3426 default:
3427 ret = nfserr_bad_stateid;
3353 } 3428 }
3354
3355out: 3429out:
3356 nfs4_unlock_state(); 3430 nfs4_unlock_state();
3357 return ret; 3431 return ret;
@@ -3364,124 +3438,64 @@ setlkflg (int type)
3364 RD_STATE : WR_STATE; 3438 RD_STATE : WR_STATE;
3365} 3439}
3366 3440
3441static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp)
3442{
3443 struct svc_fh *current_fh = &cstate->current_fh;
3444 struct nfs4_stateowner *sop = stp->st_stateowner;
3445 __be32 status;
3446
3447 status = nfsd4_check_seqid(cstate, sop, seqid);
3448 if (status)
3449 return status;
3450 if (stp->st_stid.sc_type == NFS4_CLOSED_STID)
3451 /*
3452 * "Closed" stateid's exist *only* to return
3453 * nfserr_replay_me from the previous step.
3454 */
3455 return nfserr_bad_stateid;
3456 status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
3457 if (status)
3458 return status;
3459 return nfs4_check_fh(current_fh, stp);
3460}
3461
3367/* 3462/*
3368 * Checks for sequence id mutating operations. 3463 * Checks for sequence id mutating operations.
3369 */ 3464 */
3370static __be32 3465static __be32
3371nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, 3466nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3372 stateid_t *stateid, int flags, 3467 stateid_t *stateid, char typemask,
3373 struct nfs4_stateowner **sopp, 3468 struct nfs4_ol_stateid **stpp)
3374 struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
3375{ 3469{
3376 struct nfs4_stateid *stp;
3377 struct nfs4_stateowner *sop;
3378 struct svc_fh *current_fh = &cstate->current_fh;
3379 __be32 status; 3470 __be32 status;
3471 struct nfs4_stid *s;
3380 3472
3381 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, 3473 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
3382 seqid, STATEID_VAL(stateid)); 3474 seqid, STATEID_VAL(stateid));
3383 3475
3384 *stpp = NULL; 3476 *stpp = NULL;
3385 *sopp = NULL; 3477 status = nfsd4_lookup_stateid(stateid, typemask, &s);
3386 3478 if (status)
3387 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { 3479 return status;
3388 dprintk("NFSD: preprocess_seqid_op: magic stateid!\n"); 3480 *stpp = openlockstateid(s);
3389 return nfserr_bad_stateid; 3481 cstate->replay_owner = (*stpp)->st_stateowner;
3390 }
3391
3392 if (STALE_STATEID(stateid))
3393 return nfserr_stale_stateid;
3394
3395 if (nfsd4_has_session(cstate))
3396 flags |= HAS_SESSION;
3397
3398 /*
3399 * We return BAD_STATEID if filehandle doesn't match stateid,
3400 * the confirmed flag is incorrecly set, or the generation
3401 * number is incorrect.
3402 */
3403 stp = find_stateid(stateid, flags);
3404 if (stp == NULL) {
3405 /*
3406 * Also, we should make sure this isn't just the result of
3407 * a replayed close:
3408 */
3409 sop = search_close_lru(stateid->si_stateownerid, flags);
3410 /* It's not stale; let's assume it's expired: */
3411 if (sop == NULL)
3412 return nfserr_expired;
3413 *sopp = sop;
3414 goto check_replay;
3415 }
3416
3417 *stpp = stp;
3418 *sopp = sop = stp->st_stateowner;
3419
3420 if (lock) {
3421 clientid_t *lockclid = &lock->v.new.clientid;
3422 struct nfs4_client *clp = sop->so_client;
3423 int lkflg = 0;
3424 __be32 status;
3425
3426 lkflg = setlkflg(lock->lk_type);
3427
3428 if (lock->lk_is_new) {
3429 if (!sop->so_is_open_owner)
3430 return nfserr_bad_stateid;
3431 if (!(flags & HAS_SESSION) &&
3432 !same_clid(&clp->cl_clientid, lockclid))
3433 return nfserr_bad_stateid;
3434 /* stp is the open stateid */
3435 status = nfs4_check_openmode(stp, lkflg);
3436 if (status)
3437 return status;
3438 } else {
3439 /* stp is the lock stateid */
3440 status = nfs4_check_openmode(stp->st_openstp, lkflg);
3441 if (status)
3442 return status;
3443 }
3444 }
3445 3482
3446 if (nfs4_check_fh(current_fh, stp)) { 3483 return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp);
3447 dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); 3484}
3448 return nfserr_bad_stateid;
3449 }
3450 3485
3451 /* 3486static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp)
3452 * We now validate the seqid and stateid generation numbers. 3487{
3453 * For the moment, we ignore the possibility of 3488 __be32 status;
3454 * generation number wraparound. 3489 struct nfs4_openowner *oo;
3455 */
3456 if (!(flags & HAS_SESSION) && seqid != sop->so_seqid)
3457 goto check_replay;
3458 3490
3459 if (sop->so_confirmed && flags & CONFIRM) { 3491 status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
3460 dprintk("NFSD: preprocess_seqid_op: expected" 3492 NFS4_OPEN_STID, stpp);
3461 " unconfirmed stateowner!\n");
3462 return nfserr_bad_stateid;
3463 }
3464 if (!sop->so_confirmed && !(flags & CONFIRM)) {
3465 dprintk("NFSD: preprocess_seqid_op: stateowner not"
3466 " confirmed yet!\n");
3467 return nfserr_bad_stateid;
3468 }
3469 status = check_stateid_generation(stateid, &stp->st_stateid, flags);
3470 if (status) 3493 if (status)
3471 return status; 3494 return status;
3472 renew_client(sop->so_client); 3495 oo = openowner((*stpp)->st_stateowner);
3496 if (!(oo->oo_flags & NFS4_OO_CONFIRMED))
3497 return nfserr_bad_stateid;
3473 return nfs_ok; 3498 return nfs_ok;
3474
3475check_replay:
3476 if (seqid == sop->so_seqid - 1) {
3477 dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
3478 /* indicate replay to calling function */
3479 return nfserr_replay_me;
3480 }
3481 dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
3482 sop->so_seqid, seqid);
3483 *sopp = NULL;
3484 return nfserr_bad_seqid;
3485} 3499}
3486 3500
3487__be32 3501__be32
@@ -3489,8 +3503,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3489 struct nfsd4_open_confirm *oc) 3503 struct nfsd4_open_confirm *oc)
3490{ 3504{
3491 __be32 status; 3505 __be32 status;
3492 struct nfs4_stateowner *sop; 3506 struct nfs4_openowner *oo;
3493 struct nfs4_stateid *stp; 3507 struct nfs4_ol_stateid *stp;
3494 3508
3495 dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", 3509 dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
3496 (int)cstate->current_fh.fh_dentry->d_name.len, 3510 (int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3502,38 +3516,52 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3502 3516
3503 nfs4_lock_state(); 3517 nfs4_lock_state();
3504 3518
3505 if ((status = nfs4_preprocess_seqid_op(cstate, 3519 status = nfs4_preprocess_seqid_op(cstate,
3506 oc->oc_seqid, &oc->oc_req_stateid, 3520 oc->oc_seqid, &oc->oc_req_stateid,
3507 CONFIRM | OPEN_STATE, 3521 NFS4_OPEN_STID, &stp);
3508 &oc->oc_stateowner, &stp, NULL))) 3522 if (status)
3509 goto out; 3523 goto out;
3510 3524 oo = openowner(stp->st_stateowner);
3511 sop = oc->oc_stateowner; 3525 status = nfserr_bad_stateid;
3512 sop->so_confirmed = 1; 3526 if (oo->oo_flags & NFS4_OO_CONFIRMED)
3513 update_stateid(&stp->st_stateid); 3527 goto out;
3514 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); 3528 oo->oo_flags |= NFS4_OO_CONFIRMED;
3529 update_stateid(&stp->st_stid.sc_stateid);
3530 memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
3515 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", 3531 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
3516 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid)); 3532 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
3517 3533
3518 nfsd4_create_clid_dir(sop->so_client); 3534 nfsd4_create_clid_dir(oo->oo_owner.so_client);
3535 status = nfs_ok;
3519out: 3536out:
3520 if (oc->oc_stateowner) { 3537 if (!cstate->replay_owner)
3521 nfs4_get_stateowner(oc->oc_stateowner); 3538 nfs4_unlock_state();
3522 cstate->replay_owner = oc->oc_stateowner;
3523 }
3524 nfs4_unlock_state();
3525 return status; 3539 return status;
3526} 3540}
3527 3541
3528static inline void nfs4_file_downgrade(struct nfs4_stateid *stp, unsigned int to_access) 3542static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access)
3529{ 3543{
3530 int i; 3544 if (!test_bit(access, &stp->st_access_bmap))
3545 return;
3546 nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access));
3547 __clear_bit(access, &stp->st_access_bmap);
3548}
3531 3549
3532 for (i = 1; i < 4; i++) { 3550static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access)
3533 if (test_bit(i, &stp->st_access_bmap) && !(i & to_access)) { 3551{
3534 nfs4_file_put_access(stp->st_file, i); 3552 switch (to_access) {
3535 __clear_bit(i, &stp->st_access_bmap); 3553 case NFS4_SHARE_ACCESS_READ:
3536 } 3554 nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE);
3555 nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
3556 break;
3557 case NFS4_SHARE_ACCESS_WRITE:
3558 nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ);
3559 nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
3560 break;
3561 case NFS4_SHARE_ACCESS_BOTH:
3562 break;
3563 default:
3564 BUG();
3537 } 3565 }
3538} 3566}
3539 3567
@@ -3553,24 +3581,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
3553 struct nfsd4_open_downgrade *od) 3581 struct nfsd4_open_downgrade *od)
3554{ 3582{
3555 __be32 status; 3583 __be32 status;
3556 struct nfs4_stateid *stp; 3584 struct nfs4_ol_stateid *stp;
3557 3585
3558 dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 3586 dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
3559 (int)cstate->current_fh.fh_dentry->d_name.len, 3587 (int)cstate->current_fh.fh_dentry->d_name.len,
3560 cstate->current_fh.fh_dentry->d_name.name); 3588 cstate->current_fh.fh_dentry->d_name.name);
3561 3589
3562 if (!access_valid(od->od_share_access, cstate->minorversion) 3590 /* We don't yet support WANT bits: */
3563 || !deny_valid(od->od_share_deny)) 3591 od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
3564 return nfserr_inval;
3565 3592
3566 nfs4_lock_state(); 3593 nfs4_lock_state();
3567 if ((status = nfs4_preprocess_seqid_op(cstate, 3594 status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
3568 od->od_seqid, 3595 &od->od_stateid, &stp);
3569 &od->od_stateid, 3596 if (status)
3570 OPEN_STATE,
3571 &od->od_stateowner, &stp, NULL)))
3572 goto out; 3597 goto out;
3573
3574 status = nfserr_inval; 3598 status = nfserr_inval;
3575 if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { 3599 if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
3576 dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", 3600 dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
@@ -3582,22 +3606,45 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
3582 stp->st_deny_bmap, od->od_share_deny); 3606 stp->st_deny_bmap, od->od_share_deny);
3583 goto out; 3607 goto out;
3584 } 3608 }
3585 nfs4_file_downgrade(stp, od->od_share_access); 3609 nfs4_stateid_downgrade(stp, od->od_share_access);
3586 3610
3587 reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); 3611 reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
3588 3612
3589 update_stateid(&stp->st_stateid); 3613 update_stateid(&stp->st_stid.sc_stateid);
3590 memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); 3614 memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
3591 status = nfs_ok; 3615 status = nfs_ok;
3592out: 3616out:
3593 if (od->od_stateowner) { 3617 if (!cstate->replay_owner)
3594 nfs4_get_stateowner(od->od_stateowner); 3618 nfs4_unlock_state();
3595 cstate->replay_owner = od->od_stateowner;
3596 }
3597 nfs4_unlock_state();
3598 return status; 3619 return status;
3599} 3620}
3600 3621
3622void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so)
3623{
3624 struct nfs4_openowner *oo;
3625 struct nfs4_ol_stateid *s;
3626
3627 if (!so->so_is_open_owner)
3628 return;
3629 oo = openowner(so);
3630 s = oo->oo_last_closed_stid;
3631 if (!s)
3632 return;
3633 if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) {
3634 /* Release the last_closed_stid on the next seqid bump: */
3635 oo->oo_flags |= NFS4_OO_PURGE_CLOSE;
3636 return;
3637 }
3638 oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE;
3639 release_last_closed_stateid(oo);
3640}
3641
3642static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
3643{
3644 unhash_open_stateid(s);
3645 s->st_stid.sc_type = NFS4_CLOSED_STID;
3646}
3647
3601/* 3648/*
3602 * nfs4_unlock_state() called after encode 3649 * nfs4_unlock_state() called after encode
3603 */ 3650 */
@@ -3606,39 +3653,37 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3606 struct nfsd4_close *close) 3653 struct nfsd4_close *close)
3607{ 3654{
3608 __be32 status; 3655 __be32 status;
3609 struct nfs4_stateid *stp; 3656 struct nfs4_openowner *oo;
3657 struct nfs4_ol_stateid *stp;
3610 3658
3611 dprintk("NFSD: nfsd4_close on file %.*s\n", 3659 dprintk("NFSD: nfsd4_close on file %.*s\n",
3612 (int)cstate->current_fh.fh_dentry->d_name.len, 3660 (int)cstate->current_fh.fh_dentry->d_name.len,
3613 cstate->current_fh.fh_dentry->d_name.name); 3661 cstate->current_fh.fh_dentry->d_name.name);
3614 3662
3615 nfs4_lock_state(); 3663 nfs4_lock_state();
3616 /* check close_lru for replay */ 3664 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
3617 if ((status = nfs4_preprocess_seqid_op(cstate, 3665 &close->cl_stateid,
3618 close->cl_seqid, 3666 NFS4_OPEN_STID|NFS4_CLOSED_STID,
3619 &close->cl_stateid, 3667 &stp);
3620 OPEN_STATE | CLOSE_STATE, 3668 if (status)
3621 &close->cl_stateowner, &stp, NULL)))
3622 goto out; 3669 goto out;
3670 oo = openowner(stp->st_stateowner);
3623 status = nfs_ok; 3671 status = nfs_ok;
3624 update_stateid(&stp->st_stateid); 3672 update_stateid(&stp->st_stid.sc_stateid);
3625 memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); 3673 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
3626 3674
3627 /* release_stateid() calls nfsd_close() if needed */ 3675 nfsd4_close_open_stateid(stp);
3628 release_open_stateid(stp); 3676 oo->oo_last_closed_stid = stp;
3629 3677
3630 /* place unused nfs4_stateowners on so_close_lru list to be 3678 /* place unused nfs4_stateowners on so_close_lru list to be
3631 * released by the laundromat service after the lease period 3679 * released by the laundromat service after the lease period
3632 * to enable us to handle CLOSE replay 3680 * to enable us to handle CLOSE replay
3633 */ 3681 */
3634 if (list_empty(&close->cl_stateowner->so_stateids)) 3682 if (list_empty(&oo->oo_owner.so_stateids))
3635 move_to_close_lru(close->cl_stateowner); 3683 move_to_close_lru(oo);
3636out: 3684out:
3637 if (close->cl_stateowner) { 3685 if (!cstate->replay_owner)
3638 nfs4_get_stateowner(close->cl_stateowner); 3686 nfs4_unlock_state();
3639 cstate->replay_owner = close->cl_stateowner;
3640 }
3641 nfs4_unlock_state();
3642 return status; 3687 return status;
3643} 3688}
3644 3689
@@ -3648,34 +3693,22 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3648{ 3693{
3649 struct nfs4_delegation *dp; 3694 struct nfs4_delegation *dp;
3650 stateid_t *stateid = &dr->dr_stateid; 3695 stateid_t *stateid = &dr->dr_stateid;
3696 struct nfs4_stid *s;
3651 struct inode *inode; 3697 struct inode *inode;
3652 __be32 status; 3698 __be32 status;
3653 int flags = 0;
3654 3699
3655 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) 3700 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
3656 return status; 3701 return status;
3657 inode = cstate->current_fh.fh_dentry->d_inode; 3702 inode = cstate->current_fh.fh_dentry->d_inode;
3658 3703
3659 if (nfsd4_has_session(cstate))
3660 flags |= HAS_SESSION;
3661 nfs4_lock_state(); 3704 nfs4_lock_state();
3662 status = nfserr_bad_stateid; 3705 status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s);
3663 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 3706 if (status)
3664 goto out;
3665 status = nfserr_stale_stateid;
3666 if (STALE_STATEID(stateid))
3667 goto out;
3668 status = nfserr_bad_stateid;
3669 if (!is_delegation_stateid(stateid))
3670 goto out;
3671 status = nfserr_expired;
3672 dp = find_delegation_stateid(inode, stateid);
3673 if (!dp)
3674 goto out; 3707 goto out;
3675 status = check_stateid_generation(stateid, &dp->dl_stateid, flags); 3708 dp = delegstateid(s);
3709 status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
3676 if (status) 3710 if (status)
3677 goto out; 3711 goto out;
3678 renew_client(dp->dl_client);
3679 3712
3680 unhash_delegation(dp); 3713 unhash_delegation(dp);
3681out: 3714out:
@@ -3713,9 +3746,6 @@ last_byte_offset(u64 start, u64 len)
3713 return end > start ? end - 1: NFS4_MAX_UINT64; 3746 return end > start ? end - 1: NFS4_MAX_UINT64;
3714} 3747}
3715 3748
3716#define lockownerid_hashval(id) \
3717 ((id) & LOCK_HASH_MASK)
3718
3719static inline unsigned int 3749static inline unsigned int
3720lock_ownerstr_hashval(struct inode *inode, u32 cl_id, 3750lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
3721 struct xdr_netobj *ownername) 3751 struct xdr_netobj *ownername)
@@ -3725,101 +3755,7 @@ lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
3725 & LOCK_HASH_MASK; 3755 & LOCK_HASH_MASK;
3726} 3756}
3727 3757
3728static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
3729static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; 3758static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
3730static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
3731
3732static int
3733same_stateid(stateid_t *id_one, stateid_t *id_two)
3734{
3735 if (id_one->si_stateownerid != id_two->si_stateownerid)
3736 return 0;
3737 return id_one->si_fileid == id_two->si_fileid;
3738}
3739
3740static struct nfs4_stateid *
3741find_stateid(stateid_t *stid, int flags)
3742{
3743 struct nfs4_stateid *local;
3744 u32 st_id = stid->si_stateownerid;
3745 u32 f_id = stid->si_fileid;
3746 unsigned int hashval;
3747
3748 dprintk("NFSD: find_stateid flags 0x%x\n",flags);
3749 if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) {
3750 hashval = stateid_hashval(st_id, f_id);
3751 list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
3752 if ((local->st_stateid.si_stateownerid == st_id) &&
3753 (local->st_stateid.si_fileid == f_id))
3754 return local;
3755 }
3756 }
3757
3758 if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) {
3759 hashval = stateid_hashval(st_id, f_id);
3760 list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
3761 if ((local->st_stateid.si_stateownerid == st_id) &&
3762 (local->st_stateid.si_fileid == f_id))
3763 return local;
3764 }
3765 }
3766 return NULL;
3767}
3768
3769static struct nfs4_stateid *
3770search_for_stateid(stateid_t *stid)
3771{
3772 struct nfs4_stateid *local;
3773 unsigned int hashval = stateid_hashval(stid->si_stateownerid, stid->si_fileid);
3774
3775 list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
3776 if (same_stateid(&local->st_stateid, stid))
3777 return local;
3778 }
3779
3780 list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
3781 if (same_stateid(&local->st_stateid, stid))
3782 return local;
3783 }
3784 return NULL;
3785}
3786
3787static struct nfs4_delegation *
3788search_for_delegation(stateid_t *stid)
3789{
3790 struct nfs4_file *fp;
3791 struct nfs4_delegation *dp;
3792 struct list_head *pos;
3793 int i;
3794
3795 for (i = 0; i < FILE_HASH_SIZE; i++) {
3796 list_for_each_entry(fp, &file_hashtbl[i], fi_hash) {
3797 list_for_each(pos, &fp->fi_delegations) {
3798 dp = list_entry(pos, struct nfs4_delegation, dl_perfile);
3799 if (same_stateid(&dp->dl_stateid, stid))
3800 return dp;
3801 }
3802 }
3803 }
3804 return NULL;
3805}
3806
3807static struct nfs4_delegation *
3808find_delegation_stateid(struct inode *ino, stateid_t *stid)
3809{
3810 struct nfs4_file *fp;
3811 struct nfs4_delegation *dl;
3812
3813 dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
3814 STATEID_VAL(stid));
3815
3816 fp = find_file(ino);
3817 if (!fp)
3818 return NULL;
3819 dl = find_delegation_file(fp, stid);
3820 put_nfs4_file(fp);
3821 return dl;
3822}
3823 3759
3824/* 3760/*
3825 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that 3761 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -3846,15 +3782,21 @@ static const struct lock_manager_operations nfsd_posix_mng_ops = {
3846static inline void 3782static inline void
3847nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) 3783nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
3848{ 3784{
3849 struct nfs4_stateowner *sop; 3785 struct nfs4_lockowner *lo;
3850 3786
3851 if (fl->fl_lmops == &nfsd_posix_mng_ops) { 3787 if (fl->fl_lmops == &nfsd_posix_mng_ops) {
3852 sop = (struct nfs4_stateowner *) fl->fl_owner; 3788 lo = (struct nfs4_lockowner *) fl->fl_owner;
3853 kref_get(&sop->so_ref); 3789 deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
3854 deny->ld_sop = sop; 3790 lo->lo_owner.so_owner.len, GFP_KERNEL);
3855 deny->ld_clientid = sop->so_client->cl_clientid; 3791 if (!deny->ld_owner.data)
3792 /* We just don't care that much */
3793 goto nevermind;
3794 deny->ld_owner.len = lo->lo_owner.so_owner.len;
3795 deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
3856 } else { 3796 } else {
3857 deny->ld_sop = NULL; 3797nevermind:
3798 deny->ld_owner.len = 0;
3799 deny->ld_owner.data = NULL;
3858 deny->ld_clientid.cl_boot = 0; 3800 deny->ld_clientid.cl_boot = 0;
3859 deny->ld_clientid.cl_id = 0; 3801 deny->ld_clientid.cl_id = 0;
3860 } 3802 }
@@ -3867,8 +3809,8 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
3867 deny->ld_type = NFS4_WRITE_LT; 3809 deny->ld_type = NFS4_WRITE_LT;
3868} 3810}
3869 3811
3870static struct nfs4_stateowner * 3812static struct nfs4_lockowner *
3871find_lockstateowner_str(struct inode *inode, clientid_t *clid, 3813find_lockowner_str(struct inode *inode, clientid_t *clid,
3872 struct xdr_netobj *owner) 3814 struct xdr_netobj *owner)
3873{ 3815{
3874 unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); 3816 unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
@@ -3876,11 +3818,17 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
3876 3818
3877 list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { 3819 list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
3878 if (same_owner_str(op, owner, clid)) 3820 if (same_owner_str(op, owner, clid))
3879 return op; 3821 return lockowner(op);
3880 } 3822 }
3881 return NULL; 3823 return NULL;
3882} 3824}
3883 3825
3826static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
3827{
3828 list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]);
3829 list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
3830}
3831
3884/* 3832/*
3885 * Alloc a lock owner structure. 3833 * Alloc a lock owner structure.
3886 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 3834 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
@@ -3889,67 +3837,40 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
3889 * strhashval = lock_ownerstr_hashval 3837 * strhashval = lock_ownerstr_hashval
3890 */ 3838 */
3891 3839
3892static struct nfs4_stateowner * 3840static struct nfs4_lockowner *
3893alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { 3841alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) {
3894 struct nfs4_stateowner *sop; 3842 struct nfs4_lockowner *lo;
3895 struct nfs4_replay *rp;
3896 unsigned int idhashval;
3897 3843
3898 if (!(sop = alloc_stateowner(&lock->lk_new_owner))) 3844 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
3845 if (!lo)
3899 return NULL; 3846 return NULL;
3900 idhashval = lockownerid_hashval(current_ownerid); 3847 INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
3901 INIT_LIST_HEAD(&sop->so_idhash); 3848 lo->lo_owner.so_is_open_owner = 0;
3902 INIT_LIST_HEAD(&sop->so_strhash);
3903 INIT_LIST_HEAD(&sop->so_perclient);
3904 INIT_LIST_HEAD(&sop->so_stateids);
3905 INIT_LIST_HEAD(&sop->so_perstateid);
3906 INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
3907 sop->so_time = 0;
3908 list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
3909 list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
3910 list_add(&sop->so_perstateid, &open_stp->st_lockowners);
3911 sop->so_is_open_owner = 0;
3912 sop->so_id = current_ownerid++;
3913 sop->so_client = clp;
3914 /* It is the openowner seqid that will be incremented in encode in the 3849 /* It is the openowner seqid that will be incremented in encode in the
3915 * case of new lockowners; so increment the lock seqid manually: */ 3850 * case of new lockowners; so increment the lock seqid manually: */
3916 sop->so_seqid = lock->lk_new_lock_seqid + 1; 3851 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1;
3917 sop->so_confirmed = 1; 3852 hash_lockowner(lo, strhashval, clp, open_stp);
3918 rp = &sop->so_replay; 3853 return lo;
3919 rp->rp_status = nfserr_serverfault;
3920 rp->rp_buflen = 0;
3921 rp->rp_buf = rp->rp_ibuf;
3922 return sop;
3923} 3854}
3924 3855
3925static struct nfs4_stateid * 3856static struct nfs4_ol_stateid *
3926alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) 3857alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp)
3927{ 3858{
3928 struct nfs4_stateid *stp; 3859 struct nfs4_ol_stateid *stp;
3929 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); 3860 struct nfs4_client *clp = lo->lo_owner.so_client;
3930 3861
3931 stp = nfs4_alloc_stateid(); 3862 stp = nfs4_alloc_stateid(clp);
3932 if (stp == NULL) 3863 if (stp == NULL)
3933 goto out; 3864 return NULL;
3934 INIT_LIST_HEAD(&stp->st_hash); 3865 init_stid(&stp->st_stid, clp, NFS4_LOCK_STID);
3935 INIT_LIST_HEAD(&stp->st_perfile);
3936 INIT_LIST_HEAD(&stp->st_perstateowner);
3937 INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
3938 list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
3939 list_add(&stp->st_perfile, &fp->fi_stateids); 3866 list_add(&stp->st_perfile, &fp->fi_stateids);
3940 list_add(&stp->st_perstateowner, &sop->so_stateids); 3867 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
3941 stp->st_stateowner = sop; 3868 stp->st_stateowner = &lo->lo_owner;
3942 get_nfs4_file(fp); 3869 get_nfs4_file(fp);
3943 stp->st_file = fp; 3870 stp->st_file = fp;
3944 stp->st_stateid.si_boot = boot_time;
3945 stp->st_stateid.si_stateownerid = sop->so_id;
3946 stp->st_stateid.si_fileid = fp->fi_id;
3947 stp->st_stateid.si_generation = 0;
3948 stp->st_access_bmap = 0; 3871 stp->st_access_bmap = 0;
3949 stp->st_deny_bmap = open_stp->st_deny_bmap; 3872 stp->st_deny_bmap = open_stp->st_deny_bmap;
3950 stp->st_openstp = open_stp; 3873 stp->st_openstp = open_stp;
3951
3952out:
3953 return stp; 3874 return stp;
3954} 3875}
3955 3876
@@ -3960,7 +3881,7 @@ check_lock_length(u64 offset, u64 length)
3960 LOFF_OVERFLOW(offset, length))); 3881 LOFF_OVERFLOW(offset, length)));
3961} 3882}
3962 3883
3963static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access) 3884static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
3964{ 3885{
3965 struct nfs4_file *fp = lock_stp->st_file; 3886 struct nfs4_file *fp = lock_stp->st_file;
3966 int oflag = nfs4_access_to_omode(access); 3887 int oflag = nfs4_access_to_omode(access);
@@ -3978,15 +3899,16 @@ __be32
3978nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 3899nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3979 struct nfsd4_lock *lock) 3900 struct nfsd4_lock *lock)
3980{ 3901{
3981 struct nfs4_stateowner *open_sop = NULL; 3902 struct nfs4_openowner *open_sop = NULL;
3982 struct nfs4_stateowner *lock_sop = NULL; 3903 struct nfs4_lockowner *lock_sop = NULL;
3983 struct nfs4_stateid *lock_stp; 3904 struct nfs4_ol_stateid *lock_stp;
3984 struct nfs4_file *fp; 3905 struct nfs4_file *fp;
3985 struct file *filp = NULL; 3906 struct file *filp = NULL;
3986 struct file_lock file_lock; 3907 struct file_lock file_lock;
3987 struct file_lock conflock; 3908 struct file_lock conflock;
3988 __be32 status = 0; 3909 __be32 status = 0;
3989 unsigned int strhashval; 3910 unsigned int strhashval;
3911 int lkflg;
3990 int err; 3912 int err;
3991 3913
3992 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", 3914 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -4010,7 +3932,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4010 * Use open owner and open stateid to create lock owner and 3932 * Use open owner and open stateid to create lock owner and
4011 * lock stateid. 3933 * lock stateid.
4012 */ 3934 */
4013 struct nfs4_stateid *open_stp = NULL; 3935 struct nfs4_ol_stateid *open_stp = NULL;
4014 3936
4015 status = nfserr_stale_clientid; 3937 status = nfserr_stale_clientid;
4016 if (!nfsd4_has_session(cstate) && 3938 if (!nfsd4_has_session(cstate) &&
@@ -4018,26 +3940,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4018 goto out; 3940 goto out;
4019 3941
4020 /* validate and update open stateid and open seqid */ 3942 /* validate and update open stateid and open seqid */
4021 status = nfs4_preprocess_seqid_op(cstate, 3943 status = nfs4_preprocess_confirmed_seqid_op(cstate,
4022 lock->lk_new_open_seqid, 3944 lock->lk_new_open_seqid,
4023 &lock->lk_new_open_stateid, 3945 &lock->lk_new_open_stateid,
4024 OPEN_STATE, 3946 &open_stp);
4025 &lock->lk_replay_owner, &open_stp,
4026 lock);
4027 if (status) 3947 if (status)
4028 goto out; 3948 goto out;
4029 open_sop = lock->lk_replay_owner; 3949 open_sop = openowner(open_stp->st_stateowner);
3950 status = nfserr_bad_stateid;
3951 if (!nfsd4_has_session(cstate) &&
3952 !same_clid(&open_sop->oo_owner.so_client->cl_clientid,
3953 &lock->v.new.clientid))
3954 goto out;
4030 /* create lockowner and lock stateid */ 3955 /* create lockowner and lock stateid */
4031 fp = open_stp->st_file; 3956 fp = open_stp->st_file;
4032 strhashval = lock_ownerstr_hashval(fp->fi_inode, 3957 strhashval = lock_ownerstr_hashval(fp->fi_inode,
4033 open_sop->so_client->cl_clientid.cl_id, 3958 open_sop->oo_owner.so_client->cl_clientid.cl_id,
4034 &lock->v.new.owner); 3959 &lock->v.new.owner);
4035 /* XXX: Do we need to check for duplicate stateowners on 3960 /* XXX: Do we need to check for duplicate stateowners on
4036 * the same file, or should they just be allowed (and 3961 * the same file, or should they just be allowed (and
4037 * create new stateids)? */ 3962 * create new stateids)? */
4038 status = nfserr_resource; 3963 status = nfserr_jukebox;
4039 lock_sop = alloc_init_lock_stateowner(strhashval, 3964 lock_sop = alloc_init_lock_stateowner(strhashval,
4040 open_sop->so_client, open_stp, lock); 3965 open_sop->oo_owner.so_client, open_stp, lock);
4041 if (lock_sop == NULL) 3966 if (lock_sop == NULL)
4042 goto out; 3967 goto out;
4043 lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); 3968 lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
@@ -4046,16 +3971,20 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4046 } else { 3971 } else {
4047 /* lock (lock owner + lock stateid) already exists */ 3972 /* lock (lock owner + lock stateid) already exists */
4048 status = nfs4_preprocess_seqid_op(cstate, 3973 status = nfs4_preprocess_seqid_op(cstate,
4049 lock->lk_old_lock_seqid, 3974 lock->lk_old_lock_seqid,
4050 &lock->lk_old_lock_stateid, 3975 &lock->lk_old_lock_stateid,
4051 LOCK_STATE, 3976 NFS4_LOCK_STID, &lock_stp);
4052 &lock->lk_replay_owner, &lock_stp, lock);
4053 if (status) 3977 if (status)
4054 goto out; 3978 goto out;
4055 lock_sop = lock->lk_replay_owner; 3979 lock_sop = lockowner(lock_stp->st_stateowner);
4056 fp = lock_stp->st_file; 3980 fp = lock_stp->st_file;
4057 } 3981 }
4058 /* lock->lk_replay_owner and lock_stp have been created or found */ 3982 /* lock_sop and lock_stp have been created or found */
3983
3984 lkflg = setlkflg(lock->lk_type);
3985 status = nfs4_check_openmode(lock_stp, lkflg);
3986 if (status)
3987 goto out;
4059 3988
4060 status = nfserr_grace; 3989 status = nfserr_grace;
4061 if (locks_in_grace() && !lock->lk_reclaim) 3990 if (locks_in_grace() && !lock->lk_reclaim)
@@ -4106,8 +4035,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4106 err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); 4035 err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock);
4107 switch (-err) { 4036 switch (-err) {
4108 case 0: /* success! */ 4037 case 0: /* success! */
4109 update_stateid(&lock_stp->st_stateid); 4038 update_stateid(&lock_stp->st_stid.sc_stateid);
4110 memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, 4039 memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
4111 sizeof(stateid_t)); 4040 sizeof(stateid_t));
4112 status = 0; 4041 status = 0;
4113 break; 4042 break;
@@ -4119,19 +4048,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4119 case (EDEADLK): 4048 case (EDEADLK):
4120 status = nfserr_deadlock; 4049 status = nfserr_deadlock;
4121 break; 4050 break;
4122 default: 4051 default:
4123 dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err); 4052 dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
4124 status = nfserr_resource; 4053 status = nfserrno(err);
4125 break; 4054 break;
4126 } 4055 }
4127out: 4056out:
4128 if (status && lock->lk_is_new && lock_sop) 4057 if (status && lock->lk_is_new && lock_sop)
4129 release_lockowner(lock_sop); 4058 release_lockowner(lock_sop);
4130 if (lock->lk_replay_owner) { 4059 if (!cstate->replay_owner)
4131 nfs4_get_stateowner(lock->lk_replay_owner); 4060 nfs4_unlock_state();
4132 cstate->replay_owner = lock->lk_replay_owner;
4133 }
4134 nfs4_unlock_state();
4135 return status; 4061 return status;
4136} 4062}
4137 4063
@@ -4163,6 +4089,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4163{ 4089{
4164 struct inode *inode; 4090 struct inode *inode;
4165 struct file_lock file_lock; 4091 struct file_lock file_lock;
4092 struct nfs4_lockowner *lo;
4166 int error; 4093 int error;
4167 __be32 status; 4094 __be32 status;
4168 4095
@@ -4172,19 +4099,14 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4172 if (check_lock_length(lockt->lt_offset, lockt->lt_length)) 4099 if (check_lock_length(lockt->lt_offset, lockt->lt_length))
4173 return nfserr_inval; 4100 return nfserr_inval;
4174 4101
4175 lockt->lt_stateowner = NULL;
4176 nfs4_lock_state(); 4102 nfs4_lock_state();
4177 4103
4178 status = nfserr_stale_clientid; 4104 status = nfserr_stale_clientid;
4179 if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) 4105 if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
4180 goto out; 4106 goto out;
4181 4107
4182 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { 4108 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
4183 dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
4184 if (status == nfserr_symlink)
4185 status = nfserr_inval;
4186 goto out; 4109 goto out;
4187 }
4188 4110
4189 inode = cstate->current_fh.fh_dentry->d_inode; 4111 inode = cstate->current_fh.fh_dentry->d_inode;
4190 locks_init_lock(&file_lock); 4112 locks_init_lock(&file_lock);
@@ -4203,10 +4125,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4203 goto out; 4125 goto out;
4204 } 4126 }
4205 4127
4206 lockt->lt_stateowner = find_lockstateowner_str(inode, 4128 lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner);
4207 &lockt->lt_clientid, &lockt->lt_owner); 4129 if (lo)
4208 if (lockt->lt_stateowner) 4130 file_lock.fl_owner = (fl_owner_t)lo;
4209 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
4210 file_lock.fl_pid = current->tgid; 4131 file_lock.fl_pid = current->tgid;
4211 file_lock.fl_flags = FL_POSIX; 4132 file_lock.fl_flags = FL_POSIX;
4212 4133
@@ -4234,7 +4155,7 @@ __be32
4234nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 4155nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4235 struct nfsd4_locku *locku) 4156 struct nfsd4_locku *locku)
4236{ 4157{
4237 struct nfs4_stateid *stp; 4158 struct nfs4_ol_stateid *stp;
4238 struct file *filp = NULL; 4159 struct file *filp = NULL;
4239 struct file_lock file_lock; 4160 struct file_lock file_lock;
4240 __be32 status; 4161 __be32 status;
@@ -4249,13 +4170,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4249 4170
4250 nfs4_lock_state(); 4171 nfs4_lock_state();
4251 4172
4252 if ((status = nfs4_preprocess_seqid_op(cstate, 4173 status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
4253 locku->lu_seqid, 4174 &locku->lu_stateid, NFS4_LOCK_STID, &stp);
4254 &locku->lu_stateid, 4175 if (status)
4255 LOCK_STATE,
4256 &locku->lu_stateowner, &stp, NULL)))
4257 goto out; 4176 goto out;
4258
4259 filp = find_any_file(stp->st_file); 4177 filp = find_any_file(stp->st_file);
4260 if (!filp) { 4178 if (!filp) {
4261 status = nfserr_lock_range; 4179 status = nfserr_lock_range;
@@ -4264,7 +4182,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4264 BUG_ON(!filp); 4182 BUG_ON(!filp);
4265 locks_init_lock(&file_lock); 4183 locks_init_lock(&file_lock);
4266 file_lock.fl_type = F_UNLCK; 4184 file_lock.fl_type = F_UNLCK;
4267 file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner; 4185 file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
4268 file_lock.fl_pid = current->tgid; 4186 file_lock.fl_pid = current->tgid;
4269 file_lock.fl_file = filp; 4187 file_lock.fl_file = filp;
4270 file_lock.fl_flags = FL_POSIX; 4188 file_lock.fl_flags = FL_POSIX;
@@ -4285,15 +4203,12 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4285 /* 4203 /*
4286 * OK, unlock succeeded; the only thing left to do is update the stateid. 4204 * OK, unlock succeeded; the only thing left to do is update the stateid.
4287 */ 4205 */
4288 update_stateid(&stp->st_stateid); 4206 update_stateid(&stp->st_stid.sc_stateid);
4289 memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); 4207 memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4290 4208
4291out: 4209out:
4292 if (locku->lu_stateowner) { 4210 if (!cstate->replay_owner)
4293 nfs4_get_stateowner(locku->lu_stateowner); 4211 nfs4_unlock_state();
4294 cstate->replay_owner = locku->lu_stateowner;
4295 }
4296 nfs4_unlock_state();
4297 return status; 4212 return status;
4298 4213
4299out_nfserr: 4214out_nfserr:
@@ -4307,7 +4222,7 @@ out_nfserr:
4307 * 0: no locks held by lockowner 4222 * 0: no locks held by lockowner
4308 */ 4223 */
4309static int 4224static int
4310check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) 4225check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
4311{ 4226{
4312 struct file_lock **flpp; 4227 struct file_lock **flpp;
4313 struct inode *inode = filp->fi_inode; 4228 struct inode *inode = filp->fi_inode;
@@ -4332,7 +4247,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4332{ 4247{
4333 clientid_t *clid = &rlockowner->rl_clientid; 4248 clientid_t *clid = &rlockowner->rl_clientid;
4334 struct nfs4_stateowner *sop; 4249 struct nfs4_stateowner *sop;
4335 struct nfs4_stateid *stp; 4250 struct nfs4_lockowner *lo;
4251 struct nfs4_ol_stateid *stp;
4336 struct xdr_netobj *owner = &rlockowner->rl_owner; 4252 struct xdr_netobj *owner = &rlockowner->rl_owner;
4337 struct list_head matches; 4253 struct list_head matches;
4338 int i; 4254 int i;
@@ -4356,16 +4272,15 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4356 * data structures. */ 4272 * data structures. */
4357 INIT_LIST_HEAD(&matches); 4273 INIT_LIST_HEAD(&matches);
4358 for (i = 0; i < LOCK_HASH_SIZE; i++) { 4274 for (i = 0; i < LOCK_HASH_SIZE; i++) {
4359 list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) { 4275 list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) {
4360 if (!same_owner_str(sop, owner, clid)) 4276 if (!same_owner_str(sop, owner, clid))
4361 continue; 4277 continue;
4362 list_for_each_entry(stp, &sop->so_stateids, 4278 list_for_each_entry(stp, &sop->so_stateids,
4363 st_perstateowner) { 4279 st_perstateowner) {
4364 if (check_for_locks(stp->st_file, sop)) 4280 lo = lockowner(sop);
4281 if (check_for_locks(stp->st_file, lo))
4365 goto out; 4282 goto out;
4366 /* Note: so_perclient unused for lockowners, 4283 list_add(&lo->lo_list, &matches);
4367 * so it's OK to fool with here. */
4368 list_add(&sop->so_perclient, &matches);
4369 } 4284 }
4370 } 4285 }
4371 } 4286 }
@@ -4374,12 +4289,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4374 * have been checked. */ 4289 * have been checked. */
4375 status = nfs_ok; 4290 status = nfs_ok;
4376 while (!list_empty(&matches)) { 4291 while (!list_empty(&matches)) {
4377 sop = list_entry(matches.next, struct nfs4_stateowner, 4292 lo = list_entry(matches.next, struct nfs4_lockowner,
4378 so_perclient); 4293 lo_list);
4379 /* unhash_stateowner deletes so_perclient only 4294 /* unhash_stateowner deletes so_perclient only
4380 * for openowners. */ 4295 * for openowners. */
4381 list_del(&sop->so_perclient); 4296 list_del(&lo->lo_list);
4382 release_lockowner(sop); 4297 release_lockowner(lo);
4383 } 4298 }
4384out: 4299out:
4385 nfs4_unlock_state(); 4300 nfs4_unlock_state();
@@ -4501,16 +4416,10 @@ nfs4_state_init(void)
4501 for (i = 0; i < FILE_HASH_SIZE; i++) { 4416 for (i = 0; i < FILE_HASH_SIZE; i++) {
4502 INIT_LIST_HEAD(&file_hashtbl[i]); 4417 INIT_LIST_HEAD(&file_hashtbl[i]);
4503 } 4418 }
4504 for (i = 0; i < OWNER_HASH_SIZE; i++) { 4419 for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) {
4505 INIT_LIST_HEAD(&ownerstr_hashtbl[i]); 4420 INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]);
4506 INIT_LIST_HEAD(&ownerid_hashtbl[i]);
4507 }
4508 for (i = 0; i < STATEID_HASH_SIZE; i++) {
4509 INIT_LIST_HEAD(&stateid_hashtbl[i]);
4510 INIT_LIST_HEAD(&lockstateid_hashtbl[i]);
4511 } 4421 }
4512 for (i = 0; i < LOCK_HASH_SIZE; i++) { 4422 for (i = 0; i < LOCK_HASH_SIZE; i++) {
4513 INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
4514 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); 4423 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
4515 } 4424 }
4516 memset(&onestateid, ~0, sizeof(stateid_t)); 4425 memset(&onestateid, ~0, sizeof(stateid_t));
@@ -4527,7 +4436,7 @@ nfsd4_load_reboot_recovery_data(void)
4527 int status; 4436 int status;
4528 4437
4529 nfs4_lock_state(); 4438 nfs4_lock_state();
4530 nfsd4_init_recdir(user_recovery_dirname); 4439 nfsd4_init_recdir();
4531 status = nfsd4_recdir_load(); 4440 status = nfsd4_recdir_load();
4532 nfs4_unlock_state(); 4441 nfs4_unlock_state();
4533 if (status) 4442 if (status)
@@ -4636,40 +4545,3 @@ nfs4_state_shutdown(void)
4636 nfs4_unlock_state(); 4545 nfs4_unlock_state();
4637 nfsd4_destroy_callback_queue(); 4546 nfsd4_destroy_callback_queue();
4638} 4547}
4639
4640/*
4641 * user_recovery_dirname is protected by the nfsd_mutex since it's only
4642 * accessed when nfsd is starting.
4643 */
4644static void
4645nfs4_set_recdir(char *recdir)
4646{
4647 strcpy(user_recovery_dirname, recdir);
4648}
4649
4650/*
4651 * Change the NFSv4 recovery directory to recdir.
4652 */
4653int
4654nfs4_reset_recoverydir(char *recdir)
4655{
4656 int status;
4657 struct path path;
4658
4659 status = kern_path(recdir, LOOKUP_FOLLOW, &path);
4660 if (status)
4661 return status;
4662 status = -ENOTDIR;
4663 if (S_ISDIR(path.dentry->d_inode->i_mode)) {
4664 nfs4_set_recdir(recdir);
4665 status = 0;
4666 }
4667 path_put(&path);
4668 return status;
4669}
4670
4671char *
4672nfs4_recoverydir(void)
4673{
4674 return user_recovery_dirname;
4675}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c8bf405d19de..66d095d7955e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -456,7 +456,6 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
456{ 456{
457 DECODE_HEAD; 457 DECODE_HEAD;
458 458
459 close->cl_stateowner = NULL;
460 READ_BUF(4); 459 READ_BUF(4);
461 READ32(close->cl_seqid); 460 READ32(close->cl_seqid);
462 return nfsd4_decode_stateid(argp, &close->cl_stateid); 461 return nfsd4_decode_stateid(argp, &close->cl_stateid);
@@ -551,7 +550,6 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
551{ 550{
552 DECODE_HEAD; 551 DECODE_HEAD;
553 552
554 lock->lk_replay_owner = NULL;
555 /* 553 /*
556 * type, reclaim(boolean), offset, length, new_lock_owner(boolean) 554 * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
557 */ 555 */
@@ -611,7 +609,6 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
611{ 609{
612 DECODE_HEAD; 610 DECODE_HEAD;
613 611
614 locku->lu_stateowner = NULL;
615 READ_BUF(8); 612 READ_BUF(8);
616 READ32(locku->lu_type); 613 READ32(locku->lu_type);
617 if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) 614 if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
@@ -642,6 +639,83 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup
642 DECODE_TAIL; 639 DECODE_TAIL;
643} 640}
644 641
642static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
643{
644 __be32 *p;
645 u32 w;
646 /* Decode one share_access word into *x, then validate each group of bits in turn. */
647 READ_BUF(4);
648 READ32(w);
649 *x = w; /* the raw word is stored before any validation happens */
650 switch (w & NFS4_SHARE_ACCESS_MASK) { /* exactly one access mode must be present */
651 case NFS4_SHARE_ACCESS_READ:
652 case NFS4_SHARE_ACCESS_WRITE:
653 case NFS4_SHARE_ACCESS_BOTH:
654 break;
655 default:
656 return nfserr_bad_xdr;
657 }
658 w &= ~NFS4_SHARE_ACCESS_MASK; /* strip the access bits; a logical '!' here would zero w and skip every check below */
659 if (!w)
660 return nfs_ok;
661 if (!argp->minorversion) /* any remaining bits are rejected when minorversion is zero */
662 return nfserr_bad_xdr;
663 switch (w & NFS4_SHARE_WANT_MASK) {
664 case NFS4_SHARE_WANT_NO_PREFERENCE:
665 case NFS4_SHARE_WANT_READ_DELEG:
666 case NFS4_SHARE_WANT_WRITE_DELEG:
667 case NFS4_SHARE_WANT_ANY_DELEG:
668 case NFS4_SHARE_WANT_NO_DELEG:
669 case NFS4_SHARE_WANT_CANCEL:
670 break;
671 default:
672 return nfserr_bad_xdr;
673 }
674 w &= ~NFS4_SHARE_WANT_MASK;
675 if (!w)
676 return nfs_ok;
677 switch (w) { /* only the two delegation-signalling flags, alone or combined, may remain */
678 case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL:
679 case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED:
680 case (NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL |
681 NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED):
682 return nfs_ok;
683 }
684xdr_error: /* also reached by falling out of the switch above; presumably the READ_BUF failure target - confirm against the macro */
685 return nfserr_bad_xdr;
686}
687
688static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
689{
690 __be32 *p;
691 /* Decode one share_deny word into *x and reject any bit outside NFS4_SHARE_DENY_BOTH. */
692 READ_BUF(4);
693 READ32(*x);
694 /* Note: unlike access bits, deny bits may be zero. */
695 if (*x & ~NFS4_SHARE_DENY_BOTH)
696 return nfserr_bad_xdr;
697 return nfs_ok;
698xdr_error: /* presumably the READ_BUF failure target - confirm against the macro definition */
699 return nfserr_bad_xdr;
700}
701
702static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
703{
704 __be32 *p;
705 /* Decode a length-prefixed opaque into *o: a 4-byte length, then that many bytes of data. */
706 READ_BUF(4);
707 READ32(o->len);
708 /* A zero length, or one above NFS4_OPAQUE_LIMIT, is rejected as bad XDR. */
709 if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
710 return nfserr_bad_xdr;
711
712 READ_BUF(o->len);
713 SAVEMEM(o->data, o->len); /* NOTE(review): presumably keeps the bytes valid beyond the receive buffer - confirm */
714 return nfs_ok;
715xdr_error: /* presumably the READ_BUF/SAVEMEM failure target - confirm against the macro definitions */
716 return nfserr_bad_xdr;
717}
718
645static __be32 719static __be32
646nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) 720nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
647{ 721{
@@ -649,19 +723,23 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
649 723
650 memset(open->op_bmval, 0, sizeof(open->op_bmval)); 724 memset(open->op_bmval, 0, sizeof(open->op_bmval));
651 open->op_iattr.ia_valid = 0; 725 open->op_iattr.ia_valid = 0;
652 open->op_stateowner = NULL; 726 open->op_openowner = NULL;
653 727
654 /* seqid, share_access, share_deny, clientid, ownerlen */ 728 /* seqid, share_access, share_deny, clientid, ownerlen */
655 READ_BUF(16 + sizeof(clientid_t)); 729 READ_BUF(4);
656 READ32(open->op_seqid); 730 READ32(open->op_seqid);
657 READ32(open->op_share_access); 731 status = nfsd4_decode_share_access(argp, &open->op_share_access);
658 READ32(open->op_share_deny); 732 if (status)
733 goto xdr_error;
734 status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
735 if (status)
736 goto xdr_error;
737 READ_BUF(sizeof(clientid_t));
659 COPYMEM(&open->op_clientid, sizeof(clientid_t)); 738 COPYMEM(&open->op_clientid, sizeof(clientid_t));
660 READ32(open->op_owner.len); 739 status = nfsd4_decode_opaque(argp, &open->op_owner);
661 740 if (status)
662 /* owner, open_flag */ 741 goto xdr_error;
663 READ_BUF(open->op_owner.len + 4); 742 READ_BUF(4);
664 SAVEMEM(open->op_owner.data, open->op_owner.len);
665 READ32(open->op_create); 743 READ32(open->op_create);
666 switch (open->op_create) { 744 switch (open->op_create) {
667 case NFS4_OPEN_NOCREATE: 745 case NFS4_OPEN_NOCREATE:
@@ -727,6 +805,19 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
727 if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) 805 if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
728 return status; 806 return status;
729 break; 807 break;
808 case NFS4_OPEN_CLAIM_FH:
809 case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
810 if (argp->minorversion < 1)
811 goto xdr_error;
812 /* void */
813 break;
814 case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
815 if (argp->minorversion < 1)
816 goto xdr_error;
817 status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
818 if (status)
819 return status;
820 break;
730 default: 821 default:
731 goto xdr_error; 822 goto xdr_error;
732 } 823 }
@@ -739,7 +830,6 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
739{ 830{
740 DECODE_HEAD; 831 DECODE_HEAD;
741 832
742 open_conf->oc_stateowner = NULL;
743 status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); 833 status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
744 if (status) 834 if (status)
745 return status; 835 return status;
@@ -754,15 +844,17 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
754{ 844{
755 DECODE_HEAD; 845 DECODE_HEAD;
756 846
757 open_down->od_stateowner = NULL;
758 status = nfsd4_decode_stateid(argp, &open_down->od_stateid); 847 status = nfsd4_decode_stateid(argp, &open_down->od_stateid);
759 if (status) 848 if (status)
760 return status; 849 return status;
761 READ_BUF(12); 850 READ_BUF(4);
762 READ32(open_down->od_seqid); 851 READ32(open_down->od_seqid);
763 READ32(open_down->od_share_access); 852 status = nfsd4_decode_share_access(argp, &open_down->od_share_access);
764 READ32(open_down->od_share_deny); 853 if (status)
765 854 return status;
855 status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
856 if (status)
857 return status;
766 DECODE_TAIL; 858 DECODE_TAIL;
767} 859}
768 860
@@ -903,12 +995,13 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
903{ 995{
904 DECODE_HEAD; 996 DECODE_HEAD;
905 997
906 READ_BUF(12); 998 READ_BUF(8);
907 COPYMEM(setclientid->se_verf.data, 8); 999 COPYMEM(setclientid->se_verf.data, 8);
908 READ32(setclientid->se_namelen);
909 1000
910 READ_BUF(setclientid->se_namelen + 8); 1001 status = nfsd4_decode_opaque(argp, &setclientid->se_name);
911 SAVEMEM(setclientid->se_name, setclientid->se_namelen); 1002 if (status)
1003 return nfserr_bad_xdr;
1004 READ_BUF(8);
912 READ32(setclientid->se_callback_prog); 1005 READ32(setclientid->se_callback_prog);
913 READ32(setclientid->se_callback_netid_len); 1006 READ32(setclientid->se_callback_netid_len);
914 1007
@@ -1051,11 +1144,9 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
1051 READ_BUF(NFS4_VERIFIER_SIZE); 1144 READ_BUF(NFS4_VERIFIER_SIZE);
1052 COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); 1145 COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE);
1053 1146
1054 READ_BUF(4); 1147 status = nfsd4_decode_opaque(argp, &exid->clname);
1055 READ32(exid->clname.len); 1148 if (status)
1056 1149 return nfserr_bad_xdr;
1057 READ_BUF(exid->clname.len);
1058 SAVEMEM(exid->clname.data, exid->clname.len);
1059 1150
1060 READ_BUF(4); 1151 READ_BUF(4);
1061 READ32(exid->flags); 1152 READ32(exid->flags);
@@ -1326,6 +1417,16 @@ xdr_error:
1326 goto out; 1417 goto out;
1327} 1418}
1328 1419
1420static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc)
1421{
1422 DECODE_HEAD;
1423 /* Decode the operation's only argument: 8 bytes copied verbatim into dc->clientid. */
1424 READ_BUF(8);
1425 COPYMEM(&dc->clientid, 8); /* NOTE(review): nfsd4_decode_open spells this size as sizeof(clientid_t) - confirm the two agree */
1426
1427 DECODE_TAIL;
1428}
1429
1329static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) 1430static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
1330{ 1431{
1331 DECODE_HEAD; 1432 DECODE_HEAD;
@@ -1447,7 +1548,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
1447 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, 1548 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
1448 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, 1549 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid,
1449 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1550 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1450 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, 1551 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
1451 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, 1552 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
1452}; 1553};
1453 1554
@@ -1630,15 +1731,20 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
1630 * we know whether the error to be returned is a sequence id mutating error. 1731 * we know whether the error to be returned is a sequence id mutating error.
1631 */ 1732 */
1632 1733
1633#define ENCODE_SEQID_OP_TAIL(stateowner) do { \ 1734static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr)
1634 if (seqid_mutating_err(nfserr) && stateowner) { \ 1735{
1635 stateowner->so_seqid++; \ 1736 struct nfs4_stateowner *stateowner = resp->cstate.replay_owner;
1636 stateowner->so_replay.rp_status = nfserr; \ 1737
1637 stateowner->so_replay.rp_buflen = \ 1738 if (seqid_mutating_err(ntohl(nfserr)) && stateowner) {
1638 (((char *)(resp)->p - (char *)save)); \ 1739 stateowner->so_seqid++;
1639 memcpy(stateowner->so_replay.rp_buf, save, \ 1740 stateowner->so_replay.rp_status = nfserr;
1640 stateowner->so_replay.rp_buflen); \ 1741 stateowner->so_replay.rp_buflen =
1641 } } while (0); 1742 (char *)resp->p - (char *)save;
1743 memcpy(stateowner->so_replay.rp_buf, save,
1744 stateowner->so_replay.rp_buflen);
1745 nfsd4_purge_closed_stateid(stateowner);
1746 }
1747}
1642 1748
1643/* Encode as an array of strings the string given with components 1749/* Encode as an array of strings the string given with components
1644 * separated @sep. 1750 * separated @sep.
@@ -1697,36 +1803,89 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
1697} 1803}
1698 1804
1699/* 1805/*
1700 * Return the path to an export point in the pseudo filesystem namespace 1806 * Encode a path in RFC3530 'pathname4' format
1701 * Returned string is safe to use as long as the caller holds a reference
1702 * to @exp.
1703 */ 1807 */
1704static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) 1808static __be32 nfsd4_encode_path(const struct path *root,
1809 const struct path *path, __be32 **pp, int *buflen)
1705{ 1810{
1706 struct svc_fh tmp_fh; 1811 struct path cur = {
1707 char *path = NULL, *rootpath; 1812 .mnt = path->mnt,
1708 size_t rootlen; 1813 .dentry = path->dentry,
1814 };
1815 __be32 *p = *pp;
1816 struct dentry **components = NULL;
1817 unsigned int ncomponents = 0;
1818 __be32 err = nfserr_jukebox;
1709 1819
1710 fh_init(&tmp_fh, NFS4_FHSIZE); 1820 dprintk("nfsd4_encode_components(");
1711 *stat = exp_pseudoroot(rqstp, &tmp_fh);
1712 if (*stat)
1713 return NULL;
1714 rootpath = tmp_fh.fh_export->ex_pathname;
1715 1821
1716 path = exp->ex_pathname; 1822 path_get(&cur);
1823 /* First walk the path up to the nfsd root, and store the
1824 * dentries/path components in an array.
1825 */
1826 for (;;) {
1827 if (cur.dentry == root->dentry && cur.mnt == root->mnt)
1828 break;
1829 if (cur.dentry == cur.mnt->mnt_root) {
1830 if (follow_up(&cur))
1831 continue;
1832 goto out_free;
1833 }
1834 if ((ncomponents & 15) == 0) {
1835 struct dentry **new;
1836 new = krealloc(components,
1837 sizeof(*new) * (ncomponents + 16),
1838 GFP_KERNEL);
1839 if (!new)
1840 goto out_free;
1841 components = new;
1842 }
1843 components[ncomponents++] = cur.dentry;
1844 cur.dentry = dget_parent(cur.dentry);
1845 }
1717 1846
1718 rootlen = strlen(rootpath); 1847 *buflen -= 4;
1719 if (strncmp(path, rootpath, rootlen)) { 1848 if (*buflen < 0)
1720 dprintk("nfsd: fs_locations failed;" 1849 goto out_free;
1721 "%s is not contained in %s\n", path, rootpath); 1850 WRITE32(ncomponents);
1722 *stat = nfserr_notsupp; 1851
1723 path = NULL; 1852 while (ncomponents) {
1724 goto out; 1853 struct dentry *dentry = components[ncomponents - 1];
1854 unsigned int len = dentry->d_name.len;
1855
1856 *buflen -= 4 + (XDR_QUADLEN(len) << 2);
1857 if (*buflen < 0)
1858 goto out_free;
1859 WRITE32(len);
1860 WRITEMEM(dentry->d_name.name, len);
1861 dprintk("/%s", dentry->d_name.name);
1862 dput(dentry);
1863 ncomponents--;
1725 } 1864 }
1726 path += rootlen; 1865
1727out: 1866 *pp = p;
1728 fh_put(&tmp_fh); 1867 err = 0;
1729 return path; 1868out_free:
1869 dprintk(")\n");
1870 while (ncomponents)
1871 dput(components[--ncomponents]);
1872 kfree(components);
1873 path_put(&cur);
1874 return err;
1875}
1876
1877static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,
1878 const struct path *path, __be32 **pp, int *buflen)
1879{
1880 struct svc_export *exp_ps;
1881 __be32 res;
1882 /* Encode @path relative to the export returned by rqst_find_fsidzero_export(). */
1883 exp_ps = rqst_find_fsidzero_export(rqstp);
1884 if (IS_ERR(exp_ps))
1885 return nfserrno(PTR_ERR(exp_ps)); /* lookup failed: hand back the errno converted by nfserrno() */
1886 res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen);
1887 exp_put(exp_ps); /* exp_ps is released whether or not the encode succeeded */
1888 return res;
1730} 1889}
1731 1890
1732/* 1891/*
@@ -1740,11 +1899,8 @@ static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
1740 int i; 1899 int i;
1741 __be32 *p = *pp; 1900 __be32 *p = *pp;
1742 struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; 1901 struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
1743 char *root = nfsd4_path(rqstp, exp, &status);
1744 1902
1745 if (status) 1903 status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen);
1746 return status;
1747 status = nfsd4_encode_components('/', root, &p, buflen);
1748 if (status) 1904 if (status)
1749 return status; 1905 return status;
1750 if ((*buflen -= 4) < 0) 1906 if ((*buflen -= 4) < 0)
@@ -1760,12 +1916,19 @@ static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
1760 return 0; 1916 return 0;
1761} 1917}
1762 1918
1763static u32 nfs4_ftypes[16] = { 1919static u32 nfs4_file_type(umode_t mode)
1764 NF4BAD, NF4FIFO, NF4CHR, NF4BAD, 1920{
1765 NF4DIR, NF4BAD, NF4BLK, NF4BAD, 1921 switch (mode & S_IFMT) {
1766 NF4REG, NF4BAD, NF4LNK, NF4BAD, 1922 case S_IFIFO: return NF4FIFO;
1767 NF4SOCK, NF4BAD, NF4LNK, NF4BAD, 1923 case S_IFCHR: return NF4CHR;
1768}; 1924 case S_IFDIR: return NF4DIR;
1925 case S_IFBLK: return NF4BLK;
1926 case S_IFLNK: return NF4LNK;
1927 case S_IFREG: return NF4REG;
1928 case S_IFSOCK: return NF4SOCK;
1929 default: return NF4BAD;
1930 };
1931}
1769 1932
1770static __be32 1933static __be32
1771nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1934nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
@@ -1954,7 +2117,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1954 if (bmval0 & FATTR4_WORD0_TYPE) { 2117 if (bmval0 & FATTR4_WORD0_TYPE) {
1955 if ((buflen -= 4) < 0) 2118 if ((buflen -= 4) < 0)
1956 goto out_resource; 2119 goto out_resource;
1957 dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12]; 2120 dummy = nfs4_file_type(stat.mode);
1958 if (dummy == NF4BAD) 2121 if (dummy == NF4BAD)
1959 goto out_serverfault; 2122 goto out_serverfault;
1960 WRITE32(dummy); 2123 WRITE32(dummy);
@@ -2488,7 +2651,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
2488 if (!nfserr) 2651 if (!nfserr)
2489 nfsd4_encode_stateid(resp, &close->cl_stateid); 2652 nfsd4_encode_stateid(resp, &close->cl_stateid);
2490 2653
2491 ENCODE_SEQID_OP_TAIL(close->cl_stateowner); 2654 encode_seqid_op_tail(resp, save, nfserr);
2492 return nfserr; 2655 return nfserr;
2493} 2656}
2494 2657
@@ -2564,17 +2727,18 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
2564static void 2727static void
2565nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) 2728nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
2566{ 2729{
2730 struct xdr_netobj *conf = &ld->ld_owner;
2567 __be32 *p; 2731 __be32 *p;
2568 2732
2569 RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); 2733 RESERVE_SPACE(32 + XDR_LEN(conf->len));
2570 WRITE64(ld->ld_start); 2734 WRITE64(ld->ld_start);
2571 WRITE64(ld->ld_length); 2735 WRITE64(ld->ld_length);
2572 WRITE32(ld->ld_type); 2736 WRITE32(ld->ld_type);
2573 if (ld->ld_sop) { 2737 if (conf->len) {
2574 WRITEMEM(&ld->ld_clientid, 8); 2738 WRITEMEM(&ld->ld_clientid, 8);
2575 WRITE32(ld->ld_sop->so_owner.len); 2739 WRITE32(conf->len);
2576 WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len); 2740 WRITEMEM(conf->data, conf->len);
2577 kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner); 2741 kfree(conf->data);
2578 } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ 2742 } else { /* non - nfsv4 lock in conflict, no clientid nor owner */
2579 WRITE64((u64)0); /* clientid */ 2743 WRITE64((u64)0); /* clientid */
2580 WRITE32(0); /* length of owner name */ 2744 WRITE32(0); /* length of owner name */
@@ -2592,7 +2756,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
2592 else if (nfserr == nfserr_denied) 2756 else if (nfserr == nfserr_denied)
2593 nfsd4_encode_lock_denied(resp, &lock->lk_denied); 2757 nfsd4_encode_lock_denied(resp, &lock->lk_denied);
2594 2758
2595 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); 2759 encode_seqid_op_tail(resp, save, nfserr);
2596 return nfserr; 2760 return nfserr;
2597} 2761}
2598 2762
@@ -2612,7 +2776,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
2612 if (!nfserr) 2776 if (!nfserr)
2613 nfsd4_encode_stateid(resp, &locku->lu_stateid); 2777 nfsd4_encode_stateid(resp, &locku->lu_stateid);
2614 2778
2615 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); 2779 encode_seqid_op_tail(resp, save, nfserr);
2616 return nfserr; 2780 return nfserr;
2617} 2781}
2618 2782
@@ -2693,7 +2857,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
2693 } 2857 }
2694 /* XXX save filehandle here */ 2858 /* XXX save filehandle here */
2695out: 2859out:
2696 ENCODE_SEQID_OP_TAIL(open->op_stateowner); 2860 encode_seqid_op_tail(resp, save, nfserr);
2697 return nfserr; 2861 return nfserr;
2698} 2862}
2699 2863
@@ -2705,7 +2869,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
2705 if (!nfserr) 2869 if (!nfserr)
2706 nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); 2870 nfsd4_encode_stateid(resp, &oc->oc_resp_stateid);
2707 2871
2708 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); 2872 encode_seqid_op_tail(resp, save, nfserr);
2709 return nfserr; 2873 return nfserr;
2710} 2874}
2711 2875
@@ -2717,7 +2881,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
2717 if (!nfserr) 2881 if (!nfserr)
2718 nfsd4_encode_stateid(resp, &od->od_stateid); 2882 nfsd4_encode_stateid(resp, &od->od_stateid);
2719 2883
2720 ENCODE_SEQID_OP_TAIL(od->od_stateowner); 2884 encode_seqid_op_tail(resp, save, nfserr);
2721 return nfserr; 2885 return nfserr;
2722} 2886}
2723 2887
@@ -2759,8 +2923,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
2759 read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, 2923 read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
2760 &maxcount); 2924 &maxcount);
2761 2925
2762 if (nfserr == nfserr_symlink)
2763 nfserr = nfserr_inval;
2764 if (nfserr) 2926 if (nfserr)
2765 return nfserr; 2927 return nfserr;
2766 eof = (read->rd_offset + maxcount >= 2928 eof = (read->rd_offset + maxcount >=
@@ -2886,8 +3048,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2886 readdir->common.err == nfserr_toosmall && 3048 readdir->common.err == nfserr_toosmall &&
2887 readdir->buffer == page) 3049 readdir->buffer == page)
2888 nfserr = nfserr_toosmall; 3050 nfserr = nfserr_toosmall;
2889 if (nfserr == nfserr_symlink)
2890 nfserr = nfserr_notdir;
2891 if (nfserr) 3051 if (nfserr)
2892 goto err_no_verf; 3052 goto err_no_verf;
2893 3053
@@ -3218,9 +3378,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
3218 WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); 3378 WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
3219 WRITE32(seq->seqid); 3379 WRITE32(seq->seqid);
3220 WRITE32(seq->slotid); 3380 WRITE32(seq->slotid);
3221 WRITE32(seq->maxslots); 3381 /* Note slotid's are numbered from zero: */
3222 /* For now: target_maxslots = maxslots */ 3382 WRITE32(seq->maxslots - 1); /* sr_highest_slotid */
3223 WRITE32(seq->maxslots); 3383 WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */
3224 WRITE32(seq->status_flags); 3384 WRITE32(seq->status_flags);
3225 3385
3226 ADJUST_ARGS(); 3386 ADJUST_ARGS();
@@ -3233,6 +3393,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr,
3233 struct nfsd4_test_stateid *test_stateid) 3393 struct nfsd4_test_stateid *test_stateid)
3234{ 3394{
3235 struct nfsd4_compoundargs *argp; 3395 struct nfsd4_compoundargs *argp;
3396 struct nfs4_client *cl = resp->cstate.session->se_client;
3236 stateid_t si; 3397 stateid_t si;
3237 __be32 *p; 3398 __be32 *p;
3238 int i; 3399 int i;
@@ -3248,7 +3409,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr,
3248 nfs4_lock_state(); 3409 nfs4_lock_state();
3249 for (i = 0; i < test_stateid->ts_num_ids; i++) { 3410 for (i = 0; i < test_stateid->ts_num_ids; i++) {
3250 nfsd4_decode_stateid(argp, &si); 3411 nfsd4_decode_stateid(argp, &si);
3251 valid = nfs4_validate_stateid(&si, test_stateid->ts_has_session); 3412 valid = nfs4_validate_stateid(cl, &si);
3252 RESERVE_SPACE(4); 3413 RESERVE_SPACE(4);
3253 *p++ = htonl(valid); 3414 *p++ = htonl(valid);
3254 resp->p = p; 3415 resp->p = p;
@@ -3334,34 +3495,29 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3334 3495
3335/* 3496/*
3336 * Calculate the total amount of memory that the compound response has taken 3497 * Calculate the total amount of memory that the compound response has taken
3337 * after encoding the current operation. 3498 * after encoding the current operation with pad.
3338 * 3499 *
3339 * pad: add on 8 bytes for the next operation's op_code and status so that 3500 * pad: if operation is non-idempotent, pad was calculated by op_rsize_bop()
3340 * there is room to cache a failure on the next operation. 3501 * which was specified at nfsd4_operation, else pad is zero.
3341 * 3502 *
3342 * Compare this length to the session se_fmaxresp_cached. 3503 * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
3343 * 3504 *
3344 * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so 3505 * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
3345 * will be at least a page and will therefore hold the xdr_buf head. 3506 * will be at least a page and will therefore hold the xdr_buf head.
3346 */ 3507 */
3347static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) 3508int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
3348{ 3509{
3349 int status = 0;
3350 struct xdr_buf *xb = &resp->rqstp->rq_res; 3510 struct xdr_buf *xb = &resp->rqstp->rq_res;
3351 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
3352 struct nfsd4_session *session = NULL; 3511 struct nfsd4_session *session = NULL;
3353 struct nfsd4_slot *slot = resp->cstate.slot; 3512 struct nfsd4_slot *slot = resp->cstate.slot;
3354 u32 length, tlen = 0, pad = 8; 3513 u32 length, tlen = 0;
3355 3514
3356 if (!nfsd4_has_session(&resp->cstate)) 3515 if (!nfsd4_has_session(&resp->cstate))
3357 return status; 3516 return 0;
3358 3517
3359 session = resp->cstate.session; 3518 session = resp->cstate.session;
3360 if (session == NULL || slot->sl_cachethis == 0) 3519 if (session == NULL)
3361 return status; 3520 return 0;
3362
3363 if (resp->opcnt >= args->opcnt)
3364 pad = 0; /* this is the last operation */
3365 3521
3366 if (xb->page_len == 0) { 3522 if (xb->page_len == 0) {
3367 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; 3523 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
@@ -3374,10 +3530,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
3374 dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, 3530 dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
3375 length, xb->page_len, tlen, pad); 3531 length, xb->page_len, tlen, pad);
3376 3532
3377 if (length <= session->se_fchannel.maxresp_cached) 3533 if (length > session->se_fchannel.maxresp_sz)
3378 return status; 3534 return nfserr_rep_too_big;
3379 else 3535
3536 if (slot->sl_cachethis == 1 &&
3537 length > session->se_fchannel.maxresp_cached)
3380 return nfserr_rep_too_big_to_cache; 3538 return nfserr_rep_too_big_to_cache;
3539
3540 return 0;
3381} 3541}
3382 3542
3383void 3543void
@@ -3397,8 +3557,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3397 !nfsd4_enc_ops[op->opnum]); 3557 !nfsd4_enc_ops[op->opnum]);
3398 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); 3558 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
3399 /* nfsd4_check_drc_limit guarantees enough room for error status */ 3559 /* nfsd4_check_drc_limit guarantees enough room for error status */
3400 if (!op->status && nfsd4_check_drc_limit(resp)) 3560 if (!op->status)
3401 op->status = nfserr_rep_too_big_to_cache; 3561 op->status = nfsd4_check_resp_size(resp, 0);
3402status: 3562status:
3403 /* 3563 /*
3404 * Note: We write the status directly, instead of using WRITE32(), 3564 * Note: We write the status directly, instead of using WRITE32(),
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c7716143cbd1..db34a585e112 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -9,7 +9,6 @@
9#include <linux/ctype.h> 9#include <linux/ctype.h>
10 10
11#include <linux/sunrpc/svcsock.h> 11#include <linux/sunrpc/svcsock.h>
12#include <linux/nfsd/syscall.h>
13#include <linux/lockd/lockd.h> 12#include <linux/lockd/lockd.h>
14#include <linux/sunrpc/clnt.h> 13#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/gss_api.h> 14#include <linux/sunrpc/gss_api.h>
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7ecfa2420307..58134a23fdfb 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -11,13 +11,39 @@
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/mount.h> 12#include <linux/mount.h>
13 13
14#include <linux/nfs.h>
15#include <linux/nfs2.h>
16#include <linux/nfs3.h>
17#include <linux/nfs4.h>
18#include <linux/sunrpc/msg_prot.h>
19
14#include <linux/nfsd/debug.h> 20#include <linux/nfsd/debug.h>
15#include <linux/nfsd/export.h> 21#include <linux/nfsd/export.h>
16#include <linux/nfsd/stats.h> 22#include <linux/nfsd/stats.h>
23
17/* 24/*
18 * nfsd version 25 * nfsd version
19 */ 26 */
20#define NFSD_SUPPORTED_MINOR_VERSION 1 27#define NFSD_SUPPORTED_MINOR_VERSION 1
28/*
29 * Maximum blocksizes supported by daemon under various circumstances.
30 */
31#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD
32/* NFSv2 is limited by the protocol specification, see RFC 1094 */
33#define NFSSVC_MAXBLKSIZE_V2 (8*1024)
34
35
36/*
37 * Largest number of bytes we need to allocate for an NFS
38 * call or reply. Used to control buffer sizes. We use
39 * the length of v3 WRITE, READDIR and READDIR replies
40 * which are an RPC header, up to 26 XDR units of reply
41 * data, and some page data.
42 *
43 * Note that accuracy here doesn't matter too much as the
44 * size is rounded up to a page size when allocating space.
45 */
46#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE)
21 47
22struct readdir_cd { 48struct readdir_cd {
23 __be32 err; /* 0, nfserr, or nfserr_eof */ 49 __be32 err; /* 0, nfserr, or nfserr_eof */
@@ -335,6 +361,13 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
335#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ 361#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
336 NFSD_WRITEABLE_ATTRS_WORD2 362 NFSD_WRITEABLE_ATTRS_WORD2
337 363
364extern int nfsd4_is_junction(struct dentry *dentry);
365#else
366static inline int nfsd4_is_junction(struct dentry *dentry)
367{
368 return 0;
369}
370
338#endif /* CONFIG_NFSD_V4 */ 371#endif /* CONFIG_NFSD_V4 */
339 372
340#endif /* LINUX_NFSD_NFSD_H */ 373#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 90c6aa6d5e0f..c763de5c1157 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -59,28 +59,25 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
59 * the write call). 59 * the write call).
60 */ 60 */
61static inline __be32 61static inline __be32
62nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) 62nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int requested)
63{ 63{
64 /* Type can be negative when creating hardlinks - not to a dir */ 64 mode &= S_IFMT;
65 if (type > 0 && (mode & S_IFMT) != type) { 65
66 if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) 66 if (requested == 0) /* the caller doesn't care */
67 return nfserr_symlink; 67 return nfs_ok;
68 else if (type == S_IFDIR) 68 if (mode == requested)
69 return nfserr_notdir; 69 return nfs_ok;
70 else if ((mode & S_IFMT) == S_IFDIR) 70 /*
71 return nfserr_isdir; 71 * v4 has an error more specific than err_notdir which we should
72 else 72 * return in preference to err_notdir:
73 return nfserr_inval; 73 */
74 } 74 if (rqstp->rq_vers == 4 && mode == S_IFLNK)
75 if (type < 0 && (mode & S_IFMT) == -type) { 75 return nfserr_symlink;
76 if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) 76 if (requested == S_IFDIR)
77 return nfserr_symlink; 77 return nfserr_notdir;
78 else if (type == -S_IFDIR) 78 if (mode == S_IFDIR)
79 return nfserr_isdir; 79 return nfserr_isdir;
80 else 80 return nfserr_inval;
81 return nfserr_notdir;
82 }
83 return 0;
84} 81}
85 82
86static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, 83static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4eefaf1b42e8..a3cf38476a1b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
35#ifndef _NFSD4_STATE_H 35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H 36#define _NFSD4_STATE_H
37 37
38#include <linux/idr.h>
38#include <linux/sunrpc/svc_xprt.h> 39#include <linux/sunrpc/svc_xprt.h>
39#include <linux/nfsd/nfsfh.h> 40#include <linux/nfsd/nfsfh.h>
40#include "nfsfh.h" 41#include "nfsfh.h"
@@ -45,24 +46,20 @@ typedef struct {
45} clientid_t; 46} clientid_t;
46 47
47typedef struct { 48typedef struct {
48 u32 so_boot; 49 clientid_t so_clid;
49 u32 so_stateownerid; 50 u32 so_id;
50 u32 so_fileid;
51} stateid_opaque_t; 51} stateid_opaque_t;
52 52
53typedef struct { 53typedef struct {
54 u32 si_generation; 54 u32 si_generation;
55 stateid_opaque_t si_opaque; 55 stateid_opaque_t si_opaque;
56} stateid_t; 56} stateid_t;
57#define si_boot si_opaque.so_boot
58#define si_stateownerid si_opaque.so_stateownerid
59#define si_fileid si_opaque.so_fileid
60 57
61#define STATEID_FMT "(%08x/%08x/%08x/%08x)" 58#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
62#define STATEID_VAL(s) \ 59#define STATEID_VAL(s) \
63 (s)->si_boot, \ 60 (s)->si_opaque.so_clid.cl_boot, \
64 (s)->si_stateownerid, \ 61 (s)->si_opaque.so_clid.cl_id, \
65 (s)->si_fileid, \ 62 (s)->si_opaque.so_id, \
66 (s)->si_generation 63 (s)->si_generation
67 64
68struct nfsd4_callback { 65struct nfsd4_callback {
@@ -76,17 +73,27 @@ struct nfsd4_callback {
76 bool cb_done; 73 bool cb_done;
77}; 74};
78 75
76struct nfs4_stid {
77#define NFS4_OPEN_STID 1
78#define NFS4_LOCK_STID 2
79#define NFS4_DELEG_STID 4
80/* For an open stateid kept around *only* to process close replays: */
81#define NFS4_CLOSED_STID 8
82 unsigned char sc_type;
83 stateid_t sc_stateid;
84 struct nfs4_client *sc_client;
85};
86
79struct nfs4_delegation { 87struct nfs4_delegation {
88 struct nfs4_stid dl_stid; /* must be first field */
80 struct list_head dl_perfile; 89 struct list_head dl_perfile;
81 struct list_head dl_perclnt; 90 struct list_head dl_perclnt;
82 struct list_head dl_recall_lru; /* delegation recalled */ 91 struct list_head dl_recall_lru; /* delegation recalled */
83 atomic_t dl_count; /* ref count */ 92 atomic_t dl_count; /* ref count */
84 struct nfs4_client *dl_client;
85 struct nfs4_file *dl_file; 93 struct nfs4_file *dl_file;
86 u32 dl_type; 94 u32 dl_type;
87 time_t dl_time; 95 time_t dl_time;
88/* For recall: */ 96/* For recall: */
89 stateid_t dl_stateid;
90 struct knfsd_fh dl_fh; 97 struct knfsd_fh dl_fh;
91 int dl_retries; 98 int dl_retries;
92 struct nfsd4_callback dl_recall; 99 struct nfsd4_callback dl_recall;
@@ -104,6 +111,11 @@ struct nfs4_cb_conn {
104 struct svc_xprt *cb_xprt; /* minorversion 1 only */ 111 struct svc_xprt *cb_xprt; /* minorversion 1 only */
105}; 112};
106 113
114static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
115{
116 return container_of(s, struct nfs4_delegation, dl_stid);
117}
118
107/* Maximum number of slots per session. 160 is useful for long haul TCP */ 119/* Maximum number of slots per session. 160 is useful for long haul TCP */
108#define NFSD_MAX_SLOTS_PER_SESSION 160 120#define NFSD_MAX_SLOTS_PER_SESSION 160
109/* Maximum number of operations per session compound */ 121/* Maximum number of operations per session compound */
@@ -220,6 +232,7 @@ struct nfs4_client {
220 struct list_head cl_idhash; /* hash by cl_clientid.id */ 232 struct list_head cl_idhash; /* hash by cl_clientid.id */
221 struct list_head cl_strhash; /* hash by cl_name */ 233 struct list_head cl_strhash; /* hash by cl_name */
222 struct list_head cl_openowners; 234 struct list_head cl_openowners;
235 struct idr cl_stateids; /* stateid lookup */
223 struct list_head cl_delegations; 236 struct list_head cl_delegations;
224 struct list_head cl_lru; /* tail queue */ 237 struct list_head cl_lru; /* tail queue */
225 struct xdr_netobj cl_name; /* id generated by client */ 238 struct xdr_netobj cl_name; /* id generated by client */
@@ -245,6 +258,7 @@ struct nfs4_client {
245#define NFSD4_CB_UP 0 258#define NFSD4_CB_UP 0
246#define NFSD4_CB_UNKNOWN 1 259#define NFSD4_CB_UNKNOWN 1
247#define NFSD4_CB_DOWN 2 260#define NFSD4_CB_DOWN 2
261#define NFSD4_CB_FAULT 3
248 int cl_cb_state; 262 int cl_cb_state;
249 struct nfsd4_callback cl_cb_null; 263 struct nfsd4_callback cl_cb_null;
250 struct nfsd4_session *cl_cb_session; 264 struct nfsd4_session *cl_cb_session;
@@ -293,6 +307,9 @@ static inline void
293update_stateid(stateid_t *stateid) 307update_stateid(stateid_t *stateid)
294{ 308{
295 stateid->si_generation++; 309 stateid->si_generation++;
310 /* Wraparound recommendation from 3530bis-13 9.1.3.2: */
311 if (stateid->si_generation == 0)
312 stateid->si_generation = 1;
296} 313}
297 314
298/* A reasonable value for REPLAY_ISIZE was estimated as follows: 315/* A reasonable value for REPLAY_ISIZE was estimated as follows:
@@ -312,49 +329,57 @@ struct nfs4_replay {
312 __be32 rp_status; 329 __be32 rp_status;
313 unsigned int rp_buflen; 330 unsigned int rp_buflen;
314 char *rp_buf; 331 char *rp_buf;
315 unsigned intrp_allocated;
316 struct knfsd_fh rp_openfh; 332 struct knfsd_fh rp_openfh;
317 char rp_ibuf[NFSD4_REPLAY_ISIZE]; 333 char rp_ibuf[NFSD4_REPLAY_ISIZE];
318}; 334};
319 335
320/*
321* nfs4_stateowner can either be an open_owner, or a lock_owner
322*
323* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
324* for lock_owner
325* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
326* for lock_owner
327* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
328* struct is reaped.
329* so_perfilestate: heads the list of nfs4_stateid (either open or lock)
330* and is used to ensure no dangling nfs4_stateid references when we
331* release a stateowner.
332* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
333* close is called to reap associated byte-range locks
334* so_close_lru: (open) stateowner is placed on this list instead of being
335* reaped (when so_perfilestate is empty) to hold the last close replay.
336* reaped by laundramat thread after lease period.
337*/
338struct nfs4_stateowner { 336struct nfs4_stateowner {
339 struct kref so_ref;
340 struct list_head so_idhash; /* hash by so_id */
341 struct list_head so_strhash; /* hash by op_name */ 337 struct list_head so_strhash; /* hash by op_name */
342 struct list_head so_perclient;
343 struct list_head so_stateids; 338 struct list_head so_stateids;
344 struct list_head so_perstateid; /* for lockowners only */
345 struct list_head so_close_lru; /* tail queue */
346 time_t so_time; /* time of placement on so_close_lru */
347 int so_is_open_owner; /* 1=openowner,0=lockowner */
348 u32 so_id;
349 struct nfs4_client * so_client; 339 struct nfs4_client * so_client;
350 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next 340 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
351 * sequence id expected from the client: */ 341 * sequence id expected from the client: */
352 u32 so_seqid; 342 u32 so_seqid;
353 struct xdr_netobj so_owner; /* open owner name */ 343 struct xdr_netobj so_owner; /* open owner name */
354 int so_confirmed; /* successful OPEN_CONFIRM? */
355 struct nfs4_replay so_replay; 344 struct nfs4_replay so_replay;
345 bool so_is_open_owner;
356}; 346};
357 347
348struct nfs4_openowner {
349 struct nfs4_stateowner oo_owner; /* must be first field */
350 struct list_head oo_perclient;
351 /*
352 * We keep around openowners a little while after last close,
353 * which saves clients from having to confirm, and allows us to
354 * handle close replays if they come soon enough. The close_lru
355 * is a list of such openowners, to be reaped by the laundromat
356 * thread eventually if they remain unused:
357 */
358 struct list_head oo_close_lru;
359 struct nfs4_ol_stateid *oo_last_closed_stid;
360 time_t oo_time; /* time of placement on oo_close_lru */
361#define NFS4_OO_CONFIRMED 1
362#define NFS4_OO_PURGE_CLOSE 2
363#define NFS4_OO_NEW 4
364 unsigned char oo_flags;
365};
366
367struct nfs4_lockowner {
368 struct nfs4_stateowner lo_owner; /* must be first element */
369 struct list_head lo_perstateid; /* for lockowners only */
370 struct list_head lo_list; /* for temporary uses */
371};
372
373static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
374{
375 return container_of(so, struct nfs4_openowner, oo_owner);
376}
377
378static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
379{
380 return container_of(so, struct nfs4_lockowner, lo_owner);
381}
382
358/* 383/*
359* nfs4_file: a file opened by some number of (open) nfs4_stateowners. 384* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
360* o fi_perfile list is used to search for conflicting 385* o fi_perfile list is used to search for conflicting
@@ -368,17 +393,17 @@ struct nfs4_file {
368 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ 393 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
369 struct file * fi_fds[3]; 394 struct file * fi_fds[3];
370 /* 395 /*
371 * Each open or lock stateid contributes 1 to either 396 * Each open or lock stateid contributes 0-4 to the counts
372 * fi_access[O_RDONLY], fi_access[O_WRONLY], or both, depending 397 * below depending on which bits are set in st_access_bitmap:
373 * on open or lock mode: 398 * 1 to fi_access[O_RDONLY] if NFS4_SHARE_ACCES_READ is set
399 * + 1 to fi_access[O_WRONLY] if NFS4_SHARE_ACCESS_WRITE is set
400 * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set.
374 */ 401 */
375 atomic_t fi_access[2]; 402 atomic_t fi_access[2];
376 struct file *fi_deleg_file; 403 struct file *fi_deleg_file;
377 struct file_lock *fi_lease; 404 struct file_lock *fi_lease;
378 atomic_t fi_delegees; 405 atomic_t fi_delegees;
379 struct inode *fi_inode; 406 struct inode *fi_inode;
380 u32 fi_id; /* used with stateowner->so_id
381 * for stateid_hashtbl hash */
382 bool fi_had_conflict; 407 bool fi_had_conflict;
383}; 408};
384 409
@@ -408,50 +433,27 @@ static inline struct file *find_any_file(struct nfs4_file *f)
408 return f->fi_fds[O_RDONLY]; 433 return f->fi_fds[O_RDONLY];
409} 434}
410 435
411/* 436/* "ol" stands for "Open or Lock". Better suggestions welcome. */
412* nfs4_stateid can either be an open stateid or (eventually) a lock stateid 437struct nfs4_ol_stateid {
413* 438 struct nfs4_stid st_stid; /* must be first field */
414* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
415*
416* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
417* st_perfile: file_hashtbl[] entry.
418* st_perfile_state: nfs4_stateowner->so_perfilestate
419* st_perlockowner: (open stateid) list of lock nfs4_stateowners
420* st_access_bmap: used only for open stateid
421* st_deny_bmap: used only for open stateid
422* st_openstp: open stateid lock stateid was derived from
423*
424* XXX: open stateids and lock stateids have diverged sufficiently that
425* we should consider defining separate structs for the two cases.
426*/
427
428struct nfs4_stateid {
429 struct list_head st_hash;
430 struct list_head st_perfile; 439 struct list_head st_perfile;
431 struct list_head st_perstateowner; 440 struct list_head st_perstateowner;
432 struct list_head st_lockowners; 441 struct list_head st_lockowners;
433 struct nfs4_stateowner * st_stateowner; 442 struct nfs4_stateowner * st_stateowner;
434 struct nfs4_file * st_file; 443 struct nfs4_file * st_file;
435 stateid_t st_stateid;
436 unsigned long st_access_bmap; 444 unsigned long st_access_bmap;
437 unsigned long st_deny_bmap; 445 unsigned long st_deny_bmap;
438 struct nfs4_stateid * st_openstp; 446 struct nfs4_ol_stateid * st_openstp;
439}; 447};
440 448
449static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
450{
451 return container_of(s, struct nfs4_ol_stateid, st_stid);
452}
453
441/* flags for preprocess_seqid_op() */ 454/* flags for preprocess_seqid_op() */
442#define HAS_SESSION 0x00000001
443#define CONFIRM 0x00000002
444#define OPEN_STATE 0x00000004
445#define LOCK_STATE 0x00000008
446#define RD_STATE 0x00000010 455#define RD_STATE 0x00000010
447#define WR_STATE 0x00000020 456#define WR_STATE 0x00000020
448#define CLOSE_STATE 0x00000040
449
450#define seqid_mutating_err(err) \
451 (((err) != nfserr_stale_clientid) && \
452 ((err) != nfserr_bad_seqid) && \
453 ((err) != nfserr_stale_stateid) && \
454 ((err) != nfserr_bad_stateid))
455 457
456struct nfsd4_compound_state; 458struct nfsd4_compound_state;
457 459
@@ -461,7 +463,8 @@ extern void nfs4_lock_state(void);
461extern void nfs4_unlock_state(void); 463extern void nfs4_unlock_state(void);
462extern int nfs4_in_grace(void); 464extern int nfs4_in_grace(void);
463extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 465extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
464extern void nfs4_free_stateowner(struct kref *kref); 466extern void nfs4_free_openowner(struct nfs4_openowner *);
467extern void nfs4_free_lockowner(struct nfs4_lockowner *);
465extern int set_callback_cred(void); 468extern int set_callback_cred(void);
466extern void nfsd4_probe_callback(struct nfs4_client *clp); 469extern void nfsd4_probe_callback(struct nfs4_client *clp);
467extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); 470extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
@@ -473,7 +476,7 @@ extern void nfsd4_destroy_callback_queue(void);
473extern void nfsd4_shutdown_callback(struct nfs4_client *); 476extern void nfsd4_shutdown_callback(struct nfs4_client *);
474extern void nfs4_put_delegation(struct nfs4_delegation *dp); 477extern void nfs4_put_delegation(struct nfs4_delegation *dp);
475extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 478extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
476extern void nfsd4_init_recdir(char *recdir_name); 479extern void nfsd4_init_recdir(void);
477extern int nfsd4_recdir_load(void); 480extern int nfsd4_recdir_load(void);
478extern void nfsd4_shutdown_recdir(void); 481extern void nfsd4_shutdown_recdir(void);
479extern int nfs4_client_to_reclaim(const char *name); 482extern int nfs4_client_to_reclaim(const char *name);
@@ -482,18 +485,7 @@ extern void nfsd4_recdir_purge_old(void);
482extern int nfsd4_create_clid_dir(struct nfs4_client *clp); 485extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
483extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); 486extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
484extern void release_session_client(struct nfsd4_session *); 487extern void release_session_client(struct nfsd4_session *);
485extern __be32 nfs4_validate_stateid(stateid_t *, int); 488extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
486 489extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
487static inline void
488nfs4_put_stateowner(struct nfs4_stateowner *so)
489{
490 kref_put(&so->so_ref, nfs4_free_stateowner);
491}
492
493static inline void
494nfs4_get_stateowner(struct nfs4_stateowner *so)
495{
496 kref_get(&so->so_ref);
497}
498 490
499#endif /* NFSD4_STATE_H */ 491#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fd0acca5370a..7a2e442623c8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -168,6 +168,8 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
168{ 168{
169 if (d_mountpoint(dentry)) 169 if (d_mountpoint(dentry))
170 return 1; 170 return 1;
171 if (nfsd4_is_junction(dentry))
172 return 1;
171 if (!(exp->ex_flags & NFSEXP_V4ROOT)) 173 if (!(exp->ex_flags & NFSEXP_V4ROOT))
172 return 0; 174 return 0;
173 return dentry->d_inode != NULL; 175 return dentry->d_inode != NULL;
@@ -502,7 +504,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
502 unsigned int flags = 0; 504 unsigned int flags = 0;
503 505
504 /* Get inode */ 506 /* Get inode */
505 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); 507 error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
506 if (error) 508 if (error)
507 return error; 509 return error;
508 510
@@ -592,6 +594,22 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
592 return error; 594 return error;
593} 595}
594 596
597#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction."
598#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type"
599int nfsd4_is_junction(struct dentry *dentry)
600{
601 struct inode *inode = dentry->d_inode;
602
603 if (inode == NULL)
604 return 0;
605 if (inode->i_mode & S_IXUGO)
606 return 0;
607 if (!(inode->i_mode & S_ISVTX))
608 return 0;
609 if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0)
610 return 0;
611 return 1;
612}
595#endif /* defined(CONFIG_NFSD_V4) */ 613#endif /* defined(CONFIG_NFSD_V4) */
596 614
597#ifdef CONFIG_NFSD_V3 615#ifdef CONFIG_NFSD_V3
@@ -1352,7 +1370,7 @@ __be32
1352do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1370do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1353 char *fname, int flen, struct iattr *iap, 1371 char *fname, int flen, struct iattr *iap,
1354 struct svc_fh *resfhp, int createmode, u32 *verifier, 1372 struct svc_fh *resfhp, int createmode, u32 *verifier,
1355 int *truncp, int *created) 1373 bool *truncp, bool *created)
1356{ 1374{
1357 struct dentry *dentry, *dchild = NULL; 1375 struct dentry *dentry, *dchild = NULL;
1358 struct inode *dirp; 1376 struct inode *dirp;
@@ -1632,10 +1650,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1632 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); 1650 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
1633 if (err) 1651 if (err)
1634 goto out; 1652 goto out;
1635 err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP); 1653 err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
1636 if (err) 1654 if (err)
1637 goto out; 1655 goto out;
1638 1656 err = nfserr_isdir;
1657 if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode))
1658 goto out;
1639 err = nfserr_perm; 1659 err = nfserr_perm;
1640 if (!len) 1660 if (!len)
1641 goto out; 1661 goto out;
@@ -2114,7 +2134,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2114 2134
2115 /* Allow read access to binaries even when mode 111 */ 2135 /* Allow read access to binaries even when mode 111 */
2116 if (err == -EACCES && S_ISREG(inode->i_mode) && 2136 if (err == -EACCES && S_ISREG(inode->i_mode) &&
2117 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) 2137 (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
2138 acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
2118 err = inode_permission(inode, MAY_EXEC); 2139 err = inode_permission(inode, MAY_EXEC);
2119 2140
2120 return err? nfserrno(err) : 0; 2141 return err? nfserrno(err) : 0;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index e0bbac04d1dd..3f54ad03bb2b 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -10,21 +10,22 @@
10/* 10/*
11 * Flags for nfsd_permission 11 * Flags for nfsd_permission
12 */ 12 */
13#define NFSD_MAY_NOP 0 13#define NFSD_MAY_NOP 0
14#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */ 14#define NFSD_MAY_EXEC 0x001 /* == MAY_EXEC */
15#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */ 15#define NFSD_MAY_WRITE 0x002 /* == MAY_WRITE */
16#define NFSD_MAY_READ 4 /* == MAY_READ */ 16#define NFSD_MAY_READ 0x004 /* == MAY_READ */
17#define NFSD_MAY_SATTR 8 17#define NFSD_MAY_SATTR 0x008
18#define NFSD_MAY_TRUNC 16 18#define NFSD_MAY_TRUNC 0x010
19#define NFSD_MAY_LOCK 32 19#define NFSD_MAY_LOCK 0x020
20#define NFSD_MAY_MASK 63 20#define NFSD_MAY_MASK 0x03f
21 21
22/* extra hints to permission and open routines: */ 22/* extra hints to permission and open routines: */
23#define NFSD_MAY_OWNER_OVERRIDE 64 23#define NFSD_MAY_OWNER_OVERRIDE 0x040
24#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 24#define NFSD_MAY_LOCAL_ACCESS 0x080 /* for device special files */
25#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 25#define NFSD_MAY_BYPASS_GSS_ON_ROOT 0x100
26#define NFSD_MAY_NOT_BREAK_LEASE 512 26#define NFSD_MAY_NOT_BREAK_LEASE 0x200
27#define NFSD_MAY_BYPASS_GSS 1024 27#define NFSD_MAY_BYPASS_GSS 0x400
28#define NFSD_MAY_READ_IF_EXEC 0x800
28 29
29#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 30#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
30#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 31#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
@@ -61,7 +62,7 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
61__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, 62__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
62 char *name, int len, struct iattr *attrs, 63 char *name, int len, struct iattr *attrs,
63 struct svc_fh *res, int createmode, 64 struct svc_fh *res, int createmode,
64 u32 *verifier, int *truncp, int *created); 65 u32 *verifier, bool *truncp, bool *created);
65__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 66__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
66 loff_t, unsigned long); 67 loff_t, unsigned long);
67#endif /* CONFIG_NFSD_V3 */ 68#endif /* CONFIG_NFSD_V3 */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d2a8d04428c7..2364747ee97d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -81,7 +81,6 @@ struct nfsd4_access {
81struct nfsd4_close { 81struct nfsd4_close {
82 u32 cl_seqid; /* request */ 82 u32 cl_seqid; /* request */
83 stateid_t cl_stateid; /* request+response */ 83 stateid_t cl_stateid; /* request+response */
84 struct nfs4_stateowner * cl_stateowner; /* response */
85}; 84};
86 85
87struct nfsd4_commit { 86struct nfsd4_commit {
@@ -131,7 +130,7 @@ struct nfsd4_link {
131 130
132struct nfsd4_lock_denied { 131struct nfsd4_lock_denied {
133 clientid_t ld_clientid; 132 clientid_t ld_clientid;
134 struct nfs4_stateowner *ld_sop; 133 struct xdr_netobj ld_owner;
135 u64 ld_start; 134 u64 ld_start;
136 u64 ld_length; 135 u64 ld_length;
137 u32 ld_type; 136 u32 ld_type;
@@ -165,9 +164,6 @@ struct nfsd4_lock {
165 } ok; 164 } ok;
166 struct nfsd4_lock_denied denied; 165 struct nfsd4_lock_denied denied;
167 } u; 166 } u;
168 /* The lk_replay_owner is the open owner in the open_to_lock_owner
169 * case and the lock owner otherwise: */
170 struct nfs4_stateowner *lk_replay_owner;
171}; 167};
172#define lk_new_open_seqid v.new.open_seqid 168#define lk_new_open_seqid v.new.open_seqid
173#define lk_new_open_stateid v.new.open_stateid 169#define lk_new_open_stateid v.new.open_stateid
@@ -188,7 +184,6 @@ struct nfsd4_lockt {
188 struct xdr_netobj lt_owner; 184 struct xdr_netobj lt_owner;
189 u64 lt_offset; 185 u64 lt_offset;
190 u64 lt_length; 186 u64 lt_length;
191 struct nfs4_stateowner * lt_stateowner;
192 struct nfsd4_lock_denied lt_denied; 187 struct nfsd4_lock_denied lt_denied;
193}; 188};
194 189
@@ -199,7 +194,6 @@ struct nfsd4_locku {
199 stateid_t lu_stateid; 194 stateid_t lu_stateid;
200 u64 lu_offset; 195 u64 lu_offset;
201 u64 lu_length; 196 u64 lu_length;
202 struct nfs4_stateowner *lu_stateowner;
203}; 197};
204 198
205 199
@@ -232,8 +226,11 @@ struct nfsd4_open {
232 u32 op_recall; /* recall */ 226 u32 op_recall; /* recall */
233 struct nfsd4_change_info op_cinfo; /* response */ 227 struct nfsd4_change_info op_cinfo; /* response */
234 u32 op_rflags; /* response */ 228 u32 op_rflags; /* response */
235 int op_truncate; /* used during processing */ 229 bool op_truncate; /* used during processing */
236 struct nfs4_stateowner *op_stateowner; /* used during processing */ 230 bool op_created; /* used during processing */
231 struct nfs4_openowner *op_openowner; /* used during processing */
232 struct nfs4_file *op_file; /* used during processing */
233 struct nfs4_ol_stateid *op_stp; /* used during processing */
237 struct nfs4_acl *op_acl; 234 struct nfs4_acl *op_acl;
238}; 235};
239#define op_iattr iattr 236#define op_iattr iattr
@@ -243,7 +240,6 @@ struct nfsd4_open_confirm {
243 stateid_t oc_req_stateid /* request */; 240 stateid_t oc_req_stateid /* request */;
244 u32 oc_seqid /* request */; 241 u32 oc_seqid /* request */;
245 stateid_t oc_resp_stateid /* response */; 242 stateid_t oc_resp_stateid /* response */;
246 struct nfs4_stateowner * oc_stateowner; /* response */
247}; 243};
248 244
249struct nfsd4_open_downgrade { 245struct nfsd4_open_downgrade {
@@ -251,7 +247,6 @@ struct nfsd4_open_downgrade {
251 u32 od_seqid; 247 u32 od_seqid;
252 u32 od_share_access; 248 u32 od_share_access;
253 u32 od_share_deny; 249 u32 od_share_deny;
254 struct nfs4_stateowner *od_stateowner;
255}; 250};
256 251
257 252
@@ -325,8 +320,7 @@ struct nfsd4_setattr {
325 320
326struct nfsd4_setclientid { 321struct nfsd4_setclientid {
327 nfs4_verifier se_verf; /* request */ 322 nfs4_verifier se_verf; /* request */
328 u32 se_namelen; /* request */ 323 struct xdr_netobj se_name;
329 char * se_name; /* request */
330 u32 se_callback_prog; /* request */ 324 u32 se_callback_prog; /* request */
331 u32 se_callback_netid_len; /* request */ 325 u32 se_callback_netid_len; /* request */
332 char * se_callback_netid_val; /* request */ 326 char * se_callback_netid_val; /* request */
@@ -351,7 +345,6 @@ struct nfsd4_saved_compoundargs {
351 345
352struct nfsd4_test_stateid { 346struct nfsd4_test_stateid {
353 __be32 ts_num_ids; 347 __be32 ts_num_ids;
354 __be32 ts_has_session;
355 struct nfsd4_compoundargs *ts_saved_args; 348 struct nfsd4_compoundargs *ts_saved_args;
356 struct nfsd4_saved_compoundargs ts_savedp; 349 struct nfsd4_saved_compoundargs ts_savedp;
357}; 350};
@@ -405,6 +398,10 @@ struct nfsd4_destroy_session {
405 struct nfs4_sessionid sessionid; 398 struct nfs4_sessionid sessionid;
406}; 399};
407 400
401struct nfsd4_destroy_clientid {
402 clientid_t clientid;
403};
404
408struct nfsd4_reclaim_complete { 405struct nfsd4_reclaim_complete {
409 u32 rca_one_fs; 406 u32 rca_one_fs;
410}; 407};
@@ -532,6 +529,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
532 struct nfsd4_compoundargs *); 529 struct nfsd4_compoundargs *);
533int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, 530int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
534 struct nfsd4_compoundres *); 531 struct nfsd4_compoundres *);
532int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
535void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); 533void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
536void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); 534void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
537__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, 535__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
@@ -558,11 +556,13 @@ extern __be32 nfsd4_sequence(struct svc_rqst *,
558extern __be32 nfsd4_destroy_session(struct svc_rqst *, 556extern __be32 nfsd4_destroy_session(struct svc_rqst *,
559 struct nfsd4_compound_state *, 557 struct nfsd4_compound_state *,
560 struct nfsd4_destroy_session *); 558 struct nfsd4_destroy_session *);
559extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *);
561__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); 560__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
562extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, 561extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
563 struct nfsd4_open *open); 562 struct nfsd4_open *open);
564extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, 563extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
565 struct svc_fh *current_fh, struct nfsd4_open *open); 564 struct svc_fh *current_fh, struct nfsd4_open *open);
565extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status);
566extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, 566extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
567 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); 567 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
568extern __be32 nfsd4_close(struct svc_rqst *rqstp, 568extern __be32 nfsd4_close(struct svc_rqst *rqstp,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 81ecf9c0bf0a..194fb22ef79d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,20 +7185,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
7185{ 7185{
7186 int ret = 0; 7186 int ret = 0;
7187 struct buffer_head *dir_bh = NULL; 7187 struct buffer_head *dir_bh = NULL;
7188 struct ocfs2_security_xattr_info si = {
7189 .enable = 1,
7190 };
7191 7188
7192 ret = ocfs2_init_security_get(inode, dir, qstr, &si); 7189 ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7193 if (!ret) { 7190 if (!ret) {
7194 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7195 si.name, si.value, si.value_len,
7196 XATTR_CREATE);
7197 if (ret) {
7198 mlog_errno(ret);
7199 goto leave;
7200 }
7201 } else if (ret != -EOPNOTSUPP) {
7202 mlog_errno(ret); 7191 mlog_errno(ret);
7203 goto leave; 7192 goto leave;
7204 } 7193 }
@@ -7255,6 +7244,22 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7255 name, value, size, flags); 7244 name, value, size, flags);
7256} 7245}
7257 7246
7247int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7248 void *fs_info)
7249{
7250 const struct xattr *xattr;
7251 int err = 0;
7252
7253 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7254 err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7255 xattr->name, xattr->value,
7256 xattr->value_len, XATTR_CREATE);
7257 if (err)
7258 break;
7259 }
7260 return err;
7261}
7262
7258int ocfs2_init_security_get(struct inode *inode, 7263int ocfs2_init_security_get(struct inode *inode,
7259 struct inode *dir, 7264 struct inode *dir,
7260 const struct qstr *qstr, 7265 const struct qstr *qstr,
@@ -7263,8 +7268,13 @@ int ocfs2_init_security_get(struct inode *inode,
7263 /* check whether ocfs2 support feature xattr */ 7268 /* check whether ocfs2 support feature xattr */
7264 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7269 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7265 return -EOPNOTSUPP; 7270 return -EOPNOTSUPP;
7266 return security_inode_init_security(inode, dir, qstr, &si->name, 7271 if (si)
7267 &si->value, &si->value_len); 7272 return security_old_inode_init_security(inode, dir, qstr,
7273 &si->name, &si->value,
7274 &si->value_len);
7275
7276 return security_inode_init_security(inode, dir, qstr,
7277 &ocfs2_initxattrs, NULL);
7268} 7278}
7269 7279
7270int ocfs2_init_security_set(handle_t *handle, 7280int ocfs2_init_security_set(handle_t *handle,
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9758b654a1bc..42b274da92c3 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -10,6 +10,7 @@
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/irqnr.h> 11#include <linux/irqnr.h>
12#include <asm/cputime.h> 12#include <asm/cputime.h>
13#include <linux/tick.h>
13 14
14#ifndef arch_irq_stat_cpu 15#ifndef arch_irq_stat_cpu
15#define arch_irq_stat_cpu(cpu) 0 16#define arch_irq_stat_cpu(cpu) 0
@@ -21,6 +22,35 @@
21#define arch_idle_time(cpu) 0 22#define arch_idle_time(cpu) 0
22#endif 23#endif
23 24
25static cputime64_t get_idle_time(int cpu)
26{
27 u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
28 cputime64_t idle;
29
30 if (idle_time == -1ULL) {
31 /* !NO_HZ so we can rely on cpustat.idle */
32 idle = kstat_cpu(cpu).cpustat.idle;
33 idle = cputime64_add(idle, arch_idle_time(cpu));
34 } else
35 idle = usecs_to_cputime(idle_time);
36
37 return idle;
38}
39
40static cputime64_t get_iowait_time(int cpu)
41{
42 u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
43 cputime64_t iowait;
44
45 if (iowait_time == -1ULL)
46 /* !NO_HZ so we can rely on cpustat.iowait */
47 iowait = kstat_cpu(cpu).cpustat.iowait;
48 else
49 iowait = usecs_to_cputime(iowait_time);
50
51 return iowait;
52}
53
24static int show_stat(struct seq_file *p, void *v) 54static int show_stat(struct seq_file *p, void *v)
25{ 55{
26 int i, j; 56 int i, j;
@@ -42,9 +72,8 @@ static int show_stat(struct seq_file *p, void *v)
42 user = cputime64_add(user, kstat_cpu(i).cpustat.user); 72 user = cputime64_add(user, kstat_cpu(i).cpustat.user);
43 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 73 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
44 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 74 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
45 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); 75 idle = cputime64_add(idle, get_idle_time(i));
46 idle = cputime64_add(idle, arch_idle_time(i)); 76 iowait = cputime64_add(iowait, get_iowait_time(i));
47 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
48 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 77 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
49 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 78 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
50 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 79 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
@@ -76,14 +105,12 @@ static int show_stat(struct seq_file *p, void *v)
76 (unsigned long long)cputime64_to_clock_t(guest), 105 (unsigned long long)cputime64_to_clock_t(guest),
77 (unsigned long long)cputime64_to_clock_t(guest_nice)); 106 (unsigned long long)cputime64_to_clock_t(guest_nice));
78 for_each_online_cpu(i) { 107 for_each_online_cpu(i) {
79
80 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 108 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
81 user = kstat_cpu(i).cpustat.user; 109 user = kstat_cpu(i).cpustat.user;
82 nice = kstat_cpu(i).cpustat.nice; 110 nice = kstat_cpu(i).cpustat.nice;
83 system = kstat_cpu(i).cpustat.system; 111 system = kstat_cpu(i).cpustat.system;
84 idle = kstat_cpu(i).cpustat.idle; 112 idle = get_idle_time(i);
85 idle = cputime64_add(idle, arch_idle_time(i)); 113 iowait = get_iowait_time(i);
86 iowait = kstat_cpu(i).cpustat.iowait;
87 irq = kstat_cpu(i).cpustat.irq; 114 irq = kstat_cpu(i).cpustat.irq;
88 softirq = kstat_cpu(i).cpustat.softirq; 115 softirq = kstat_cpu(i).cpustat.softirq;
89 steal = kstat_cpu(i).cpustat.steal; 116 steal = kstat_cpu(i).cpustat.steal;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 25b6a887adb9..5afaa58a8630 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -877,30 +877,54 @@ struct numa_maps_private {
877 struct numa_maps md; 877 struct numa_maps md;
878}; 878};
879 879
880static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) 880static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
881 unsigned long nr_pages)
881{ 882{
882 int count = page_mapcount(page); 883 int count = page_mapcount(page);
883 884
884 md->pages++; 885 md->pages += nr_pages;
885 if (pte_dirty || PageDirty(page)) 886 if (pte_dirty || PageDirty(page))
886 md->dirty++; 887 md->dirty += nr_pages;
887 888
888 if (PageSwapCache(page)) 889 if (PageSwapCache(page))
889 md->swapcache++; 890 md->swapcache += nr_pages;
890 891
891 if (PageActive(page) || PageUnevictable(page)) 892 if (PageActive(page) || PageUnevictable(page))
892 md->active++; 893 md->active += nr_pages;
893 894
894 if (PageWriteback(page)) 895 if (PageWriteback(page))
895 md->writeback++; 896 md->writeback += nr_pages;
896 897
897 if (PageAnon(page)) 898 if (PageAnon(page))
898 md->anon++; 899 md->anon += nr_pages;
899 900
900 if (count > md->mapcount_max) 901 if (count > md->mapcount_max)
901 md->mapcount_max = count; 902 md->mapcount_max = count;
902 903
903 md->node[page_to_nid(page)]++; 904 md->node[page_to_nid(page)] += nr_pages;
905}
906
907static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
908 unsigned long addr)
909{
910 struct page *page;
911 int nid;
912
913 if (!pte_present(pte))
914 return NULL;
915
916 page = vm_normal_page(vma, addr, pte);
917 if (!page)
918 return NULL;
919
920 if (PageReserved(page))
921 return NULL;
922
923 nid = page_to_nid(page);
924 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
925 return NULL;
926
927 return page;
904} 928}
905 929
906static int gather_pte_stats(pmd_t *pmd, unsigned long addr, 930static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
@@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
912 pte_t *pte; 936 pte_t *pte;
913 937
914 md = walk->private; 938 md = walk->private;
915 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 939 spin_lock(&walk->mm->page_table_lock);
916 do { 940 if (pmd_trans_huge(*pmd)) {
917 struct page *page; 941 if (pmd_trans_splitting(*pmd)) {
918 int nid; 942 spin_unlock(&walk->mm->page_table_lock);
943 wait_split_huge_page(md->vma->anon_vma, pmd);
944 } else {
945 pte_t huge_pte = *(pte_t *)pmd;
946 struct page *page;
919 947
920 if (!pte_present(*pte)) 948 page = can_gather_numa_stats(huge_pte, md->vma, addr);
921 continue; 949 if (page)
950 gather_stats(page, md, pte_dirty(huge_pte),
951 HPAGE_PMD_SIZE/PAGE_SIZE);
952 spin_unlock(&walk->mm->page_table_lock);
953 return 0;
954 }
955 } else {
956 spin_unlock(&walk->mm->page_table_lock);
957 }
922 958
923 page = vm_normal_page(md->vma, addr, *pte); 959 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
960 do {
961 struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
924 if (!page) 962 if (!page)
925 continue; 963 continue;
926 964 gather_stats(page, md, pte_dirty(*pte), 1);
927 if (PageReserved(page))
928 continue;
929
930 nid = page_to_nid(page);
931 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
932 continue;
933
934 gather_stats(page, md, pte_dirty(*pte));
935 965
936 } while (pte++, addr += PAGE_SIZE, addr != end); 966 } while (pte++, addr += PAGE_SIZE, addr != end);
937 pte_unmap_unlock(orig_pte, ptl); 967 pte_unmap_unlock(orig_pte, ptl);
@@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
952 return 0; 982 return 0;
953 983
954 md = walk->private; 984 md = walk->private;
955 gather_stats(page, md, pte_dirty(*pte)); 985 gather_stats(page, md, pte_dirty(*pte), 1);
956 return 0; 986 return 0;
957} 987}
958 988
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b34bdb25490c..10b6be3ca280 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -355,7 +355,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
355 * resolution (think about autofs) and thus deadlocks could arise. 355 * resolution (think about autofs) and thus deadlocks could arise.
356 */ 356 */
357 if (cmds == Q_QUOTAON) { 357 if (cmds == Q_QUOTAON) {
358 ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path); 358 ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
359 if (ret) 359 if (ret)
360 pathp = ERR_PTR(ret); 360 pathp = ERR_PTR(ret);
361 else 361 else
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a159ba5a35e7..eb711060a6f2 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -291,14 +291,13 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb,
291 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 291 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
292 jb = jb_array + i; 292 jb = jb_array + i;
293 jb->journal_list = NULL; 293 jb->journal_list = NULL;
294 jb->bitmaps = vmalloc(mem); 294 jb->bitmaps = vzalloc(mem);
295 if (!jb->bitmaps) { 295 if (!jb->bitmaps) {
296 reiserfs_warning(sb, "clm-2000", "unable to " 296 reiserfs_warning(sb, "clm-2000", "unable to "
297 "allocate bitmaps for journal lists"); 297 "allocate bitmaps for journal lists");
298 failed = 1; 298 failed = 1;
299 break; 299 break;
300 } 300 }
301 memset(jb->bitmaps, 0, mem);
302 } 301 }
303 if (failed) { 302 if (failed) {
304 free_list_bitmaps(sb, jb_array); 303 free_list_bitmaps(sb, jb_array);
@@ -353,11 +352,10 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
353 if (num_cnodes <= 0) { 352 if (num_cnodes <= 0) {
354 return NULL; 353 return NULL;
355 } 354 }
356 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); 355 head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
357 if (!head) { 356 if (!head) {
358 return NULL; 357 return NULL;
359 } 358 }
360 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
361 head[0].prev = NULL; 359 head[0].prev = NULL;
362 head[0].next = head + 1; 360 head[0].next = head + 1;
363 for (i = 1; i < num_cnodes; i++) { 361 for (i = 1; i < num_cnodes; i++) {
@@ -2685,14 +2683,13 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2685 * dependency inversion warnings. 2683 * dependency inversion warnings.
2686 */ 2684 */
2687 reiserfs_write_unlock(sb); 2685 reiserfs_write_unlock(sb);
2688 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 2686 journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
2689 if (!journal) { 2687 if (!journal) {
2690 reiserfs_warning(sb, "journal-1256", 2688 reiserfs_warning(sb, "journal-1256",
2691 "unable to get memory for journal structure"); 2689 "unable to get memory for journal structure");
2692 reiserfs_write_lock(sb); 2690 reiserfs_write_lock(sb);
2693 return 1; 2691 return 1;
2694 } 2692 }
2695 memset(journal, 0, sizeof(struct reiserfs_journal));
2696 INIT_LIST_HEAD(&journal->j_bitmap_nodes); 2693 INIT_LIST_HEAD(&journal->j_bitmap_nodes);
2697 INIT_LIST_HEAD(&journal->j_prealloc_list); 2694 INIT_LIST_HEAD(&journal->j_prealloc_list);
2698 INIT_LIST_HEAD(&journal->j_working_list); 2695 INIT_LIST_HEAD(&journal->j_working_list);
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index b6b9b1fe33b0..7483279b482d 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -111,15 +111,13 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
111 /* allocate additional bitmap blocks, reallocate array of bitmap 111 /* allocate additional bitmap blocks, reallocate array of bitmap
112 * block pointers */ 112 * block pointers */
113 bitmap = 113 bitmap =
114 vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); 114 vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
115 if (!bitmap) { 115 if (!bitmap) {
116 /* Journal bitmaps are still supersized, but the memory isn't 116 /* Journal bitmaps are still supersized, but the memory isn't
117 * leaked, so I guess it's ok */ 117 * leaked, so I guess it's ok */
118 printk("reiserfs_resize: unable to allocate memory.\n"); 118 printk("reiserfs_resize: unable to allocate memory.\n");
119 return -ENOMEM; 119 return -ENOMEM;
120 } 120 }
121 memset(bitmap, 0,
122 sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
123 for (i = 0; i < bmap_nr; i++) 121 for (i = 0; i < bmap_nr; i++)
124 bitmap[i] = old_bitmap[i]; 122 bitmap[i] = old_bitmap[i];
125 123
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index ef66c18a9332..534668fa41be 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -66,8 +66,8 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
66 if (IS_PRIVATE(dir)) 66 if (IS_PRIVATE(dir))
67 return 0; 67 return 0;
68 68
69 error = security_inode_init_security(inode, dir, qstr, &sec->name, 69 error = security_old_inode_init_security(inode, dir, qstr, &sec->name,
70 &sec->value, &sec->length); 70 &sec->value, &sec->length);
71 if (error) { 71 if (error) {
72 if (error == -EOPNOTSUPP) 72 if (error == -EOPNOTSUPP)
73 error = 0; 73 error = 0;
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 1360d4f88f41..048b59d5b2f0 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -19,9 +19,9 @@ config SQUASHFS
19 19
20 If you want to compile this as a module ( = code which can be 20 If you want to compile this as a module ( = code which can be
21 inserted in and removed from the running kernel whenever you want), 21 inserted in and removed from the running kernel whenever you want),
22 say M here and read <file:Documentation/modules.txt>. The module 22 say M here. The module will be called squashfs. Note that the root
23 will be called squashfs. Note that the root file system (the one 23 file system (the one containing the directory /) cannot be compiled
24 containing the directory /) cannot be compiled as a module. 24 as a module.
25 25
26 If unsure, say N. 26 If unsure, say N.
27 27
diff --git a/fs/stat.c b/fs/stat.c
index ba5316ffac61..78a3aa83c7ea 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -81,8 +81,6 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
81 81
82 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
83 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
84 if (flag & AT_NO_AUTOMOUNT)
85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH) 84 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY; 85 lookup_flags |= LOOKUP_EMPTY;
88 86
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ea9120a830d8..48ffbdf0d017 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,20 +43,48 @@ static DEFINE_IDA(sysfs_ino_ida);
43static void sysfs_link_sibling(struct sysfs_dirent *sd) 43static void sysfs_link_sibling(struct sysfs_dirent *sd)
44{ 44{
45 struct sysfs_dirent *parent_sd = sd->s_parent; 45 struct sysfs_dirent *parent_sd = sd->s_parent;
46 struct sysfs_dirent **pos;
47 46
48 BUG_ON(sd->s_sibling); 47 struct rb_node **p;
49 48 struct rb_node *parent;
50 /* Store directory entries in order by ino. This allows 49
51 * readdir to properly restart without having to add a 50 if (sysfs_type(sd) == SYSFS_DIR)
52 * cursor into the s_dir.children list. 51 parent_sd->s_dir.subdirs++;
53 */ 52
54 for (pos = &parent_sd->s_dir.children; *pos; pos = &(*pos)->s_sibling) { 53 p = &parent_sd->s_dir.inode_tree.rb_node;
55 if (sd->s_ino < (*pos)->s_ino) 54 parent = NULL;
56 break; 55 while (*p) {
56 parent = *p;
57#define node rb_entry(parent, struct sysfs_dirent, inode_node)
58 if (sd->s_ino < node->s_ino) {
59 p = &node->inode_node.rb_left;
60 } else if (sd->s_ino > node->s_ino) {
61 p = &node->inode_node.rb_right;
62 } else {
63 printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n",
64 (unsigned long) sd->s_ino);
65 BUG();
66 }
67#undef node
57 } 68 }
58 sd->s_sibling = *pos; 69 rb_link_node(&sd->inode_node, parent, p);
59 *pos = sd; 70 rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
71
72 p = &parent_sd->s_dir.name_tree.rb_node;
73 parent = NULL;
74 while (*p) {
75 int c;
76 parent = *p;
77#define node rb_entry(parent, struct sysfs_dirent, name_node)
78 c = strcmp(sd->s_name, node->s_name);
79 if (c < 0) {
80 p = &node->name_node.rb_left;
81 } else {
82 p = &node->name_node.rb_right;
83 }
84#undef node
85 }
86 rb_link_node(&sd->name_node, parent, p);
87 rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree);
60} 88}
61 89
62/** 90/**
@@ -71,16 +99,11 @@ static void sysfs_link_sibling(struct sysfs_dirent *sd)
71 */ 99 */
72static void sysfs_unlink_sibling(struct sysfs_dirent *sd) 100static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
73{ 101{
74 struct sysfs_dirent **pos; 102 if (sysfs_type(sd) == SYSFS_DIR)
103 sd->s_parent->s_dir.subdirs--;
75 104
76 for (pos = &sd->s_parent->s_dir.children; *pos; 105 rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree);
77 pos = &(*pos)->s_sibling) { 106 rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
78 if (*pos == sd) {
79 *pos = sd->s_sibling;
80 sd->s_sibling = NULL;
81 break;
82 }
83 }
84} 107}
85 108
86/** 109/**
@@ -126,7 +149,6 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
126 */ 149 */
127void sysfs_put_active(struct sysfs_dirent *sd) 150void sysfs_put_active(struct sysfs_dirent *sd)
128{ 151{
129 struct completion *cmpl;
130 int v; 152 int v;
131 153
132 if (unlikely(!sd)) 154 if (unlikely(!sd))
@@ -138,10 +160,9 @@ void sysfs_put_active(struct sysfs_dirent *sd)
138 return; 160 return;
139 161
140 /* atomic_dec_return() is a mb(), we'll always see the updated 162 /* atomic_dec_return() is a mb(), we'll always see the updated
141 * sd->s_sibling. 163 * sd->u.completion.
142 */ 164 */
143 cmpl = (void *)sd->s_sibling; 165 complete(sd->u.completion);
144 complete(cmpl);
145} 166}
146 167
147/** 168/**
@@ -155,16 +176,16 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
155 DECLARE_COMPLETION_ONSTACK(wait); 176 DECLARE_COMPLETION_ONSTACK(wait);
156 int v; 177 int v;
157 178
158 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); 179 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
159 180
160 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) 181 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
161 return; 182 return;
162 183
163 sd->s_sibling = (void *)&wait; 184 sd->u.completion = (void *)&wait;
164 185
165 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); 186 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
166 /* atomic_add_return() is a mb(), put_active() will always see 187 /* atomic_add_return() is a mb(), put_active() will always see
167 * the updated sd->s_sibling. 188 * the updated sd->u.completion.
168 */ 189 */
169 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 190 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
170 191
@@ -173,8 +194,6 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
173 wait_for_completion(&wait); 194 wait_for_completion(&wait);
174 } 195 }
175 196
176 sd->s_sibling = NULL;
177
178 lock_acquired(&sd->dep_map, _RET_IP_); 197 lock_acquired(&sd->dep_map, _RET_IP_);
179 rwsem_release(&sd->dep_map, 1, _RET_IP_); 198 rwsem_release(&sd->dep_map, 1, _RET_IP_);
180} 199}
@@ -384,6 +403,13 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
384{ 403{
385 struct sysfs_inode_attrs *ps_iattr; 404 struct sysfs_inode_attrs *ps_iattr;
386 405
406 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
407 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
408 sysfs_ns_type(acxt->parent_sd)? "required": "invalid",
409 acxt->parent_sd->s_name, sd->s_name);
410 return -EINVAL;
411 }
412
387 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) 413 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name))
388 return -EEXIST; 414 return -EEXIST;
389 415
@@ -490,7 +516,7 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
490 } 516 }
491 517
492 sd->s_flags |= SYSFS_FLAG_REMOVED; 518 sd->s_flags |= SYSFS_FLAG_REMOVED;
493 sd->s_sibling = acxt->removed; 519 sd->u.removed_list = acxt->removed;
494 acxt->removed = sd; 520 acxt->removed = sd;
495} 521}
496 522
@@ -514,8 +540,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
514 while (acxt->removed) { 540 while (acxt->removed) {
515 struct sysfs_dirent *sd = acxt->removed; 541 struct sysfs_dirent *sd = acxt->removed;
516 542
517 acxt->removed = sd->s_sibling; 543 acxt->removed = sd->u.removed_list;
518 sd->s_sibling = NULL;
519 544
520 sysfs_deactivate(sd); 545 sysfs_deactivate(sd);
521 unmap_bin_file(sd); 546 unmap_bin_file(sd);
@@ -540,15 +565,43 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
540 const void *ns, 565 const void *ns,
541 const unsigned char *name) 566 const unsigned char *name)
542{ 567{
543 struct sysfs_dirent *sd; 568 struct rb_node *p = parent_sd->s_dir.name_tree.rb_node;
569 struct sysfs_dirent *found = NULL;
544 570
545 for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) { 571 if (!!sysfs_ns_type(parent_sd) != !!ns) {
546 if (ns && sd->s_ns && (sd->s_ns != ns)) 572 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
547 continue; 573 sysfs_ns_type(parent_sd)? "required": "invalid",
548 if (!strcmp(sd->s_name, name)) 574 parent_sd->s_name, name);
549 return sd; 575 return NULL;
550 } 576 }
551 return NULL; 577
578 while (p) {
579 int c;
580#define node rb_entry(p, struct sysfs_dirent, name_node)
581 c = strcmp(name, node->s_name);
582 if (c < 0) {
583 p = node->name_node.rb_left;
584 } else if (c > 0) {
585 p = node->name_node.rb_right;
586 } else {
587 found = node;
588 p = node->name_node.rb_left;
589 }
590#undef node
591 }
592
593 if (found) {
594 while (found->s_ns != ns) {
595 p = rb_next(&found->name_node);
596 if (!p)
597 return NULL;
598 found = rb_entry(p, struct sysfs_dirent, name_node);
599 if (strcmp(name, found->s_name))
600 return NULL;
601 }
602 }
603
604 return found;
552} 605}
553 606
554/** 607/**
@@ -744,21 +797,19 @@ void sysfs_remove_subdir(struct sysfs_dirent *sd)
744static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) 797static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
745{ 798{
746 struct sysfs_addrm_cxt acxt; 799 struct sysfs_addrm_cxt acxt;
747 struct sysfs_dirent **pos; 800 struct rb_node *pos;
748 801
749 if (!dir_sd) 802 if (!dir_sd)
750 return; 803 return;
751 804
752 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); 805 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
753 sysfs_addrm_start(&acxt, dir_sd); 806 sysfs_addrm_start(&acxt, dir_sd);
754 pos = &dir_sd->s_dir.children; 807 pos = rb_first(&dir_sd->s_dir.inode_tree);
755 while (*pos) { 808 while (pos) {
756 struct sysfs_dirent *sd = *pos; 809 struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node);
757 810 pos = rb_next(pos);
758 if (sysfs_type(sd) != SYSFS_DIR) 811 if (sysfs_type(sd) != SYSFS_DIR)
759 sysfs_remove_one(&acxt, sd); 812 sysfs_remove_one(&acxt, sd);
760 else
761 pos = &(*pos)->s_sibling;
762 } 813 }
763 sysfs_addrm_finish(&acxt); 814 sysfs_addrm_finish(&acxt);
764 815
@@ -881,12 +932,28 @@ static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
881 pos = NULL; 932 pos = NULL;
882 } 933 }
883 if (!pos && (ino > 1) && (ino < INT_MAX)) { 934 if (!pos && (ino > 1) && (ino < INT_MAX)) {
884 pos = parent_sd->s_dir.children; 935 struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node;
885 while (pos && (ino > pos->s_ino)) 936 while (p) {
886 pos = pos->s_sibling; 937#define node rb_entry(p, struct sysfs_dirent, inode_node)
938 if (ino < node->s_ino) {
939 pos = node;
940 p = node->inode_node.rb_left;
941 } else if (ino > node->s_ino) {
942 p = node->inode_node.rb_right;
943 } else {
944 pos = node;
945 break;
946 }
947#undef node
948 }
949 }
950 while (pos && pos->s_ns != ns) {
951 struct rb_node *p = rb_next(&pos->inode_node);
952 if (!p)
953 pos = NULL;
954 else
955 pos = rb_entry(p, struct sysfs_dirent, inode_node);
887 } 956 }
888 while (pos && pos->s_ns && pos->s_ns != ns)
889 pos = pos->s_sibling;
890 return pos; 957 return pos;
891} 958}
892 959
@@ -894,10 +961,13 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
894 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) 961 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
895{ 962{
896 pos = sysfs_dir_pos(ns, parent_sd, ino, pos); 963 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
897 if (pos) 964 if (pos) do {
898 pos = pos->s_sibling; 965 struct rb_node *p = rb_next(&pos->inode_node);
899 while (pos && pos->s_ns && pos->s_ns != ns) 966 if (!p)
900 pos = pos->s_sibling; 967 pos = NULL;
968 else
969 pos = rb_entry(p, struct sysfs_dirent, inode_node);
970 } while (pos && pos->s_ns != ns);
901 return pos; 971 return pos;
902} 972}
903 973
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1ad8c93c1b85..d4e6080b4b20 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -466,9 +466,6 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
466 mutex_lock(&sysfs_mutex); 466 mutex_lock(&sysfs_mutex);
467 467
468 if (sd && dir) 468 if (sd && dir)
469 /* Only directories are tagged, so no need to pass
470 * a tag explicitly.
471 */
472 sd = sysfs_find_dirent(sd, NULL, dir); 469 sd = sysfs_find_dirent(sd, NULL, dir);
473 if (sd && attr) 470 if (sd && attr)
474 sd = sysfs_find_dirent(sd, NULL, attr); 471 sd = sysfs_find_dirent(sd, NULL, attr);
@@ -488,17 +485,56 @@ const struct file_operations sysfs_file_operations = {
488 .poll = sysfs_poll, 485 .poll = sysfs_poll,
489}; 486};
490 487
488int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
489 const void **pns)
490{
491 struct sysfs_dirent *dir_sd = kobj->sd;
492 const struct sysfs_ops *ops;
493 const void *ns = NULL;
494 int err;
495
496 err = 0;
497 if (!sysfs_ns_type(dir_sd))
498 goto out;
499
500 err = -EINVAL;
501 if (!kobj->ktype)
502 goto out;
503 ops = kobj->ktype->sysfs_ops;
504 if (!ops)
505 goto out;
506 if (!ops->namespace)
507 goto out;
508
509 err = 0;
510 ns = ops->namespace(kobj, attr);
511out:
512 if (err) {
513 WARN(1, KERN_ERR "missing sysfs namespace attribute operation for "
514 "kobject: %s\n", kobject_name(kobj));
515 }
516 *pns = ns;
517 return err;
518}
519
491int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, 520int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
492 const struct attribute *attr, int type, mode_t amode) 521 const struct attribute *attr, int type, mode_t amode)
493{ 522{
494 umode_t mode = (amode & S_IALLUGO) | S_IFREG; 523 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
495 struct sysfs_addrm_cxt acxt; 524 struct sysfs_addrm_cxt acxt;
496 struct sysfs_dirent *sd; 525 struct sysfs_dirent *sd;
526 const void *ns;
497 int rc; 527 int rc;
498 528
529 rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns);
530 if (rc)
531 return rc;
532
499 sd = sysfs_new_dirent(attr->name, mode, type); 533 sd = sysfs_new_dirent(attr->name, mode, type);
500 if (!sd) 534 if (!sd)
501 return -ENOMEM; 535 return -ENOMEM;
536
537 sd->s_ns = ns;
502 sd->s_attr.attr = (void *)attr; 538 sd->s_attr.attr = (void *)attr;
503 sysfs_dirent_init_lockdep(sd); 539 sysfs_dirent_init_lockdep(sd);
504 540
@@ -586,12 +622,17 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
586{ 622{
587 struct sysfs_dirent *sd; 623 struct sysfs_dirent *sd;
588 struct iattr newattrs; 624 struct iattr newattrs;
625 const void *ns;
589 int rc; 626 int rc;
590 627
628 rc = sysfs_attr_ns(kobj, attr, &ns);
629 if (rc)
630 return rc;
631
591 mutex_lock(&sysfs_mutex); 632 mutex_lock(&sysfs_mutex);
592 633
593 rc = -ENOENT; 634 rc = -ENOENT;
594 sd = sysfs_find_dirent(kobj->sd, NULL, attr->name); 635 sd = sysfs_find_dirent(kobj->sd, ns, attr->name);
595 if (!sd) 636 if (!sd)
596 goto out; 637 goto out;
597 638
@@ -616,7 +657,12 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
616 657
617void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 658void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
618{ 659{
619 sysfs_hash_and_remove(kobj->sd, NULL, attr->name); 660 const void *ns;
661
662 if (sysfs_attr_ns(kobj, attr, &ns))
663 return;
664
665 sysfs_hash_and_remove(kobj->sd, ns, attr->name);
620} 666}
621 667
622void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) 668void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e3f091a81c72..e23f28894a3a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -202,18 +202,6 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
202 inode->i_ctime = iattr->ia_ctime; 202 inode->i_ctime = iattr->ia_ctime;
203} 203}
204 204
205static int sysfs_count_nlink(struct sysfs_dirent *sd)
206{
207 struct sysfs_dirent *child;
208 int nr = 0;
209
210 for (child = sd->s_dir.children; child; child = child->s_sibling)
211 if (sysfs_type(child) == SYSFS_DIR)
212 nr++;
213
214 return nr + 2;
215}
216
217static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode) 205static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
218{ 206{
219 struct sysfs_inode_attrs *iattrs = sd->s_iattr; 207 struct sysfs_inode_attrs *iattrs = sd->s_iattr;
@@ -230,7 +218,7 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
230 } 218 }
231 219
232 if (sysfs_type(sd) == SYSFS_DIR) 220 if (sysfs_type(sd) == SYSFS_DIR)
233 inode->i_nlink = sysfs_count_nlink(sd); 221 inode->i_nlink = sd->s_dir.subdirs + 2;
234} 222}
235 223
236int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
@@ -336,8 +324,6 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
336 sysfs_addrm_start(&acxt, dir_sd); 324 sysfs_addrm_start(&acxt, dir_sd);
337 325
338 sd = sysfs_find_dirent(dir_sd, ns, name); 326 sd = sysfs_find_dirent(dir_sd, ns, name);
339 if (sd && (sd->s_ns != ns))
340 sd = NULL;
341 if (sd) 327 if (sd)
342 sysfs_remove_one(&acxt, sd); 328 sysfs_remove_one(&acxt, sd);
343 329
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 845ab3ad229d..ce29e28b766d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -11,14 +11,18 @@
11#include <linux/lockdep.h> 11#include <linux/lockdep.h>
12#include <linux/kobject_ns.h> 12#include <linux/kobject_ns.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/rbtree.h>
14 15
15struct sysfs_open_dirent; 16struct sysfs_open_dirent;
16 17
17/* type-specific structures for sysfs_dirent->s_* union members */ 18/* type-specific structures for sysfs_dirent->s_* union members */
18struct sysfs_elem_dir { 19struct sysfs_elem_dir {
19 struct kobject *kobj; 20 struct kobject *kobj;
20 /* children list starts here and goes through sd->s_sibling */ 21
21 struct sysfs_dirent *children; 22 unsigned long subdirs;
23
24 struct rb_root inode_tree;
25 struct rb_root name_tree;
22}; 26};
23 27
24struct sysfs_elem_symlink { 28struct sysfs_elem_symlink {
@@ -56,9 +60,16 @@ struct sysfs_dirent {
56 struct lockdep_map dep_map; 60 struct lockdep_map dep_map;
57#endif 61#endif
58 struct sysfs_dirent *s_parent; 62 struct sysfs_dirent *s_parent;
59 struct sysfs_dirent *s_sibling;
60 const char *s_name; 63 const char *s_name;
61 64
65 struct rb_node inode_node;
66 struct rb_node name_node;
67
68 union {
69 struct completion *completion;
70 struct sysfs_dirent *removed_list;
71 } u;
72
62 const void *s_ns; /* namespace tag */ 73 const void *s_ns; /* namespace tag */
63 union { 74 union {
64 struct sysfs_elem_dir s_dir; 75 struct sysfs_elem_dir s_dir;
diff --git a/fs/xattr.c b/fs/xattr.c
index f060663ab70c..67583de8218c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -14,6 +14,7 @@
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/namei.h> 15#include <linux/namei.h>
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/evm.h>
17#include <linux/syscalls.h> 18#include <linux/syscalls.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
@@ -166,6 +167,64 @@ out_noalloc:
166} 167}
167EXPORT_SYMBOL_GPL(xattr_getsecurity); 168EXPORT_SYMBOL_GPL(xattr_getsecurity);
168 169
170/*
171 * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
172 *
173 * Allocate memory, if not already allocated, or re-allocate correct size,
174 * before retrieving the extended attribute.
175 *
176 * Returns the result of alloc, if failed, or the getxattr operation.
177 */
178ssize_t
179vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
180 size_t xattr_size, gfp_t flags)
181{
182 struct inode *inode = dentry->d_inode;
183 char *value = *xattr_value;
184 int error;
185
186 error = xattr_permission(inode, name, MAY_READ);
187 if (error)
188 return error;
189
190 if (!inode->i_op->getxattr)
191 return -EOPNOTSUPP;
192
193 error = inode->i_op->getxattr(dentry, name, NULL, 0);
194 if (error < 0)
195 return error;
196
197 if (!value || (error > xattr_size)) {
198 value = krealloc(*xattr_value, error + 1, flags);
199 if (!value)
200 return -ENOMEM;
201 memset(value, 0, error + 1);
202 }
203
204 error = inode->i_op->getxattr(dentry, name, value, error);
205 *xattr_value = value;
206 return error;
207}
208
209/* Compare an extended attribute value with the given value */
210int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
211 const char *value, size_t size, gfp_t flags)
212{
213 char *xattr_value = NULL;
214 int rc;
215
216 rc = vfs_getxattr_alloc(dentry, xattr_name, &xattr_value, 0, flags);
217 if (rc < 0)
218 return rc;
219
220 if ((rc != size) || (memcmp(xattr_value, value, rc) != 0))
221 rc = -EINVAL;
222 else
223 rc = 0;
224 kfree(xattr_value);
225 return rc;
226}
227
169ssize_t 228ssize_t
170vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) 229vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
171{ 230{
@@ -243,8 +302,10 @@ vfs_removexattr(struct dentry *dentry, const char *name)
243 error = inode->i_op->removexattr(dentry, name); 302 error = inode->i_op->removexattr(dentry, name);
244 mutex_unlock(&inode->i_mutex); 303 mutex_unlock(&inode->i_mutex);
245 304
246 if (!error) 305 if (!error) {
247 fsnotify_xattr(dentry); 306 fsnotify_xattr(dentry);
307 evm_inode_post_removexattr(dentry, name);
308 }
248 return error; 309 return error;
249} 310}
250EXPORT_SYMBOL_GPL(vfs_removexattr); 311EXPORT_SYMBOL_GPL(vfs_removexattr);
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index f7c8f7a9ea6d..292eff198030 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -61,12 +61,7 @@ extern void kmem_free(const void *);
61 61
62static inline void *kmem_zalloc_large(size_t size) 62static inline void *kmem_zalloc_large(size_t size)
63{ 63{
64 void *ptr; 64 return vzalloc(size);
65
66 ptr = vmalloc(size);
67 if (ptr)
68 memset(ptr, 0, size);
69 return ptr;
70} 65}
71static inline void kmem_free_large(void *ptr) 66static inline void kmem_free_large(void *ptr)
72{ 67{
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 63e971e2b837..8c37dde4c521 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1300,6 +1300,7 @@ xfs_end_io_direct_write(
1300 bool is_async) 1300 bool is_async)
1301{ 1301{
1302 struct xfs_ioend *ioend = iocb->private; 1302 struct xfs_ioend *ioend = iocb->private;
1303 struct inode *inode = ioend->io_inode;
1303 1304
1304 /* 1305 /*
1305 * blockdev_direct_IO can return an error even after the I/O 1306 * blockdev_direct_IO can return an error even after the I/O
@@ -1331,7 +1332,7 @@ xfs_end_io_direct_write(
1331 } 1332 }
1332 1333
1333 /* XXX: probably should move into the real I/O completion handler */ 1334 /* XXX: probably should move into the real I/O completion handler */
1334 inode_dio_done(ioend->io_inode); 1335 inode_dio_done(inode);
1335} 1336}
1336 1337
1337STATIC ssize_t 1338STATIC ssize_t
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cac2ecfa6746..ef43fce519a1 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -629,7 +629,7 @@ xfs_buf_item_push(
629 * the xfsbufd to get this buffer written. We have to unlock the buffer 629 * the xfsbufd to get this buffer written. We have to unlock the buffer
630 * to allow the xfsbufd to write it, too. 630 * to allow the xfsbufd to write it, too.
631 */ 631 */
632STATIC void 632STATIC bool
633xfs_buf_item_pushbuf( 633xfs_buf_item_pushbuf(
634 struct xfs_log_item *lip) 634 struct xfs_log_item *lip)
635{ 635{
@@ -643,6 +643,7 @@ xfs_buf_item_pushbuf(
643 643
644 xfs_buf_delwri_promote(bp); 644 xfs_buf_delwri_promote(bp);
645 xfs_buf_relse(bp); 645 xfs_buf_relse(bp);
646 return true;
646} 647}
647 648
648STATIC void 649STATIC void
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 9e0e2fa3f2c8..bb3f71d236d2 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
183 * search the buffer cache can be a time consuming thing, and AIL lock is a 183 * search the buffer cache can be a time consuming thing, and AIL lock is a
184 * spinlock. 184 * spinlock.
185 */ 185 */
186STATIC void 186STATIC bool
187xfs_qm_dquot_logitem_pushbuf( 187xfs_qm_dquot_logitem_pushbuf(
188 struct xfs_log_item *lip) 188 struct xfs_log_item *lip)
189{ 189{
190 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); 190 struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
191 struct xfs_dquot *dqp = qlip->qli_dquot; 191 struct xfs_dquot *dqp = qlip->qli_dquot;
192 struct xfs_buf *bp; 192 struct xfs_buf *bp;
193 bool ret = true;
193 194
194 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 195 ASSERT(XFS_DQ_IS_LOCKED(dqp));
195 196
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
201 if (completion_done(&dqp->q_flush) || 202 if (completion_done(&dqp->q_flush) ||
202 !(lip->li_flags & XFS_LI_IN_AIL)) { 203 !(lip->li_flags & XFS_LI_IN_AIL)) {
203 xfs_dqunlock(dqp); 204 xfs_dqunlock(dqp);
204 return; 205 return true;
205 } 206 }
206 207
207 bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, 208 bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
208 dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); 209 dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
209 xfs_dqunlock(dqp); 210 xfs_dqunlock(dqp);
210 if (!bp) 211 if (!bp)
211 return; 212 return true;
212 if (XFS_BUF_ISDELAYWRITE(bp)) 213 if (XFS_BUF_ISDELAYWRITE(bp))
213 xfs_buf_delwri_promote(bp); 214 xfs_buf_delwri_promote(bp);
215 if (xfs_buf_ispinned(bp))
216 ret = false;
214 xfs_buf_relse(bp); 217 xfs_buf_relse(bp);
218 return ret;
215} 219}
216 220
217/* 221/*
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 588406dc6a35..836ad80d4f2b 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -708,13 +708,14 @@ xfs_inode_item_committed(
708 * marked delayed write. If that's the case, we'll promote it and that will 708 * marked delayed write. If that's the case, we'll promote it and that will
709 * allow the caller to write the buffer by triggering the xfsbufd to run. 709 * allow the caller to write the buffer by triggering the xfsbufd to run.
710 */ 710 */
711STATIC void 711STATIC bool
712xfs_inode_item_pushbuf( 712xfs_inode_item_pushbuf(
713 struct xfs_log_item *lip) 713 struct xfs_log_item *lip)
714{ 714{
715 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 715 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
716 struct xfs_inode *ip = iip->ili_inode; 716 struct xfs_inode *ip = iip->ili_inode;
717 struct xfs_buf *bp; 717 struct xfs_buf *bp;
718 bool ret = true;
718 719
719 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 720 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
720 721
@@ -725,7 +726,7 @@ xfs_inode_item_pushbuf(
725 if (completion_done(&ip->i_flush) || 726 if (completion_done(&ip->i_flush) ||
726 !(lip->li_flags & XFS_LI_IN_AIL)) { 727 !(lip->li_flags & XFS_LI_IN_AIL)) {
727 xfs_iunlock(ip, XFS_ILOCK_SHARED); 728 xfs_iunlock(ip, XFS_ILOCK_SHARED);
728 return; 729 return true;
729 } 730 }
730 731
731 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, 732 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
@@ -733,10 +734,13 @@ xfs_inode_item_pushbuf(
733 734
734 xfs_iunlock(ip, XFS_ILOCK_SHARED); 735 xfs_iunlock(ip, XFS_ILOCK_SHARED);
735 if (!bp) 736 if (!bp)
736 return; 737 return true;
737 if (XFS_BUF_ISDELAYWRITE(bp)) 738 if (XFS_BUF_ISDELAYWRITE(bp))
738 xfs_buf_delwri_promote(bp); 739 xfs_buf_delwri_promote(bp);
740 if (xfs_buf_ispinned(bp))
741 ret = false;
739 xfs_buf_relse(bp); 742 xfs_buf_relse(bp);
743 return ret;
740} 744}
741 745
742/* 746/*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 673704fab748..28856accb4fa 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -102,37 +102,38 @@ xfs_mark_inode_dirty(
102 102
103} 103}
104 104
105
106int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
107 void *fs_info)
108{
109 const struct xattr *xattr;
110 struct xfs_inode *ip = XFS_I(inode);
111 int error = 0;
112
113 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
114 error = xfs_attr_set(ip, xattr->name, xattr->value,
115 xattr->value_len, ATTR_SECURE);
116 if (error < 0)
117 break;
118 }
119 return error;
120}
121
105/* 122/*
106 * Hook in SELinux. This is not quite correct yet, what we really need 123 * Hook in SELinux. This is not quite correct yet, what we really need
107 * here (as we do for default ACLs) is a mechanism by which creation of 124 * here (as we do for default ACLs) is a mechanism by which creation of
108 * these attrs can be journalled at inode creation time (along with the 125 * these attrs can be journalled at inode creation time (along with the
109 * inode, of course, such that log replay can't cause these to be lost). 126 * inode, of course, such that log replay can't cause these to be lost).
110 */ 127 */
128
111STATIC int 129STATIC int
112xfs_init_security( 130xfs_init_security(
113 struct inode *inode, 131 struct inode *inode,
114 struct inode *dir, 132 struct inode *dir,
115 const struct qstr *qstr) 133 const struct qstr *qstr)
116{ 134{
117 struct xfs_inode *ip = XFS_I(inode); 135 return security_inode_init_security(inode, dir, qstr,
118 size_t length; 136 &xfs_initxattrs, NULL);
119 void *value;
120 unsigned char *name;
121 int error;
122
123 error = security_inode_init_security(inode, dir, qstr, (char **)&name,
124 &value, &length);
125 if (error) {
126 if (error == -EOPNOTSUPP)
127 return 0;
128 return -error;
129 }
130
131 error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
132
133 kfree(name);
134 kfree(value);
135 return error;
136} 137}
137 138
138static void 139static void
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 1e8a45e74c3e..828662f70d64 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -68,6 +68,8 @@
68#include <linux/ctype.h> 68#include <linux/ctype.h>
69#include <linux/writeback.h> 69#include <linux/writeback.h>
70#include <linux/capability.h> 70#include <linux/capability.h>
71#include <linux/kthread.h>
72#include <linux/freezer.h>
71#include <linux/list_sort.h> 73#include <linux/list_sort.h>
72 74
73#include <asm/page.h> 75#include <asm/page.h>
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2366c54cc4fa..5cf06b85fd9d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1652,24 +1652,13 @@ xfs_init_workqueues(void)
1652 */ 1652 */
1653 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); 1653 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
1654 if (!xfs_syncd_wq) 1654 if (!xfs_syncd_wq)
1655 goto out; 1655 return -ENOMEM;
1656
1657 xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
1658 if (!xfs_ail_wq)
1659 goto out_destroy_syncd;
1660
1661 return 0; 1656 return 0;
1662
1663out_destroy_syncd:
1664 destroy_workqueue(xfs_syncd_wq);
1665out:
1666 return -ENOMEM;
1667} 1657}
1668 1658
1669STATIC void 1659STATIC void
1670xfs_destroy_workqueues(void) 1660xfs_destroy_workqueues(void)
1671{ 1661{
1672 destroy_workqueue(xfs_ail_wq);
1673 destroy_workqueue(xfs_syncd_wq); 1662 destroy_workqueue(xfs_syncd_wq);
1674} 1663}
1675 1664
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 06a9759b6352..53597f4db9b5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
350 void (*iop_unlock)(xfs_log_item_t *); 350 void (*iop_unlock)(xfs_log_item_t *);
351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
352 void (*iop_push)(xfs_log_item_t *); 352 void (*iop_push)(xfs_log_item_t *);
353 void (*iop_pushbuf)(xfs_log_item_t *); 353 bool (*iop_pushbuf)(xfs_log_item_t *);
354 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 354 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
355} xfs_item_ops_t; 355} xfs_item_ops_t;
356 356
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index c15aa29fa169..3a1e7ca54c2d 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,6 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
32
33#ifdef DEBUG 31#ifdef DEBUG
34/* 32/*
35 * Check that the list is sorted as it should be. 33 * Check that the list is sorted as it should be.
@@ -356,16 +354,10 @@ xfs_ail_delete(
356 xfs_trans_ail_cursor_clear(ailp, lip); 354 xfs_trans_ail_cursor_clear(ailp, lip);
357} 355}
358 356
359/* 357static long
360 * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself 358xfsaild_push(
361 * to run at a later time if there is more work to do to complete the push. 359 struct xfs_ail *ailp)
362 */
363STATIC void
364xfs_ail_worker(
365 struct work_struct *work)
366{ 360{
367 struct xfs_ail *ailp = container_of(to_delayed_work(work),
368 struct xfs_ail, xa_work);
369 xfs_mount_t *mp = ailp->xa_mount; 361 xfs_mount_t *mp = ailp->xa_mount;
370 struct xfs_ail_cursor cur; 362 struct xfs_ail_cursor cur;
371 xfs_log_item_t *lip; 363 xfs_log_item_t *lip;
@@ -427,8 +419,13 @@ xfs_ail_worker(
427 419
428 case XFS_ITEM_PUSHBUF: 420 case XFS_ITEM_PUSHBUF:
429 XFS_STATS_INC(xs_push_ail_pushbuf); 421 XFS_STATS_INC(xs_push_ail_pushbuf);
430 IOP_PUSHBUF(lip); 422
431 ailp->xa_last_pushed_lsn = lsn; 423 if (!IOP_PUSHBUF(lip)) {
424 stuck++;
425 flush_log = 1;
426 } else {
427 ailp->xa_last_pushed_lsn = lsn;
428 }
432 push_xfsbufd = 1; 429 push_xfsbufd = 1;
433 break; 430 break;
434 431
@@ -440,7 +437,6 @@ xfs_ail_worker(
440 437
441 case XFS_ITEM_LOCKED: 438 case XFS_ITEM_LOCKED:
442 XFS_STATS_INC(xs_push_ail_locked); 439 XFS_STATS_INC(xs_push_ail_locked);
443 ailp->xa_last_pushed_lsn = lsn;
444 stuck++; 440 stuck++;
445 break; 441 break;
446 442
@@ -501,20 +497,6 @@ out_done:
501 /* We're past our target or empty, so idle */ 497 /* We're past our target or empty, so idle */
502 ailp->xa_last_pushed_lsn = 0; 498 ailp->xa_last_pushed_lsn = 0;
503 499
504 /*
505 * We clear the XFS_AIL_PUSHING_BIT first before checking
506 * whether the target has changed. If the target has changed,
507 * this pushes the requeue race directly onto the result of the
508 * atomic test/set bit, so we are guaranteed that either the
509 * the pusher that changed the target or ourselves will requeue
510 * the work (but not both).
511 */
512 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
513 smp_rmb();
514 if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
515 test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
516 return;
517
518 tout = 50; 500 tout = 50;
519 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 501 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
520 /* 502 /*
@@ -537,9 +519,30 @@ out_done:
537 tout = 20; 519 tout = 20;
538 } 520 }
539 521
540 /* There is more to do, requeue us. */ 522 return tout;
541 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 523}
542 msecs_to_jiffies(tout)); 524
525static int
526xfsaild(
527 void *data)
528{
529 struct xfs_ail *ailp = data;
530 long tout = 0; /* milliseconds */
531
532 while (!kthread_should_stop()) {
533 if (tout && tout <= 20)
534 __set_current_state(TASK_KILLABLE);
535 else
536 __set_current_state(TASK_INTERRUPTIBLE);
537 schedule_timeout(tout ?
538 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
539
540 try_to_freeze();
541
542 tout = xfsaild_push(ailp);
543 }
544
545 return 0;
543} 546}
544 547
545/* 548/*
@@ -574,8 +577,9 @@ xfs_ail_push(
574 */ 577 */
575 smp_wmb(); 578 smp_wmb();
576 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); 579 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
577 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) 580 smp_wmb();
578 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); 581
582 wake_up_process(ailp->xa_task);
579} 583}
580 584
581/* 585/*
@@ -813,9 +817,18 @@ xfs_trans_ail_init(
813 INIT_LIST_HEAD(&ailp->xa_ail); 817 INIT_LIST_HEAD(&ailp->xa_ail);
814 INIT_LIST_HEAD(&ailp->xa_cursors); 818 INIT_LIST_HEAD(&ailp->xa_cursors);
815 spin_lock_init(&ailp->xa_lock); 819 spin_lock_init(&ailp->xa_lock);
816 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); 820
821 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
822 ailp->xa_mount->m_fsname);
823 if (IS_ERR(ailp->xa_task))
824 goto out_free_ailp;
825
817 mp->m_ail = ailp; 826 mp->m_ail = ailp;
818 return 0; 827 return 0;
828
829out_free_ailp:
830 kmem_free(ailp);
831 return ENOMEM;
819} 832}
820 833
821void 834void
@@ -824,6 +837,6 @@ xfs_trans_ail_destroy(
824{ 837{
825 struct xfs_ail *ailp = mp->m_ail; 838 struct xfs_ail *ailp = mp->m_ail;
826 839
827 cancel_delayed_work_sync(&ailp->xa_work); 840 kthread_stop(ailp->xa_task);
828 kmem_free(ailp); 841 kmem_free(ailp);
829} 842}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 212946b97239..22750b5e4a8f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -64,23 +64,17 @@ struct xfs_ail_cursor {
64 */ 64 */
65struct xfs_ail { 65struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct task_struct *xa_task;
67 struct list_head xa_ail; 68 struct list_head xa_ail;
68 xfs_lsn_t xa_target; 69 xfs_lsn_t xa_target;
69 struct list_head xa_cursors; 70 struct list_head xa_cursors;
70 spinlock_t xa_lock; 71 spinlock_t xa_lock;
71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn; 72 xfs_lsn_t xa_last_pushed_lsn;
73 unsigned long xa_flags;
74}; 73};
75 74
76#define XFS_AIL_PUSHING_BIT 0
77
78/* 75/*
79 * From xfs_trans_ail.c 76 * From xfs_trans_ail.c
80 */ 77 */
81
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83
84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
85 struct xfs_ail_cursor *cur, 79 struct xfs_ail_cursor *cur,
86 struct xfs_log_item **log_items, int nr_items, 80 struct xfs_log_item **log_items, int nr_items,