aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c28
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c114
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h53
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c316
-rw-r--r--fs/9p/vfs_inode.c307
-rw-r--r--fs/9p/vfs_inode_dotl.c198
-rw-r--r--fs/9p/vfs_super.c65
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile3
-rw-r--r--fs/adfs/Kconfig1
-rw-r--r--fs/adfs/dir.c6
-rw-r--r--fs/adfs/inode.c6
-rw-r--r--fs/adfs/super.c13
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c56
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c44
-rw-r--r--fs/btrfs/extent_io.c165
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c148
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/btrfs/xattr.c6
-rw-r--r--fs/btrfs/xattr.h3
-rw-r--r--fs/cachefiles/namei.c52
-rw-r--r--fs/ceph/dir.c30
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/debugfs/inode.c26
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/namei.c17
-rw-r--r--fs/ext2/xattr.h6
-rw-r--r--fs/ext2/xattr_security.c5
-rw-r--r--fs/ext3/balloc.c21
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/namei.c17
-rw-r--r--fs/ext3/super.c8
-rw-r--r--fs/ext3/xattr.h4
-rw-r--r--fs/ext3/xattr_security.c5
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/ext4/xattr.h4
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c64
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c7
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/bmap.c20
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c77
-rw-r--r--fs/gfs2/glock.c410
-rw-r--r--fs/gfs2/glock.h39
-rw-r--r--fs/gfs2/glops.c33
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c7
-rw-r--r--fs/gfs2/lock_dlm.c14
-rw-r--r--fs/gfs2/log.c32
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/main.c15
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/ops_inode.c10
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c34
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/hpfs/Kconfig2
-rw-r--r--fs/hpfs/dir.c23
-rw-r--r--fs/hpfs/file.c9
-rw-r--r--fs/hpfs/hpfs_fn.h22
-rw-r--r--fs/hpfs/inode.c9
-rw-r--r--fs/hpfs/namei.c49
-rw-r--r--fs/hpfs/super.c23
-rw-r--r--fs/inode.c63
-rw-r--r--fs/internal.h15
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/security.c5
-rw-r--r--fs/jffs2/write.c18
-rw-r--r--fs/jffs2/xattr.h5
-rw-r--r--fs/jfs/jfs_xattr.h5
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/xattr.c6
-rw-r--r--fs/locks.c1
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1493
-rw-r--r--fs/namespace.c61
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/client.c131
-rw-r--r--fs/nfs/dir.c13
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/getroot.c42
-rw-r--r--fs/nfs/idmap.c90
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/internal.h43
-rw-r--r--fs/nfs/namespace.c66
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs4_fs.h38
-rw-r--r--fs/nfs/nfs4filelayout.c361
-rw-r--r--fs/nfs/nfs4filelayout.h19
-rw-r--r--fs/nfs/nfs4filelayoutdev.c256
-rw-r--r--fs/nfs/nfs4namespace.c41
-rw-r--r--fs/nfs/nfs4proc.c254
-rw-r--r--fs/nfs/nfs4renewd.c6
-rw-r--r--fs/nfs/nfs4state.c35
-rw-r--r--fs/nfs/nfs4xdr.c42
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/pagelist.c22
-rw-r--r--fs/nfs/pnfs.c330
-rw-r--r--fs/nfs/pnfs.h118
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/read.c127
-rw-r--r--fs/nfs/super.c478
-rw-r--r--fs/nfs/unlink.c22
-rw-r--r--fs/nfs/write.c155
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/super.c35
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/open.c137
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/pstore/Kconfig13
-rw-r--r--fs/pstore/Makefile7
-rw-r--r--fs/pstore/inode.c285
-rw-r--r--fs/pstore/internal.h7
-rw-r--r--fs/pstore/platform.c202
-rw-r--r--fs/quota/quota_v2.c2
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c15
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c3
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/super.c67
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/balloc.c4
-rw-r--r--fs/udf/file.c7
-rw-r--r--fs/udf/inode.c239
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/udf/truncate.c146
-rw-r--r--fs/udf/udfdecl.h12
-rw-r--r--fs/ufs/Kconfig1
-rw-r--r--fs/ufs/inode.c78
-rw-r--r--fs/ufs/namei.c44
-rw-r--r--fs/ufs/super.c64
-rw-r--r--fs/ufs/truncate.c5
-rw-r--r--fs/ufs/ufs.h6
-rw-r--r--fs/ufs/util.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_mru_cache.c2
214 files changed, 6752 insertions, 3923 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf616318..515455296378 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
21#include <linux/posix_acl_xattr.h> 21#include <linux/posix_acl_xattr.h>
22#include "xattr.h" 22#include "xattr.h"
23#include "acl.h" 23#include "acl.h"
24#include "v9fs_vfs.h"
25#include "v9fs.h" 24#include "v9fs.h"
25#include "v9fs_vfs.h"
26 26
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) 27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 28{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
59 struct v9fs_session_info *v9ses; 59 struct v9fs_session_info *v9ses;
60 60
61 v9ses = v9fs_inode2v9ses(inode); 61 v9ses = v9fs_inode2v9ses(inode);
62 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 62 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
63 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
63 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); 64 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
64 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); 65 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
65 return 0; 66 return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
71 if (!IS_ERR(dacl) && !IS_ERR(pacl)) { 72 if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
72 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); 73 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
73 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); 74 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
74 posix_acl_release(dacl);
75 posix_acl_release(pacl);
76 } else 75 } else
77 retval = -EIO; 76 retval = -EIO;
78 77
78 if (!IS_ERR(dacl))
79 posix_acl_release(dacl);
80
81 if (!IS_ERR(pacl))
82 posix_acl_release(pacl);
83
79 return retval; 84 return retval;
80} 85}
81 86
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
100 return -ECHILD; 105 return -ECHILD;
101 106
102 v9ses = v9fs_inode2v9ses(inode); 107 v9ses = v9fs_inode2v9ses(inode);
103 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 108 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
109 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
104 /* 110 /*
105 * On access = client mode get the acl 111 * On access = client and acl = on mode get the acl
106 * values from the server 112 * values from the server
107 */ 113 */
108 return 0; 114 return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
128 struct inode *inode = dentry->d_inode; 134 struct inode *inode = dentry->d_inode;
129 135
130 set_cached_acl(inode, type, acl); 136 set_cached_acl(inode, type, acl);
137
138 if (!acl)
139 return 0;
140
131 /* Set a setxattr request to server */ 141 /* Set a setxattr request to server */
132 size = posix_acl_xattr_size(acl->a_count); 142 size = posix_acl_xattr_size(acl->a_count);
133 buffer = kmalloc(size, GFP_KERNEL); 143 buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
177int v9fs_set_create_acl(struct dentry *dentry, 187int v9fs_set_create_acl(struct dentry *dentry,
178 struct posix_acl *dpacl, struct posix_acl *pacl) 188 struct posix_acl *dpacl, struct posix_acl *pacl)
179{ 189{
180 if (dpacl) 190 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
181 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); 191 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
182 if (pacl)
183 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
184 posix_acl_release(dpacl); 192 posix_acl_release(dpacl);
185 posix_acl_release(pacl); 193 posix_acl_release(pacl);
186 return 0; 194 return 0;
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac2..5b335c5086a1 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
33 33
34#define CACHETAG_LEN 11 34#define CACHETAG_LEN 11
35 35
36struct kmem_cache *vcookie_cache;
37
38struct fscache_netfs v9fs_cache_netfs = { 36struct fscache_netfs v9fs_cache_netfs = {
39 .name = "9p", 37 .name = "9p",
40 .version = 0, 38 .version = 0,
41}; 39};
42 40
43static void init_once(void *foo)
44{
45 struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
46 vcookie->fscache = NULL;
47 vcookie->qid = NULL;
48 inode_init_once(&vcookie->inode);
49}
50
51/**
52 * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
53 * vcookie to inode mapping
54 *
55 * Returns 0 on success.
56 */
57
58static int v9fs_init_vcookiecache(void)
59{
60 vcookie_cache = kmem_cache_create("vcookie_cache",
61 sizeof(struct v9fs_cookie),
62 0, (SLAB_RECLAIM_ACCOUNT|
63 SLAB_MEM_SPREAD),
64 init_once);
65 if (!vcookie_cache)
66 return -ENOMEM;
67
68 return 0;
69}
70
71/**
72 * v9fs_destroy_vcookiecache - destroy the cache of vcookies
73 *
74 */
75
76static void v9fs_destroy_vcookiecache(void)
77{
78 kmem_cache_destroy(vcookie_cache);
79}
80
81int __v9fs_cache_register(void)
82{
83 int ret;
84 ret = v9fs_init_vcookiecache();
85 if (ret < 0)
86 return ret;
87
88 return fscache_register_netfs(&v9fs_cache_netfs);
89}
90
91void __v9fs_cache_unregister(void)
92{
93 v9fs_destroy_vcookiecache();
94 fscache_unregister_netfs(&v9fs_cache_netfs);
95}
96
97/** 41/**
98 * v9fs_random_cachetag - Generate a random tag to be associated 42 * v9fs_random_cachetag - Generate a random tag to be associated
99 * with a new cache session. 43 * with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
133} 77}
134 78
135const struct fscache_cookie_def v9fs_cache_session_index_def = { 79const struct fscache_cookie_def v9fs_cache_session_index_def = {
136 .name = "9P.session", 80 .name = "9P.session",
137 .type = FSCACHE_COOKIE_TYPE_INDEX, 81 .type = FSCACHE_COOKIE_TYPE_INDEX,
138 .get_key = v9fs_cache_session_get_key, 82 .get_key = v9fs_cache_session_get_key,
139}; 83};
140 84
141void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) 85void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
163static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, 107static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
164 void *buffer, uint16_t bufmax) 108 void *buffer, uint16_t bufmax)
165{ 109{
166 const struct v9fs_cookie *vcookie = cookie_netfs_data; 110 const struct v9fs_inode *v9inode = cookie_netfs_data;
167 memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); 111 memcpy(buffer, &v9inode->fscache_key->path,
168 112 sizeof(v9inode->fscache_key->path));
169 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, 113 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
170 vcookie->qid->path); 114 v9inode->fscache_key->path);
171 return sizeof(vcookie->qid->path); 115 return sizeof(v9inode->fscache_key->path);
172} 116}
173 117
174static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, 118static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
175 uint64_t *size) 119 uint64_t *size)
176{ 120{
177 const struct v9fs_cookie *vcookie = cookie_netfs_data; 121 const struct v9fs_inode *v9inode = cookie_netfs_data;
178 *size = i_size_read(&vcookie->inode); 122 *size = i_size_read(&v9inode->vfs_inode);
179 123
180 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, 124 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
181 *size); 125 *size);
182} 126}
183 127
184static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, 128static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
185 void *buffer, uint16_t buflen) 129 void *buffer, uint16_t buflen)
186{ 130{
187 const struct v9fs_cookie *vcookie = cookie_netfs_data; 131 const struct v9fs_inode *v9inode = cookie_netfs_data;
188 memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); 132 memcpy(buffer, &v9inode->fscache_key->version,
189 133 sizeof(v9inode->fscache_key->version));
190 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, 134 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
191 vcookie->qid->version); 135 v9inode->fscache_key->version);
192 return sizeof(vcookie->qid->version); 136 return sizeof(v9inode->fscache_key->version);
193} 137}
194 138
195static enum 139static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
197 const void *buffer, 141 const void *buffer,
198 uint16_t buflen) 142 uint16_t buflen)
199{ 143{
200 const struct v9fs_cookie *vcookie = cookie_netfs_data; 144 const struct v9fs_inode *v9inode = cookie_netfs_data;
201 145
202 if (buflen != sizeof(vcookie->qid->version)) 146 if (buflen != sizeof(v9inode->fscache_key->version))
203 return FSCACHE_CHECKAUX_OBSOLETE; 147 return FSCACHE_CHECKAUX_OBSOLETE;
204 148
205 if (memcmp(buffer, &vcookie->qid->version, 149 if (memcmp(buffer, &v9inode->fscache_key->version,
206 sizeof(vcookie->qid->version))) 150 sizeof(v9inode->fscache_key->version)))
207 return FSCACHE_CHECKAUX_OBSOLETE; 151 return FSCACHE_CHECKAUX_OBSOLETE;
208 152
209 return FSCACHE_CHECKAUX_OKAY; 153 return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
211 155
212static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) 156static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
213{ 157{
214 struct v9fs_cookie *vcookie = cookie_netfs_data; 158 struct v9fs_inode *v9inode = cookie_netfs_data;
215 struct pagevec pvec; 159 struct pagevec pvec;
216 pgoff_t first; 160 pgoff_t first;
217 int loop, nr_pages; 161 int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
220 first = 0; 164 first = 0;
221 165
222 for (;;) { 166 for (;;) {
223 nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, 167 nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
224 first, 168 first,
225 PAGEVEC_SIZE - pagevec_count(&pvec)); 169 PAGEVEC_SIZE - pagevec_count(&pvec));
226 if (!nr_pages) 170 if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
249 193
250void v9fs_cache_inode_get_cookie(struct inode *inode) 194void v9fs_cache_inode_get_cookie(struct inode *inode)
251{ 195{
252 struct v9fs_cookie *vcookie; 196 struct v9fs_inode *v9inode;
253 struct v9fs_session_info *v9ses; 197 struct v9fs_session_info *v9ses;
254 198
255 if (!S_ISREG(inode->i_mode)) 199 if (!S_ISREG(inode->i_mode))
256 return; 200 return;
257 201
258 vcookie = v9fs_inode2cookie(inode); 202 v9inode = V9FS_I(inode);
259 if (vcookie->fscache) 203 if (v9inode->fscache)
260 return; 204 return;
261 205
262 v9ses = v9fs_inode2v9ses(inode); 206 v9ses = v9fs_inode2v9ses(inode);
263 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 207 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
264 &v9fs_cache_inode_index_def, 208 &v9fs_cache_inode_index_def,
265 vcookie); 209 v9inode);
266 210
267 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, 211 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
268 vcookie->fscache); 212 v9inode->fscache);
269} 213}
270 214
271void v9fs_cache_inode_put_cookie(struct inode *inode) 215void v9fs_cache_inode_put_cookie(struct inode *inode)
272{ 216{
273 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 217 struct v9fs_inode *v9inode = V9FS_I(inode);
274 218
275 if (!vcookie->fscache) 219 if (!v9inode->fscache)
276 return; 220 return;
277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, 221 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
278 vcookie->fscache); 222 v9inode->fscache);
279 223
280 fscache_relinquish_cookie(vcookie->fscache, 0); 224 fscache_relinquish_cookie(v9inode->fscache, 0);
281 vcookie->fscache = NULL; 225 v9inode->fscache = NULL;
282} 226}
283 227
284void v9fs_cache_inode_flush_cookie(struct inode *inode) 228void v9fs_cache_inode_flush_cookie(struct inode *inode)
285{ 229{
286 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 230 struct v9fs_inode *v9inode = V9FS_I(inode);
287 231
288 if (!vcookie->fscache) 232 if (!v9inode->fscache)
289 return; 233 return;
290 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, 234 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
291 vcookie->fscache); 235 v9inode->fscache);
292 236
293 fscache_relinquish_cookie(vcookie->fscache, 1); 237 fscache_relinquish_cookie(v9inode->fscache, 1);
294 vcookie->fscache = NULL; 238 v9inode->fscache = NULL;
295} 239}
296 240
297void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) 241void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
298{ 242{
299 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 243 struct v9fs_inode *v9inode = V9FS_I(inode);
300 struct p9_fid *fid; 244 struct p9_fid *fid;
301 245
302 if (!vcookie->fscache) 246 if (!v9inode->fscache)
303 return; 247 return;
304 248
305 spin_lock(&vcookie->lock); 249 spin_lock(&v9inode->fscache_lock);
306 fid = filp->private_data; 250 fid = filp->private_data;
307 if ((filp->f_flags & O_ACCMODE) != O_RDONLY) 251 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
308 v9fs_cache_inode_flush_cookie(inode); 252 v9fs_cache_inode_flush_cookie(inode);
309 else 253 else
310 v9fs_cache_inode_get_cookie(inode); 254 v9fs_cache_inode_get_cookie(inode);
311 255
312 spin_unlock(&vcookie->lock); 256 spin_unlock(&v9inode->fscache_lock);
313} 257}
314 258
315void v9fs_cache_inode_reset_cookie(struct inode *inode) 259void v9fs_cache_inode_reset_cookie(struct inode *inode)
316{ 260{
317 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 261 struct v9fs_inode *v9inode = V9FS_I(inode);
318 struct v9fs_session_info *v9ses; 262 struct v9fs_session_info *v9ses;
319 struct fscache_cookie *old; 263 struct fscache_cookie *old;
320 264
321 if (!vcookie->fscache) 265 if (!v9inode->fscache)
322 return; 266 return;
323 267
324 old = vcookie->fscache; 268 old = v9inode->fscache;
325 269
326 spin_lock(&vcookie->lock); 270 spin_lock(&v9inode->fscache_lock);
327 fscache_relinquish_cookie(vcookie->fscache, 1); 271 fscache_relinquish_cookie(v9inode->fscache, 1);
328 272
329 v9ses = v9fs_inode2v9ses(inode); 273 v9ses = v9fs_inode2v9ses(inode);
330 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 274 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
331 &v9fs_cache_inode_index_def, 275 &v9fs_cache_inode_index_def,
332 vcookie); 276 v9inode);
333
334 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", 277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
335 inode, old, vcookie->fscache); 278 inode, old, v9inode->fscache);
336 279
337 spin_unlock(&vcookie->lock); 280 spin_unlock(&v9inode->fscache_lock);
338} 281}
339 282
340int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) 283int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
341{ 284{
342 struct inode *inode = page->mapping->host; 285 struct inode *inode = page->mapping->host;
343 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 286 struct v9fs_inode *v9inode = V9FS_I(inode);
344 287
345 BUG_ON(!vcookie->fscache); 288 BUG_ON(!v9inode->fscache);
346 289
347 return fscache_maybe_release_page(vcookie->fscache, page, gfp); 290 return fscache_maybe_release_page(v9inode->fscache, page, gfp);
348} 291}
349 292
350void __v9fs_fscache_invalidate_page(struct page *page) 293void __v9fs_fscache_invalidate_page(struct page *page)
351{ 294{
352 struct inode *inode = page->mapping->host; 295 struct inode *inode = page->mapping->host;
353 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 296 struct v9fs_inode *v9inode = V9FS_I(inode);
354 297
355 BUG_ON(!vcookie->fscache); 298 BUG_ON(!v9inode->fscache);
356 299
357 if (PageFsCache(page)) { 300 if (PageFsCache(page)) {
358 fscache_wait_on_page_write(vcookie->fscache, page); 301 fscache_wait_on_page_write(v9inode->fscache, page);
359 BUG_ON(!PageLocked(page)); 302 BUG_ON(!PageLocked(page));
360 fscache_uncache_page(vcookie->fscache, page); 303 fscache_uncache_page(v9inode->fscache, page);
361 } 304 }
362} 305}
363 306
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
380int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) 323int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
381{ 324{
382 int ret; 325 int ret;
383 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 326 const struct v9fs_inode *v9inode = V9FS_I(inode);
384 327
385 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 328 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
386 if (!vcookie->fscache) 329 if (!v9inode->fscache)
387 return -ENOBUFS; 330 return -ENOBUFS;
388 331
389 ret = fscache_read_or_alloc_page(vcookie->fscache, 332 ret = fscache_read_or_alloc_page(v9inode->fscache,
390 page, 333 page,
391 v9fs_vfs_readpage_complete, 334 v9fs_vfs_readpage_complete,
392 NULL, 335 NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
418 unsigned *nr_pages) 361 unsigned *nr_pages)
419{ 362{
420 int ret; 363 int ret;
421 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 364 const struct v9fs_inode *v9inode = V9FS_I(inode);
422 365
423 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); 366 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
424 if (!vcookie->fscache) 367 if (!v9inode->fscache)
425 return -ENOBUFS; 368 return -ENOBUFS;
426 369
427 ret = fscache_read_or_alloc_pages(vcookie->fscache, 370 ret = fscache_read_or_alloc_pages(v9inode->fscache,
428 mapping, pages, nr_pages, 371 mapping, pages, nr_pages,
429 v9fs_vfs_readpage_complete, 372 v9fs_vfs_readpage_complete,
430 NULL, 373 NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
453void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) 396void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
454{ 397{
455 int ret; 398 int ret;
456 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 399 const struct v9fs_inode *v9inode = V9FS_I(inode);
457 400
458 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 401 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
459 ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); 402 ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
460 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); 403 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
461 if (ret != 0) 404 if (ret != 0)
462 v9fs_uncache_page(inode, page); 405 v9fs_uncache_page(inode, page);
463} 406}
407
408/*
409 * wait for a page to complete writing to the cache
410 */
411void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
412{
413 const struct v9fs_inode *v9inode = V9FS_I(inode);
414 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
415 if (PageFsCache(page))
416 fscache_wait_on_page_write(v9inode->fscache, page);
417}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee8..049507a5b01c 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
25#include <linux/fscache.h> 25#include <linux/fscache.h>
26#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27 27
28extern struct kmem_cache *vcookie_cache;
29
30struct v9fs_cookie {
31 spinlock_t lock;
32 struct inode inode;
33 struct fscache_cookie *fscache;
34 struct p9_qid *qid;
35};
36
37static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
38{
39 return container_of(inode, struct v9fs_cookie, inode);
40}
41
42extern struct fscache_netfs v9fs_cache_netfs; 28extern struct fscache_netfs v9fs_cache_netfs;
43extern const struct fscache_cookie_def v9fs_cache_session_index_def; 29extern const struct fscache_cookie_def v9fs_cache_session_index_def;
44extern const struct fscache_cookie_def v9fs_cache_inode_index_def; 30extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
64 struct list_head *pages, 50 struct list_head *pages,
65 unsigned *nr_pages); 51 unsigned *nr_pages);
66extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); 52extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
67 53extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
68 54 struct page *page);
69/**
70 * v9fs_cache_register - Register v9fs file system with the cache
71 */
72static inline int v9fs_cache_register(void)
73{
74 return __v9fs_cache_register();
75}
76
77/**
78 * v9fs_cache_unregister - Unregister v9fs from the cache
79 */
80static inline void v9fs_cache_unregister(void)
81{
82 __v9fs_cache_unregister();
83}
84 55
85static inline int v9fs_fscache_release_page(struct page *page, 56static inline int v9fs_fscache_release_page(struct page *page,
86 gfp_t gfp) 57 gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
117 88
118static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 89static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
119{ 90{
120 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 91 struct v9fs_inode *v9inode = V9FS_I(inode);
121 fscache_uncache_page(vcookie->fscache, page); 92 fscache_uncache_page(v9inode->fscache, page);
122 BUG_ON(PageFsCache(page)); 93 BUG_ON(PageFsCache(page));
123} 94}
124 95
125static inline void v9fs_vcookie_set_qid(struct inode *inode, 96static inline void v9fs_fscache_set_key(struct inode *inode,
126 struct p9_qid *qid) 97 struct p9_qid *qid)
127{ 98{
128 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 99 struct v9fs_inode *v9inode = V9FS_I(inode);
129 spin_lock(&vcookie->lock); 100 spin_lock(&v9inode->fscache_lock);
130 vcookie->qid = qid; 101 v9inode->fscache_key = qid;
131 spin_unlock(&vcookie->lock); 102 spin_unlock(&v9inode->fscache_lock);
132} 103}
133 104
134#else /* CONFIG_9P_FSCACHE */ 105static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
135 106 struct page *page)
136static inline int v9fs_cache_register(void)
137{ 107{
138 return 1; 108 return __v9fs_fscache_wait_on_page_write(inode, page);
139} 109}
140 110
141static inline void v9fs_cache_unregister(void) {} 111#else /* CONFIG_9P_FSCACHE */
142 112
143static inline int v9fs_fscache_release_page(struct page *page, 113static inline int v9fs_fscache_release_page(struct page *page,
144 gfp_t gfp) { 114 gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
168static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 138static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
169{} 139{}
170 140
171static inline void v9fs_vcookie_set_qid(struct inode *inode, 141static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
172 struct p9_qid *qid) 142 struct page *page)
173{} 143{
144 return;
145}
174 146
175#endif /* CONFIG_9P_FSCACHE */ 147#endif /* CONFIG_9P_FSCACHE */
176#endif /* _9P_CACHE_H */ 148#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d70..cd63e002d826 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
125 return -ENOMEM; 125 return -ENOMEM;
126} 126}
127 127
128/** 128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 * v9fs_fid_lookup - lookup for a fid, try to walk if not found 129 uid_t uid, int any)
130 * @dentry: dentry to look for fid in
131 *
132 * Look for a fid in the specified dentry for the current user.
133 * If no fid is found, try to create one walking from a fid from the parent
134 * dentry (if it has one), or the root dentry. If the user haven't accessed
135 * the fs yet, attach now and walk from the root.
136 */
137
138struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
139{ 130{
140 int i, n, l, clone, any, access;
141 u32 uid;
142 struct p9_fid *fid, *old_fid = NULL;
143 struct dentry *ds; 131 struct dentry *ds;
144 struct v9fs_session_info *v9ses;
145 char **wnames, *uname; 132 char **wnames, *uname;
133 int i, n, l, clone, access;
134 struct v9fs_session_info *v9ses;
135 struct p9_fid *fid, *old_fid = NULL;
146 136
147 v9ses = v9fs_inode2v9ses(dentry->d_inode); 137 v9ses = v9fs_inode2v9ses(dentry->d_inode);
148 access = v9ses->flags & V9FS_ACCESS_MASK; 138 access = v9ses->flags & V9FS_ACCESS_MASK;
149 switch (access) {
150 case V9FS_ACCESS_SINGLE:
151 case V9FS_ACCESS_USER:
152 case V9FS_ACCESS_CLIENT:
153 uid = current_fsuid();
154 any = 0;
155 break;
156
157 case V9FS_ACCESS_ANY:
158 uid = v9ses->uid;
159 any = 1;
160 break;
161
162 default:
163 uid = ~0;
164 any = 0;
165 break;
166 }
167
168 fid = v9fs_fid_find(dentry, uid, any); 139 fid = v9fs_fid_find(dentry, uid, any);
169 if (fid) 140 if (fid)
170 return fid; 141 return fid;
@@ -250,6 +221,45 @@ err_out:
250 return fid; 221 return fid;
251} 222}
252 223
224/**
225 * v9fs_fid_lookup - lookup for a fid, try to walk if not found
226 * @dentry: dentry to look for fid in
227 *
228 * Look for a fid in the specified dentry for the current user.
229 * If no fid is found, try to create one walking from a fid from the parent
230 * dentry (if it has one), or the root dentry. If the user haven't accessed
231 * the fs yet, attach now and walk from the root.
232 */
233
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{
236 uid_t uid;
237 int any, access;
238 struct v9fs_session_info *v9ses;
239
240 v9ses = v9fs_inode2v9ses(dentry->d_inode);
241 access = v9ses->flags & V9FS_ACCESS_MASK;
242 switch (access) {
243 case V9FS_ACCESS_SINGLE:
244 case V9FS_ACCESS_USER:
245 case V9FS_ACCESS_CLIENT:
246 uid = current_fsuid();
247 any = 0;
248 break;
249
250 case V9FS_ACCESS_ANY:
251 uid = v9ses->uid;
252 any = 1;
253 break;
254
255 default:
256 uid = ~0;
257 any = 0;
258 break;
259 }
260 return v9fs_fid_lookup_with_uid(dentry, uid, any);
261}
262
253struct p9_fid *v9fs_fid_clone(struct dentry *dentry) 263struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
254{ 264{
255 struct p9_fid *fid, *ret; 265 struct p9_fid *fid, *ret;
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
261 ret = p9_client_walk(fid, 0, NULL, 1); 271 ret = p9_client_walk(fid, 0, NULL, 1);
262 return ret; 272 return ret;
263} 273}
274
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
276{
277 struct p9_fid *fid, *ret;
278
279 fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
280 if (IS_ERR(fid))
281 return fid;
282
283 ret = p9_client_walk(fid, 0, NULL, 1);
284 return ret;
285}
286
287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
288{
289 int err;
290 struct p9_fid *fid;
291
292 fid = v9fs_fid_clone_with_uid(dentry, 0);
293 if (IS_ERR(fid))
294 goto error_out;
295 /*
296 * writeback fid will only be used to write back the
297 * dirty pages. We always request for the open fid in read-write
298 * mode so that a partial page write which result in page
299 * read can work.
300 */
301 err = p9_client_open(fid, O_RDWR);
302 if (err < 0) {
303 p9_client_clunk(fid);
304 fid = ERR_PTR(err);
305 goto error_out;
306 }
307error_out:
308 return fid;
309}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996d..bb0b6e7f58fc 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
19 * Boston, MA 02111-1301 USA 19 * Boston, MA 02111-1301 USA
20 * 20 *
21 */ 21 */
22 22#ifndef FS_9P_FID_H
23#define FS_9P_FID_H
23#include <linux/list.h> 24#include <linux/list.h>
24 25
25/** 26/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
45struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); 46struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
46struct p9_fid *v9fs_fid_clone(struct dentry *dentry); 47struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
47int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); 48int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
49struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
50#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba83..c82b017f51f3 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
39 39
40static DEFINE_SPINLOCK(v9fs_sessionlist_lock); 40static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
41static LIST_HEAD(v9fs_sessionlist); 41static LIST_HEAD(v9fs_sessionlist);
42struct kmem_cache *v9fs_inode_cache;
42 43
43/* 44/*
44 * Option Parsing (code inspired by NFS code) 45 * Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
55 /* Cache options */ 56 /* Cache options */
56 Opt_cache_loose, Opt_fscache, 57 Opt_cache_loose, Opt_fscache,
57 /* Access options */ 58 /* Access options */
58 Opt_access, 59 Opt_access, Opt_posixacl,
59 /* Error token */ 60 /* Error token */
60 Opt_err 61 Opt_err
61}; 62};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
73 {Opt_fscache, "fscache"}, 74 {Opt_fscache, "fscache"},
74 {Opt_cachetag, "cachetag=%s"}, 75 {Opt_cachetag, "cachetag=%s"},
75 {Opt_access, "access=%s"}, 76 {Opt_access, "access=%s"},
77 {Opt_posixacl, "posixacl"},
76 {Opt_err, NULL} 78 {Opt_err, NULL}
77}; 79};
78 80
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
194 else if (strcmp(s, "any") == 0) 196 else if (strcmp(s, "any") == 0)
195 v9ses->flags |= V9FS_ACCESS_ANY; 197 v9ses->flags |= V9FS_ACCESS_ANY;
196 else if (strcmp(s, "client") == 0) { 198 else if (strcmp(s, "client") == 0) {
197#ifdef CONFIG_9P_FS_POSIX_ACL
198 v9ses->flags |= V9FS_ACCESS_CLIENT; 199 v9ses->flags |= V9FS_ACCESS_CLIENT;
199#else
200 P9_DPRINTK(P9_DEBUG_ERROR,
201 "access=client option not supported\n");
202 kfree(s);
203 ret = -EINVAL;
204 goto free_and_return;
205#endif
206 } else { 200 } else {
207 v9ses->flags |= V9FS_ACCESS_SINGLE; 201 v9ses->flags |= V9FS_ACCESS_SINGLE;
208 v9ses->uid = simple_strtoul(s, &e, 10); 202 v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
212 kfree(s); 206 kfree(s);
213 break; 207 break;
214 208
209 case Opt_posixacl:
210#ifdef CONFIG_9P_FS_POSIX_ACL
211 v9ses->flags |= V9FS_POSIX_ACL;
212#else
213 P9_DPRINTK(P9_DEBUG_ERROR,
214 "Not defined CONFIG_9P_FS_POSIX_ACL. "
215 "Ignoring posixacl option\n");
216#endif
217 break;
218
215 default: 219 default:
216 continue; 220 continue;
217 } 221 }
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
260 list_add(&v9ses->slist, &v9fs_sessionlist); 264 list_add(&v9ses->slist, &v9fs_sessionlist);
261 spin_unlock(&v9fs_sessionlist_lock); 265 spin_unlock(&v9fs_sessionlist_lock);
262 266
263 v9ses->flags = V9FS_ACCESS_USER;
264 strcpy(v9ses->uname, V9FS_DEFUSER); 267 strcpy(v9ses->uname, V9FS_DEFUSER);
265 strcpy(v9ses->aname, V9FS_DEFANAME); 268 strcpy(v9ses->aname, V9FS_DEFANAME);
266 v9ses->uid = ~0; 269 v9ses->uid = ~0;
267 v9ses->dfltuid = V9FS_DEFUID; 270 v9ses->dfltuid = V9FS_DEFUID;
268 v9ses->dfltgid = V9FS_DEFGID; 271 v9ses->dfltgid = V9FS_DEFGID;
269 272
270 rc = v9fs_parse_options(v9ses, data);
271 if (rc < 0) {
272 retval = rc;
273 goto error;
274 }
275
276 v9ses->clnt = p9_client_create(dev_name, data); 273 v9ses->clnt = p9_client_create(dev_name, data);
277 if (IS_ERR(v9ses->clnt)) { 274 if (IS_ERR(v9ses->clnt)) {
278 retval = PTR_ERR(v9ses->clnt); 275 retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
281 goto error; 278 goto error;
282 } 279 }
283 280
284 if (p9_is_proto_dotl(v9ses->clnt)) 281 v9ses->flags = V9FS_ACCESS_USER;
282
283 if (p9_is_proto_dotl(v9ses->clnt)) {
284 v9ses->flags = V9FS_ACCESS_CLIENT;
285 v9ses->flags |= V9FS_PROTO_2000L; 285 v9ses->flags |= V9FS_PROTO_2000L;
286 else if (p9_is_proto_dotu(v9ses->clnt)) 286 } else if (p9_is_proto_dotu(v9ses->clnt)) {
287 v9ses->flags |= V9FS_PROTO_2000U; 287 v9ses->flags |= V9FS_PROTO_2000U;
288 }
289
290 rc = v9fs_parse_options(v9ses, data);
291 if (rc < 0) {
292 retval = rc;
293 goto error;
294 }
288 295
289 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 296 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
290 297
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
306 v9ses->flags |= V9FS_ACCESS_ANY; 313 v9ses->flags |= V9FS_ACCESS_ANY;
307 v9ses->uid = ~0; 314 v9ses->uid = ~0;
308 } 315 }
316 if (!v9fs_proto_dotl(v9ses) ||
317 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
318 /*
319 * We support ACL checks on clinet only if the protocol is
320 * 9P2000.L and access is V9FS_ACCESS_CLIENT.
321 */
322 v9ses->flags &= ~V9FS_ACL_MASK;
323 }
309 324
310 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 325 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
311 v9ses->aname); 326 v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
467 kobject_put(v9fs_kobj); 482 kobject_put(v9fs_kobj);
468} 483}
469 484
485static void v9fs_inode_init_once(void *foo)
486{
487 struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
488#ifdef CONFIG_9P_FSCACHE
489 v9inode->fscache = NULL;
490 v9inode->fscache_key = NULL;
491#endif
492 inode_init_once(&v9inode->vfs_inode);
493}
494
495/**
496 * v9fs_init_inode_cache - initialize a cache for 9P
497 * Returns 0 on success.
498 */
499static int v9fs_init_inode_cache(void)
500{
501 v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
502 sizeof(struct v9fs_inode),
503 0, (SLAB_RECLAIM_ACCOUNT|
504 SLAB_MEM_SPREAD),
505 v9fs_inode_init_once);
506 if (!v9fs_inode_cache)
507 return -ENOMEM;
508
509 return 0;
510}
511
512/**
513 * v9fs_destroy_inode_cache - destroy the cache of 9P inode
514 *
515 */
516static void v9fs_destroy_inode_cache(void)
517{
518 kmem_cache_destroy(v9fs_inode_cache);
519}
520
521static int v9fs_cache_register(void)
522{
523 int ret;
524 ret = v9fs_init_inode_cache();
525 if (ret < 0)
526 return ret;
527#ifdef CONFIG_9P_FSCACHE
528 return fscache_register_netfs(&v9fs_cache_netfs);
529#else
530 return ret;
531#endif
532}
533
534static void v9fs_cache_unregister(void)
535{
536 v9fs_destroy_inode_cache();
537#ifdef CONFIG_9P_FSCACHE
538 fscache_unregister_netfs(&v9fs_cache_netfs);
539#endif
540}
541
470/** 542/**
471 * init_v9fs - Initialize module 543 * init_v9fs - Initialize module
472 * 544 *
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0d..bd8496db135b 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_H
24#define FS_9P_V9FS_H
25
23#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
24 27
25/** 28/**
@@ -28,8 +31,10 @@
28 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions 31 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
29 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy 32 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
30 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) 33 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
34 * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
31 * @V9FS_ACCESS_ANY: use a single attach for all users 35 * @V9FS_ACCESS_ANY: use a single attach for all users
32 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options 36 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
37 * @V9FS_POSIX_ACL: POSIX ACLs are enforced
33 * 38 *
34 * Session flags reflect options selected by users at mount time 39 * Session flags reflect options selected by users at mount time
35 */ 40 */
@@ -37,13 +42,15 @@
37 V9FS_ACCESS_USER | \ 42 V9FS_ACCESS_USER | \
38 V9FS_ACCESS_CLIENT) 43 V9FS_ACCESS_CLIENT)
39#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY 44#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
45#define V9FS_ACL_MASK V9FS_POSIX_ACL
40 46
41enum p9_session_flags { 47enum p9_session_flags {
42 V9FS_PROTO_2000U = 0x01, 48 V9FS_PROTO_2000U = 0x01,
43 V9FS_PROTO_2000L = 0x02, 49 V9FS_PROTO_2000L = 0x02,
44 V9FS_ACCESS_SINGLE = 0x04, 50 V9FS_ACCESS_SINGLE = 0x04,
45 V9FS_ACCESS_USER = 0x08, 51 V9FS_ACCESS_USER = 0x08,
46 V9FS_ACCESS_CLIENT = 0x10 52 V9FS_ACCESS_CLIENT = 0x10,
53 V9FS_POSIX_ACL = 0x20
47}; 54};
48 55
49/* possible values of ->cache */ 56/* possible values of ->cache */
@@ -109,8 +116,28 @@ struct v9fs_session_info {
109 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
110 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
111 struct rw_semaphore rename_sem; 118 struct rw_semaphore rename_sem;
119 struct p9_fid *root_fid; /* Used for file system sync */
120};
121
122/* cache_validity flags */
123#define V9FS_INO_INVALID_ATTR 0x01
124
125struct v9fs_inode {
126#ifdef CONFIG_9P_FSCACHE
127 spinlock_t fscache_lock;
128 struct fscache_cookie *fscache;
129 struct p9_qid *fscache_key;
130#endif
131 unsigned int cache_validity;
132 struct p9_fid *writeback_fid;
133 struct inode vfs_inode;
112}; 134};
113 135
136static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
137{
138 return container_of(inode, struct v9fs_inode, vfs_inode);
139}
140
114struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 141struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
115 char *); 142 char *);
116extern void v9fs_session_close(struct v9fs_session_info *v9ses); 143extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
124 struct inode *new_dir, struct dentry *new_dentry); 151 struct inode *new_dir, struct dentry *new_dentry);
125extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, 152extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
126 void *p); 153 void *p);
127extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses, 154extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
128 struct p9_fid *fid, 155 struct p9_fid *fid,
129 struct super_block *sb); 156 struct super_block *sb);
130
131extern const struct inode_operations v9fs_dir_inode_operations_dotl; 157extern const struct inode_operations v9fs_dir_inode_operations_dotl;
132extern const struct inode_operations v9fs_file_inode_operations_dotl; 158extern const struct inode_operations v9fs_file_inode_operations_dotl;
133extern const struct inode_operations v9fs_symlink_inode_operations_dotl; 159extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
134extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses, 160extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
135 struct p9_fid *fid, 161 struct p9_fid *fid,
136 struct super_block *sb); 162 struct super_block *sb);
137 163
138/* other default globals */ 164/* other default globals */
139#define V9FS_PORT 564 165#define V9FS_PORT 564
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
158} 184}
159 185
160/** 186/**
161 * v9fs_inode_from_fid - Helper routine to populate an inode by 187 * v9fs_get_inode_from_fid - Helper routine to populate an inode by
162 * issuing a attribute request 188 * issuing a attribute request
163 * @v9ses: session information 189 * @v9ses: session information
164 * @fid: fid to issue attribute request for 190 * @fid: fid to issue attribute request for
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
166 * 192 *
167 */ 193 */
168static inline struct inode * 194static inline struct inode *
169v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 195v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
170 struct super_block *sb) 196 struct super_block *sb)
171{ 197{
172 if (v9fs_proto_dotl(v9ses)) 198 if (v9fs_proto_dotl(v9ses))
173 return v9fs_inode_dotl(v9ses, fid, sb); 199 return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
174 else 200 else
175 return v9fs_inode(v9ses, fid, sb); 201 return v9fs_inode_from_fid(v9ses, fid, sb);
176} 202}
203#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597ec..4014160903a9 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_VFS_H
24#define FS_9P_V9FS_VFS_H
23 25
24/* plan9 semantics are that created files are implicitly opened. 26/* plan9 semantics are that created files are implicitly opened.
25 * But linux semantics are that you call create, then open. 27 * But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
36 * unlink calls remove, which is an implicit clunk. So we have to track 38 * unlink calls remove, which is an implicit clunk. So we have to track
37 * that kind of thing so that we don't try to clunk a dead fid. 39 * that kind of thing so that we don't try to clunk a dead fid.
38 */ 40 */
41#define P9_LOCK_TIMEOUT (30*HZ)
39 42
40extern struct file_system_type v9fs_fs_type; 43extern struct file_system_type v9fs_fs_type;
41extern const struct address_space_operations v9fs_addr_operations; 44extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
45extern const struct file_operations v9fs_dir_operations_dotl; 48extern const struct file_operations v9fs_dir_operations_dotl;
46extern const struct dentry_operations v9fs_dentry_operations; 49extern const struct dentry_operations v9fs_dentry_operations;
47extern const struct dentry_operations v9fs_cached_dentry_operations; 50extern const struct dentry_operations v9fs_cached_dentry_operations;
51extern const struct file_operations v9fs_cached_file_operations;
52extern const struct file_operations v9fs_cached_file_operations_dotl;
53extern struct kmem_cache *v9fs_inode_cache;
48 54
49#ifdef CONFIG_9P_FSCACHE
50struct inode *v9fs_alloc_inode(struct super_block *sb); 55struct inode *v9fs_alloc_inode(struct super_block *sb);
51void v9fs_destroy_inode(struct inode *inode); 56void v9fs_destroy_inode(struct inode *inode);
52#endif
53
54struct inode *v9fs_get_inode(struct super_block *sb, int mode); 57struct inode *v9fs_get_inode(struct super_block *sb, int mode);
58int v9fs_init_inode(struct v9fs_session_info *v9ses,
59 struct inode *inode, int mode);
55void v9fs_evict_inode(struct inode *inode); 60void v9fs_evict_inode(struct inode *inode);
56ino_t v9fs_qid2ino(struct p9_qid *qid); 61ino_t v9fs_qid2ino(struct p9_qid *qid);
57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
62int v9fs_uflags2omode(int uflags, int extended); 67int v9fs_uflags2omode(int uflags, int extended);
63 68
64ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 69ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
65void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
66int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
67int v9fs_file_fsync_dotl(struct file *filp, int datasync); 73int v9fs_file_fsync_dotl(struct file *filp, int datasync);
68 74ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
69#define P9_LOCK_TIMEOUT (30*HZ) 75 const char __user *, size_t, loff_t *, int);
76int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
77int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
78static inline void v9fs_invalidate_inode_attr(struct inode *inode)
79{
80 struct v9fs_inode *v9inode;
81 v9inode = V9FS_I(inode);
82 v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
83 return;
84}
85#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863e..2524e4cbb8ea 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
39#include "v9fs.h" 39#include "v9fs.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "cache.h" 41#include "cache.h"
42#include "fid.h"
42 43
43/** 44/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 45 * v9fs_fid_readpage - read an entire page in from 9P
45 * 46 *
46 * @filp: file being read 47 * @fid: fid being read
47 * @page: structure to page 48 * @page: structure to page
48 * 49 *
49 */ 50 */
50 51static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
51static int v9fs_vfs_readpage(struct file *filp, struct page *page)
52{ 52{
53 int retval; 53 int retval;
54 loff_t offset; 54 loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
67 buffer = kmap(page); 67 buffer = kmap(page);
68 offset = page_offset(page); 68 offset = page_offset(page);
69 69
70 retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); 70 retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
71 if (retval < 0) { 71 if (retval < 0) {
72 v9fs_uncache_page(inode, page); 72 v9fs_uncache_page(inode, page);
73 goto done; 73 goto done;
@@ -87,6 +87,19 @@ done:
87} 87}
88 88
89/** 89/**
90 * v9fs_vfs_readpage - read an entire page in from 9P
91 *
92 * @filp: file being read
93 * @page: structure to page
94 *
95 */
96
97static int v9fs_vfs_readpage(struct file *filp, struct page *page)
98{
99 return v9fs_fid_readpage(filp->private_data, page);
100}
101
102/**
90 * v9fs_vfs_readpages - read a set of pages from 9P 103 * v9fs_vfs_readpages - read a set of pages from 9P
91 * 104 *
92 * @filp: file being read 105 * @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
124{ 137{
125 if (PagePrivate(page)) 138 if (PagePrivate(page))
126 return 0; 139 return 0;
127
128 return v9fs_fscache_release_page(page, gfp); 140 return v9fs_fscache_release_page(page, gfp);
129} 141}
130 142
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
137 149
138static void v9fs_invalidate_page(struct page *page, unsigned long offset) 150static void v9fs_invalidate_page(struct page *page, unsigned long offset)
139{ 151{
152 /*
153 * If called with zero offset, we should release
154 * the private state assocated with the page
155 */
140 if (offset == 0) 156 if (offset == 0)
141 v9fs_fscache_invalidate_page(page); 157 v9fs_fscache_invalidate_page(page);
142} 158}
143 159
160static int v9fs_vfs_writepage_locked(struct page *page)
161{
162 char *buffer;
163 int retval, len;
164 loff_t offset, size;
165 mm_segment_t old_fs;
166 struct v9fs_inode *v9inode;
167 struct inode *inode = page->mapping->host;
168
169 v9inode = V9FS_I(inode);
170 size = i_size_read(inode);
171 if (page->index == size >> PAGE_CACHE_SHIFT)
172 len = size & ~PAGE_CACHE_MASK;
173 else
174 len = PAGE_CACHE_SIZE;
175
176 set_page_writeback(page);
177
178 buffer = kmap(page);
179 offset = page_offset(page);
180
181 old_fs = get_fs();
182 set_fs(get_ds());
183 /* We should have writeback_fid always set */
184 BUG_ON(!v9inode->writeback_fid);
185
186 retval = v9fs_file_write_internal(inode,
187 v9inode->writeback_fid,
188 (__force const char __user *)buffer,
189 len, &offset, 0);
190 if (retval > 0)
191 retval = 0;
192
193 set_fs(old_fs);
194 kunmap(page);
195 end_page_writeback(page);
196 return retval;
197}
198
199static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
200{
201 int retval;
202
203 retval = v9fs_vfs_writepage_locked(page);
204 if (retval < 0) {
205 if (retval == -EAGAIN) {
206 redirty_page_for_writepage(wbc, page);
207 retval = 0;
208 } else {
209 SetPageError(page);
210 mapping_set_error(page->mapping, retval);
211 }
212 } else
213 retval = 0;
214
215 unlock_page(page);
216 return retval;
217}
218
144/** 219/**
145 * v9fs_launder_page - Writeback a dirty page 220 * v9fs_launder_page - Writeback a dirty page
146 * Since the writes go directly to the server, we simply return a 0
147 * here to indicate success.
148 *
149 * Returns 0 on success. 221 * Returns 0 on success.
150 */ 222 */
151 223
152static int v9fs_launder_page(struct page *page) 224static int v9fs_launder_page(struct page *page)
153{ 225{
226 int retval;
227 struct inode *inode = page->mapping->host;
228
229 v9fs_fscache_wait_on_page_write(inode, page);
230 if (clear_page_dirty_for_io(page)) {
231 retval = v9fs_vfs_writepage_locked(page);
232 if (retval)
233 return retval;
234 }
154 return 0; 235 return 0;
155} 236}
156 237
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
173 * with an error. 254 * with an error.
174 * 255 *
175 */ 256 */
176ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 257static ssize_t
177 loff_t pos, unsigned long nr_segs) 258v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
259 loff_t pos, unsigned long nr_segs)
178{ 260{
261 /*
262 * FIXME
263 * Now that we do caching with cache mode enabled, We need
264 * to support direct IO
265 */
179 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " 266 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
180 "off/no(%lld/%lu) EINVAL\n", 267 "off/no(%lld/%lu) EINVAL\n",
181 iocb->ki_filp->f_path.dentry->d_name.name, 268 iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
183 270
184 return -EINVAL; 271 return -EINVAL;
185} 272}
273
274static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
275 loff_t pos, unsigned len, unsigned flags,
276 struct page **pagep, void **fsdata)
277{
278 int retval = 0;
279 struct page *page;
280 struct v9fs_inode *v9inode;
281 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
282 struct inode *inode = mapping->host;
283
284 v9inode = V9FS_I(inode);
285start:
286 page = grab_cache_page_write_begin(mapping, index, flags);
287 if (!page) {
288 retval = -ENOMEM;
289 goto out;
290 }
291 BUG_ON(!v9inode->writeback_fid);
292 if (PageUptodate(page))
293 goto out;
294
295 if (len == PAGE_CACHE_SIZE)
296 goto out;
297
298 retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
299 page_cache_release(page);
300 if (!retval)
301 goto start;
302out:
303 *pagep = page;
304 return retval;
305}
306
307static int v9fs_write_end(struct file *filp, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned copied,
309 struct page *page, void *fsdata)
310{
311 loff_t last_pos = pos + copied;
312 struct inode *inode = page->mapping->host;
313
314 if (unlikely(copied < len)) {
315 /*
316 * zero out the rest of the area
317 */
318 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
319
320 zero_user(page, from + copied, len - copied);
321 flush_dcache_page(page);
322 }
323
324 if (!PageUptodate(page))
325 SetPageUptodate(page);
326 /*
327 * No need to use i_size_read() here, the i_size
328 * cannot change under us because we hold the i_mutex.
329 */
330 if (last_pos > inode->i_size) {
331 inode_add_bytes(inode, last_pos - inode->i_size);
332 i_size_write(inode, last_pos);
333 }
334 set_page_dirty(page);
335 unlock_page(page);
336 page_cache_release(page);
337
338 return copied;
339}
340
341
186const struct address_space_operations v9fs_addr_operations = { 342const struct address_space_operations v9fs_addr_operations = {
187 .readpage = v9fs_vfs_readpage, 343 .readpage = v9fs_vfs_readpage,
188 .readpages = v9fs_vfs_readpages, 344 .readpages = v9fs_vfs_readpages,
189 .releasepage = v9fs_release_page, 345 .set_page_dirty = __set_page_dirty_nobuffers,
190 .invalidatepage = v9fs_invalidate_page, 346 .writepage = v9fs_vfs_writepage,
191 .launder_page = v9fs_launder_page, 347 .write_begin = v9fs_write_begin,
192 .direct_IO = v9fs_direct_IO, 348 .write_end = v9fs_write_end,
349 .releasepage = v9fs_release_page,
350 .invalidatepage = v9fs_invalidate_page,
351 .launder_page = v9fs_launder_page,
352 .direct_IO = v9fs_direct_IO,
193}; 353};
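
The write_begin/write_end pair added above plugs 9p into the generic buffered-write path. As a rough illustration of what v9fs_write_end computes (zero the uncopied tail of the region requested in write_begin, then grow the recorded size only when the write ends beyond it), here is a minimal userspace sketch, not part of the patch; the byte array standing in for a page, the plain integer standing in for i_size and the model_write_end() name are all made up for this example.

#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE 4096UL

/*
 * Model of the v9fs_write_end arithmetic: 'page' stands in for one
 * page cache page, 'isize' for inode->i_size. 'pos' is the file
 * position of the write, 'len' the length asked for in write_begin,
 * 'copied' how much the caller actually copied in.
 */
static size_t model_write_end(unsigned char *page, unsigned long long *isize,
			      unsigned long long pos, size_t len, size_t copied)
{
	unsigned long long last_pos = pos + copied;
	size_t from = pos & (DEMO_PAGE_SIZE - 1);	/* offset of the write inside the page */

	if (copied < len)
		memset(page + from + copied, 0, len - copied);	/* zero the uncopied tail */

	if (last_pos > *isize)
		*isize = last_pos;	/* i_size_write() equivalent */
	return copied;
}

int main(void)
{
	unsigned char page[DEMO_PAGE_SIZE];
	unsigned long long isize = 100;

	memset(page, 0xff, sizeof(page));
	/* write_begin asked for 32 bytes at offset 100, but only 20 arrived */
	model_write_end(page, &isize, 100, 32, 20);
	printf("i_size=%llu, first zeroed byte=%d\n", isize, page[100 + 20]);
	return 0;
}

The real function also marks the page uptodate and dirty, unlocks it and drops its reference; only the zeroing and size arithmetic is modeled here.
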
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4ffe5e..b6a3b9f7fe4d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0 63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0
64 * @dentry: dentry in question 64 * @dentry: dentry in question
65 * 65 *
66 * Only return 1 if our inode is invalid. Only non-synthetic files
67 * (ones without mtime == 0) should be calling this function.
68 *
69 */ 66 */
70
71static int v9fs_cached_dentry_delete(const struct dentry *dentry) 67static int v9fs_cached_dentry_delete(const struct dentry *dentry)
72{ 68{
73 struct inode *inode = dentry->d_inode; 69 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 70 dentry->d_name.name, dentry);
75 dentry);
76 71
77 if(!inode) 72 /* Don't cache negative dentries */
73 if (!dentry->d_inode)
78 return 1; 74 return 1;
79
80 return 0; 75 return 0;
81} 76}
82 77
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
105 } 100 }
106} 101}
107 102
103static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
104{
105 struct p9_fid *fid;
106 struct inode *inode;
107 struct v9fs_inode *v9inode;
108
109 if (nd->flags & LOOKUP_RCU)
110 return -ECHILD;
111
112 inode = dentry->d_inode;
113 if (!inode)
114 goto out_valid;
115
116 v9inode = V9FS_I(inode);
117 if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
118 int retval;
119 struct v9fs_session_info *v9ses;
120 fid = v9fs_fid_lookup(dentry);
121 if (IS_ERR(fid))
122 return PTR_ERR(fid);
123
124 v9ses = v9fs_inode2v9ses(inode);
125 if (v9fs_proto_dotl(v9ses))
126 retval = v9fs_refresh_inode_dotl(fid, inode);
127 else
128 retval = v9fs_refresh_inode(fid, inode);
129 if (retval <= 0)
130 return retval;
131 }
132out_valid:
133 return 1;
134}
135
108const struct dentry_operations v9fs_cached_dentry_operations = { 136const struct dentry_operations v9fs_cached_dentry_operations = {
137 .d_revalidate = v9fs_lookup_revalidate,
109 .d_delete = v9fs_cached_dentry_delete, 138 .d_delete = v9fs_cached_dentry_delete,
110 .d_release = v9fs_dentry_release, 139 .d_release = v9fs_dentry_release,
111}; 140};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8cefed..9c2bdda5cd9d 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
295 P9_DPRINTK(P9_DEBUG_VFS, 295 P9_DPRINTK(P9_DEBUG_VFS,
296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n", 296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
297 inode, filp, fid ? fid->fid : -1); 297 inode, filp, fid ? fid->fid : -1);
298 filemap_write_and_wait(inode->i_mapping);
299 if (fid) 298 if (fid)
300 p9_client_clunk(fid); 299 p9_client_clunk(fid);
301 return 0; 300 return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c30674396..78bcb97c3425 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
44#include "fid.h" 44#include "fid.h"
45#include "cache.h" 45#include "cache.h"
46 46
47static const struct file_operations v9fs_cached_file_operations; 47static const struct vm_operations_struct v9fs_file_vm_ops;
48static const struct file_operations v9fs_cached_file_operations_dotl;
49 48
50/** 49/**
51 * v9fs_file_open - open a file (or directory) 50 * v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
57int v9fs_file_open(struct inode *inode, struct file *file) 56int v9fs_file_open(struct inode *inode, struct file *file)
58{ 57{
59 int err; 58 int err;
59 struct v9fs_inode *v9inode;
60 struct v9fs_session_info *v9ses; 60 struct v9fs_session_info *v9ses;
61 struct p9_fid *fid; 61 struct p9_fid *fid;
62 int omode; 62 int omode;
63 63
64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); 64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
65 v9inode = V9FS_I(inode);
65 v9ses = v9fs_inode2v9ses(inode); 66 v9ses = v9fs_inode2v9ses(inode);
66 if (v9fs_proto_dotl(v9ses)) 67 if (v9fs_proto_dotl(v9ses))
67 omode = file->f_flags; 68 omode = file->f_flags;
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
89 } 90 }
90 91
91 file->private_data = fid; 92 file->private_data = fid;
92 if ((fid->qid.version) && (v9ses->cache)) { 93 if (v9ses->cache && !v9inode->writeback_fid) {
93 P9_DPRINTK(P9_DEBUG_VFS, "cached"); 94 /*
94 /* enable cached file options */ 95 * clone a fid and add it to writeback_fid
95 if(file->f_op == &v9fs_file_operations) 96 * we do it during open time instead of
96 file->f_op = &v9fs_cached_file_operations; 97 * page dirty time via write_begin/page_mkwrite
97 else if (file->f_op == &v9fs_file_operations_dotl) 98 * because we want write after unlink usecase
98 file->f_op = &v9fs_cached_file_operations_dotl; 99 * to work.
99 100 */
101 fid = v9fs_writeback_fid(file->f_path.dentry);
102 if (IS_ERR(fid)) {
103 err = PTR_ERR(fid);
104 goto out_error;
105 }
106 v9inode->writeback_fid = (void *) fid;
107 }
100#ifdef CONFIG_9P_FSCACHE 108#ifdef CONFIG_9P_FSCACHE
109 if (v9ses->cache)
101 v9fs_cache_inode_set_cookie(inode, file); 110 v9fs_cache_inode_set_cookie(inode, file);
102#endif 111#endif
103 }
104
105 return 0; 112 return 0;
113out_error:
114 p9_client_clunk(file->private_data);
115 file->private_data = NULL;
116 return err;
106} 117}
107 118
108/** 119/**
@@ -335,25 +346,22 @@ out_err:
335} 346}
336 347
337/** 348/**
338 * v9fs_file_readn - read from a file 349 * v9fs_fid_readn - read from a fid
339 * @filp: file pointer to read 350 * @fid: fid to read
340 * @data: data buffer to read data into 351 * @data: data buffer to read data into
341 * @udata: user data buffer to read data into 352 * @udata: user data buffer to read data into
342 * @count: size of buffer 353 * @count: size of buffer
343 * @offset: offset at which to read data 354 * @offset: offset at which to read data
344 * 355 *
345 */ 356 */
346
347ssize_t 357ssize_t
348v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, 358v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
349 u64 offset) 359 u64 offset)
350{ 360{
351 int n, total, size; 361 int n, total, size;
352 struct p9_fid *fid = filp->private_data;
353 362
354 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, 363 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
355 (long long unsigned) offset, count); 364 (long long unsigned) offset, count);
356
357 n = 0; 365 n = 0;
358 total = 0; 366 total = 0;
359 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 367 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
379} 387}
380 388
381/** 389/**
390 * v9fs_file_readn - read from a file
391 * @filp: file pointer to read
392 * @data: data buffer to read data into
393 * @udata: user data buffer to read data into
394 * @count: size of buffer
395 * @offset: offset at which to read data
396 *
397 */
398ssize_t
399v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
400 u64 offset)
401{
402 return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
403}
404
405/**
382 * v9fs_file_read - read from a file 406 * v9fs_file_read - read from a file
383 * @filp: file pointer to read 407 * @filp: file pointer to read
384 * @udata: user data buffer to read data into 408 * @udata: user data buffer to read data into
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
410 return ret; 434 return ret;
411} 435}
412 436
413/** 437ssize_t
414 * v9fs_file_write - write to a file 438v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
415 * @filp: file pointer to write 439 const char __user *data, size_t count,
416 * @data: data buffer to write data from 440 loff_t *offset, int invalidate)
417 * @count: size of buffer
418 * @offset: offset at which to write data
419 *
420 */
421
422static ssize_t
423v9fs_file_write(struct file *filp, const char __user * data,
424 size_t count, loff_t * offset)
425{ 441{
426 ssize_t retval;
427 size_t total = 0;
428 int n; 442 int n;
429 struct p9_fid *fid; 443 loff_t i_size;
444 size_t total = 0;
430 struct p9_client *clnt; 445 struct p9_client *clnt;
431 struct inode *inode = filp->f_path.dentry->d_inode;
432 loff_t origin = *offset; 446 loff_t origin = *offset;
433 unsigned long pg_start, pg_end; 447 unsigned long pg_start, pg_end;
434 448
435 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 449 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
436 (int)count, (int)*offset); 450 (int)count, (int)*offset);
437 451
438 fid = filp->private_data;
439 clnt = fid->clnt; 452 clnt = fid->clnt;
440
441 retval = generic_write_checks(filp, &origin, &count, 0);
442 if (retval)
443 goto out;
444
445 retval = -EINVAL;
446 if ((ssize_t) count < 0)
447 goto out;
448 retval = 0;
449 if (!count)
450 goto out;
451
452 do { 453 do {
453 n = p9_client_write(fid, NULL, data+total, origin+total, count); 454 n = p9_client_write(fid, NULL, data+total, origin+total, count);
454 if (n <= 0) 455 if (n <= 0)
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data,
457 total += n; 458 total += n;
458 } while (count > 0); 459 } while (count > 0);
459 460
460 if (total > 0) { 461 if (invalidate && (total > 0)) {
461 pg_start = origin >> PAGE_CACHE_SHIFT; 462 pg_start = origin >> PAGE_CACHE_SHIFT;
462 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; 463 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
463 if (inode->i_mapping && inode->i_mapping->nrpages) 464 if (inode->i_mapping && inode->i_mapping->nrpages)
464 invalidate_inode_pages2_range(inode->i_mapping, 465 invalidate_inode_pages2_range(inode->i_mapping,
465 pg_start, pg_end); 466 pg_start, pg_end);
466 *offset += total; 467 *offset += total;
467 i_size_write(inode, i_size_read(inode) + total); 468 i_size = i_size_read(inode);
468 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 469 if (*offset > i_size) {
470 inode_add_bytes(inode, *offset - i_size);
471 i_size_write(inode, *offset);
472 }
469 } 473 }
470
471 if (n < 0) 474 if (n < 0)
472 retval = n; 475 return n;
473 else 476
474 retval = total; 477 return total;
478}
479
480/**
481 * v9fs_file_write - write to a file
482 * @filp: file pointer to write
483 * @data: data buffer to write data from
484 * @count: size of buffer
485 * @offset: offset at which to write data
486 *
487 */
488static ssize_t
489v9fs_file_write(struct file *filp, const char __user * data,
490 size_t count, loff_t *offset)
491{
492 ssize_t retval = 0;
493 loff_t origin = *offset;
494
495
496 retval = generic_write_checks(filp, &origin, &count, 0);
497 if (retval)
498 goto out;
499
500 retval = -EINVAL;
501 if ((ssize_t) count < 0)
502 goto out;
503 retval = 0;
504 if (!count)
505 goto out;
506
507 return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
508 filp->private_data,
509 data, count, offset, 1);
475out: 510out:
476 return retval; 511 return retval;
477} 512}
478 513
514
479static int v9fs_file_fsync(struct file *filp, int datasync) 515static int v9fs_file_fsync(struct file *filp, int datasync)
480{ 516{
481 struct p9_fid *fid; 517 struct p9_fid *fid;
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
505 return retval; 541 return retval;
506} 542}
507 543
508static const struct file_operations v9fs_cached_file_operations = { 544static int
545v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
546{
547 int retval;
548
549 retval = generic_file_mmap(file, vma);
550 if (!retval)
551 vma->vm_ops = &v9fs_file_vm_ops;
552
553 return retval;
554}
555
556static int
557v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
558{
559 struct v9fs_inode *v9inode;
560 struct page *page = vmf->page;
561 struct file *filp = vma->vm_file;
562 struct inode *inode = filp->f_path.dentry->d_inode;
563
564
565 P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
566 page, (unsigned long)filp->private_data);
567
568 v9inode = V9FS_I(inode);
569 /* make sure the cache has finished storing the page */
570 v9fs_fscache_wait_on_page_write(inode, page);
571 BUG_ON(!v9inode->writeback_fid);
572 lock_page(page);
573 if (page->mapping != inode->i_mapping)
574 goto out_unlock;
575
576 return VM_FAULT_LOCKED;
577out_unlock:
578 unlock_page(page);
579 return VM_FAULT_NOPAGE;
580}
581
582static ssize_t
583v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
584 loff_t *offsetp)
585{
586 loff_t size, offset;
587 struct inode *inode;
588 struct address_space *mapping;
589
590 offset = *offsetp;
591 mapping = filp->f_mapping;
592 inode = mapping->host;
593 if (!count)
594 return 0;
595 size = i_size_read(inode);
596 if (offset < size)
597 filemap_write_and_wait_range(mapping, offset,
598 offset + count - 1);
599
600 return v9fs_file_read(filp, udata, count, offsetp);
601}
602
603/**
604 * v9fs_cached_file_read - read from a file
605 * @filp: file pointer to read
606 * @udata: user data buffer to read data into
607 * @count: size of buffer
608 * @offset: offset at which to read data
609 *
610 */
611static ssize_t
612v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
613 loff_t *offset)
614{
615 if (filp->f_flags & O_DIRECT)
616 return v9fs_direct_read(filp, data, count, offset);
617 return do_sync_read(filp, data, count, offset);
618}
619
620static ssize_t
621v9fs_direct_write(struct file *filp, const char __user * data,
622 size_t count, loff_t *offsetp)
623{
624 loff_t offset;
625 ssize_t retval;
626 struct inode *inode;
627 struct address_space *mapping;
628
629 offset = *offsetp;
630 mapping = filp->f_mapping;
631 inode = mapping->host;
632 if (!count)
633 return 0;
634
635 mutex_lock(&inode->i_mutex);
636 retval = filemap_write_and_wait_range(mapping, offset,
637 offset + count - 1);
638 if (retval)
639 goto err_out;
640 /*
641 * After a write we want buffered reads to be sure to go to disk to get
642 * the new data. We invalidate clean cached page from the region we're
643 * about to write. We do this *before* the write so that if we fail
644 * here we fall back to buffered write
645 */
646 if (mapping->nrpages) {
647 pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
648 pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
649
650 retval = invalidate_inode_pages2_range(mapping,
651 pg_start, pg_end);
652 /*
653 * If a page can not be invalidated, fall back
654 * to buffered write.
655 */
656 if (retval) {
657 if (retval == -EBUSY)
658 goto buff_write;
659 goto err_out;
660 }
661 }
662 retval = v9fs_file_write(filp, data, count, offsetp);
663err_out:
664 mutex_unlock(&inode->i_mutex);
665 return retval;
666
667buff_write:
668 mutex_unlock(&inode->i_mutex);
669 return do_sync_write(filp, data, count, offsetp);
670}
671
672/**
673 * v9fs_cached_file_write - write to a file
674 * @filp: file pointer to write
675 * @data: data buffer to write data from
676 * @count: size of buffer
677 * @offset: offset at which to write data
678 *
679 */
680static ssize_t
681v9fs_cached_file_write(struct file *filp, const char __user * data,
682 size_t count, loff_t *offset)
683{
684
685 if (filp->f_flags & O_DIRECT)
686 return v9fs_direct_write(filp, data, count, offset);
687 return do_sync_write(filp, data, count, offset);
688}
689
690static const struct vm_operations_struct v9fs_file_vm_ops = {
691 .fault = filemap_fault,
692 .page_mkwrite = v9fs_vm_page_mkwrite,
693};
694
695
696const struct file_operations v9fs_cached_file_operations = {
509 .llseek = generic_file_llseek, 697 .llseek = generic_file_llseek,
510 .read = do_sync_read, 698 .read = v9fs_cached_file_read,
699 .write = v9fs_cached_file_write,
511 .aio_read = generic_file_aio_read, 700 .aio_read = generic_file_aio_read,
512 .write = v9fs_file_write, 701 .aio_write = generic_file_aio_write,
513 .open = v9fs_file_open, 702 .open = v9fs_file_open,
514 .release = v9fs_dir_release, 703 .release = v9fs_dir_release,
515 .lock = v9fs_file_lock, 704 .lock = v9fs_file_lock,
516 .mmap = generic_file_readonly_mmap, 705 .mmap = v9fs_file_mmap,
517 .fsync = v9fs_file_fsync, 706 .fsync = v9fs_file_fsync,
518}; 707};
519 708
520static const struct file_operations v9fs_cached_file_operations_dotl = { 709const struct file_operations v9fs_cached_file_operations_dotl = {
521 .llseek = generic_file_llseek, 710 .llseek = generic_file_llseek,
522 .read = do_sync_read, 711 .read = v9fs_cached_file_read,
712 .write = v9fs_cached_file_write,
523 .aio_read = generic_file_aio_read, 713 .aio_read = generic_file_aio_read,
524 .write = v9fs_file_write, 714 .aio_write = generic_file_aio_write,
525 .open = v9fs_file_open, 715 .open = v9fs_file_open,
526 .release = v9fs_dir_release, 716 .release = v9fs_dir_release,
527 .lock = v9fs_file_lock_dotl, 717 .lock = v9fs_file_lock_dotl,
528 .flock = v9fs_file_flock_dotl, 718 .flock = v9fs_file_flock_dotl,
529 .mmap = generic_file_readonly_mmap, 719 .mmap = v9fs_file_mmap,
530 .fsync = v9fs_file_fsync_dotl, 720 .fsync = v9fs_file_fsync_dotl,
531}; 721};
532 722
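
v9fs_file_write_internal, factored out of v9fs_file_write above, loops because a single p9_client_write call may transfer fewer bytes than requested (9P bounds each message's payload). Below is a minimal userspace sketch of the same short-write loop, with write(2) standing in for the 9P call; write_all() is an invented name and this is an illustration of the pattern, not code from the patch.

#include <stddef.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Keep writing until 'count' bytes have been pushed or the sink
 * stops making progress, mirroring the loop structure of
 * v9fs_file_write_internal (which calls p9_client_write instead).
 */
ssize_t write_all(int fd, const char *buf, size_t count)
{
	size_t total = 0;
	ssize_t n = 0;

	while (count > 0) {
		n = write(fd, buf + total, count);
		if (n <= 0)
			break;		/* error or no progress, stop */
		total += n;
		count -= n;
	}
	if (n < 0)
		return -1;		/* report the failure, as the helper does */
	return total;
}

int main(void)
{
	const char msg[] = "short-write demo\n";

	return write_all(STDOUT_FILENO, msg, sizeof(msg) - 1) < 0 ? 1 : 0;
}

The kernel helper additionally invalidates the written range of the page cache and bumps i_size when asked to invalidate; that part depends on struct inode and is not modeled here.
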
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40bdf4c2..8a2c232f708a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
203 wstat->extension = NULL; 203 wstat->extension = NULL;
204} 204}
205 205
206#ifdef CONFIG_9P_FSCACHE
207/** 206/**
208 * v9fs_alloc_inode - helper function to allocate an inode 207 * v9fs_alloc_inode - helper function to allocate an inode
209 * This callback is executed before setting up the inode so that we
210 * can associate a vcookie with each inode.
211 * 208 *
212 */ 209 */
213
214struct inode *v9fs_alloc_inode(struct super_block *sb) 210struct inode *v9fs_alloc_inode(struct super_block *sb)
215{ 211{
216 struct v9fs_cookie *vcookie; 212 struct v9fs_inode *v9inode;
217 vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, 213 v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
218 GFP_KERNEL); 214 GFP_KERNEL);
219 if (!vcookie) 215 if (!v9inode)
220 return NULL; 216 return NULL;
221 217#ifdef CONFIG_9P_FSCACHE
222 vcookie->fscache = NULL; 218 v9inode->fscache = NULL;
223 vcookie->qid = NULL; 219 v9inode->fscache_key = NULL;
224 spin_lock_init(&vcookie->lock); 220 spin_lock_init(&v9inode->fscache_lock);
225 return &vcookie->inode; 221#endif
222 v9inode->writeback_fid = NULL;
223 v9inode->cache_validity = 0;
224 return &v9inode->vfs_inode;
226} 225}
227 226
228/** 227/**
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head)
234{ 233{
235 struct inode *inode = container_of(head, struct inode, i_rcu); 234 struct inode *inode = container_of(head, struct inode, i_rcu);
236 INIT_LIST_HEAD(&inode->i_dentry); 235 INIT_LIST_HEAD(&inode->i_dentry);
237 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); 236 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
238} 237}
239 238
240void v9fs_destroy_inode(struct inode *inode) 239void v9fs_destroy_inode(struct inode *inode)
241{ 240{
242 call_rcu(&inode->i_rcu, v9fs_i_callback); 241 call_rcu(&inode->i_rcu, v9fs_i_callback);
243} 242}
244#endif
245 243
246/** 244int v9fs_init_inode(struct v9fs_session_info *v9ses,
247 * v9fs_get_inode - helper function to setup an inode 245 struct inode *inode, int mode)
248 * @sb: superblock
249 * @mode: mode to setup inode with
250 *
251 */
252
253struct inode *v9fs_get_inode(struct super_block *sb, int mode)
254{ 246{
255 int err; 247 int err = 0;
256 struct inode *inode;
257 struct v9fs_session_info *v9ses = sb->s_fs_info;
258
259 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
260
261 inode = new_inode(sb);
262 if (!inode) {
263 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
264 return ERR_PTR(-ENOMEM);
265 }
266 248
267 inode_init_owner(inode, NULL, mode); 249 inode_init_owner(inode, NULL, mode);
268 inode->i_blocks = 0; 250 inode->i_blocks = 0;
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
292 case S_IFREG: 274 case S_IFREG:
293 if (v9fs_proto_dotl(v9ses)) { 275 if (v9fs_proto_dotl(v9ses)) {
294 inode->i_op = &v9fs_file_inode_operations_dotl; 276 inode->i_op = &v9fs_file_inode_operations_dotl;
295 inode->i_fop = &v9fs_file_operations_dotl; 277 if (v9ses->cache)
278 inode->i_fop =
279 &v9fs_cached_file_operations_dotl;
280 else
281 inode->i_fop = &v9fs_file_operations_dotl;
296 } else { 282 } else {
297 inode->i_op = &v9fs_file_inode_operations; 283 inode->i_op = &v9fs_file_inode_operations;
298 inode->i_fop = &v9fs_file_operations; 284 if (v9ses->cache)
285 inode->i_fop = &v9fs_cached_file_operations;
286 else
287 inode->i_fop = &v9fs_file_operations;
299 } 288 }
300 289
301 break; 290 break;
302
303 case S_IFLNK: 291 case S_IFLNK:
304 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { 292 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
305 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " 293 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
335 err = -EINVAL; 323 err = -EINVAL;
336 goto error; 324 goto error;
337 } 325 }
326error:
327 return err;
338 328
339 return inode; 329}
340 330
341error: 331/**
342 iput(inode); 332 * v9fs_get_inode - helper function to setup an inode
343 return ERR_PTR(err); 333 * @sb: superblock
334 * @mode: mode to setup inode with
335 *
336 */
337
338struct inode *v9fs_get_inode(struct super_block *sb, int mode)
339{
340 int err;
341 struct inode *inode;
342 struct v9fs_session_info *v9ses = sb->s_fs_info;
343
344 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
345
346 inode = new_inode(sb);
347 if (!inode) {
348 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
349 return ERR_PTR(-ENOMEM);
350 }
351 err = v9fs_init_inode(v9ses, inode, mode);
352 if (err) {
353 iput(inode);
354 return ERR_PTR(err);
355 }
356 return inode;
344} 357}
345 358
346/* 359/*
@@ -403,6 +416,8 @@ error:
403 */ 416 */
404void v9fs_evict_inode(struct inode *inode) 417void v9fs_evict_inode(struct inode *inode)
405{ 418{
419 struct v9fs_inode *v9inode = V9FS_I(inode);
420
406 truncate_inode_pages(inode->i_mapping, 0); 421 truncate_inode_pages(inode->i_mapping, 0);
407 end_writeback(inode); 422 end_writeback(inode);
408 filemap_fdatawrite(inode->i_mapping); 423 filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode)
410#ifdef CONFIG_9P_FSCACHE 425#ifdef CONFIG_9P_FSCACHE
411 v9fs_cache_inode_put_cookie(inode); 426 v9fs_cache_inode_put_cookie(inode);
412#endif 427#endif
428 /* clunk the fid stashed in writeback_fid */
429 if (v9inode->writeback_fid) {
430 p9_client_clunk(v9inode->writeback_fid);
431 v9inode->writeback_fid = NULL;
432 }
413} 433}
414 434
415struct inode * 435static struct inode *v9fs_qid_iget(struct super_block *sb,
416v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, 436 struct p9_qid *qid,
417 struct super_block *sb) 437 struct p9_wstat *st)
418{ 438{
419 int err, umode; 439 int retval, umode;
420 struct inode *ret = NULL; 440 unsigned long i_ino;
421 struct p9_wstat *st; 441 struct inode *inode;
422 442 struct v9fs_session_info *v9ses = sb->s_fs_info;
423 st = p9_client_stat(fid);
424 if (IS_ERR(st))
425 return ERR_CAST(st);
426 443
444 i_ino = v9fs_qid2ino(qid);
445 inode = iget_locked(sb, i_ino);
446 if (!inode)
447 return ERR_PTR(-ENOMEM);
448 if (!(inode->i_state & I_NEW))
449 return inode;
450 /*
451 * initialize the inode with the stat info
452 * FIXME!! we may need support for stale inodes
453 * later.
454 */
427 umode = p9mode2unixmode(v9ses, st->mode); 455 umode = p9mode2unixmode(v9ses, st->mode);
428 ret = v9fs_get_inode(sb, umode); 456 retval = v9fs_init_inode(v9ses, inode, umode);
429 if (IS_ERR(ret)) { 457 if (retval)
430 err = PTR_ERR(ret);
431 goto error; 458 goto error;
432 }
433
434 v9fs_stat2inode(st, ret, sb);
435 ret->i_ino = v9fs_qid2ino(&st->qid);
436 459
460 v9fs_stat2inode(st, inode, sb);
437#ifdef CONFIG_9P_FSCACHE 461#ifdef CONFIG_9P_FSCACHE
438 v9fs_vcookie_set_qid(ret, &st->qid); 462 v9fs_fscache_set_key(inode, &st->qid);
439 v9fs_cache_inode_get_cookie(ret); 463 v9fs_cache_inode_get_cookie(inode);
440#endif 464#endif
441 p9stat_free(st); 465 unlock_new_inode(inode);
442 kfree(st); 466 return inode;
443 return ret;
444error: 467error:
468 unlock_new_inode(inode);
469 iput(inode);
470 return ERR_PTR(retval);
471
472}
473
474struct inode *
475v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
476 struct super_block *sb)
477{
478 struct p9_wstat *st;
479 struct inode *inode = NULL;
480
481 st = p9_client_stat(fid);
482 if (IS_ERR(st))
483 return ERR_CAST(st);
484
485 inode = v9fs_qid_iget(sb, &st->qid, st);
445 p9stat_free(st); 486 p9stat_free(st);
446 kfree(st); 487 kfree(st);
447 return ERR_PTR(err); 488 return inode;
448} 489}
449 490
450/** 491/**
@@ -458,8 +499,8 @@ error:
458static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 499static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
459{ 500{
460 int retval; 501 int retval;
461 struct inode *file_inode;
462 struct p9_fid *v9fid; 502 struct p9_fid *v9fid;
503 struct inode *file_inode;
463 504
464 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 505 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
465 rmdir); 506 rmdir);
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
470 return PTR_ERR(v9fid); 511 return PTR_ERR(v9fid);
471 512
472 retval = p9_client_remove(v9fid); 513 retval = p9_client_remove(v9fid);
473 if (!retval) 514 if (!retval) {
474 drop_nlink(file_inode); 515 /*
516 * directories on unlink should have zero
517 * link count
518 */
519 if (rmdir) {
520 clear_nlink(file_inode);
521 drop_nlink(dir);
522 } else
523 drop_nlink(file_inode);
524
525 v9fs_invalidate_inode_attr(file_inode);
526 v9fs_invalidate_inode_attr(dir);
527 }
475 return retval; 528 return retval;
476} 529}
477 530
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
531 } 584 }
532 585
533 /* instantiate inode and assign the unopened fid to the dentry */ 586 /* instantiate inode and assign the unopened fid to the dentry */
534 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 587 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
535 if (IS_ERR(inode)) { 588 if (IS_ERR(inode)) {
536 err = PTR_ERR(inode); 589 err = PTR_ERR(inode);
537 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 590 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
570 int err; 623 int err;
571 u32 perm; 624 u32 perm;
572 int flags; 625 int flags;
573 struct v9fs_session_info *v9ses;
574 struct p9_fid *fid;
575 struct file *filp; 626 struct file *filp;
627 struct v9fs_inode *v9inode;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid, *inode_fid;
576 630
577 err = 0; 631 err = 0;
578 fid = NULL; 632 fid = NULL;
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
592 goto error; 646 goto error;
593 } 647 }
594 648
649 v9fs_invalidate_inode_attr(dir);
595 /* if we are opening a file, assign the open fid to the file */ 650 /* if we are opening a file, assign the open fid to the file */
596 if (nd && nd->flags & LOOKUP_OPEN) { 651 if (nd && nd->flags & LOOKUP_OPEN) {
652 v9inode = V9FS_I(dentry->d_inode);
653 if (v9ses->cache && !v9inode->writeback_fid) {
654 /*
655 * clone a fid and add it to writeback_fid
656 * we do it during open time instead of
657 * page dirty time via write_begin/page_mkwrite
658 * because we want write after unlink usecase
659 * to work.
660 */
661 inode_fid = v9fs_writeback_fid(dentry);
662 if (IS_ERR(inode_fid)) {
663 err = PTR_ERR(inode_fid);
664 goto error;
665 }
666 v9inode->writeback_fid = (void *) inode_fid;
667 }
597 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 668 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
598 if (IS_ERR(filp)) { 669 if (IS_ERR(filp)) {
599 err = PTR_ERR(filp); 670 err = PTR_ERR(filp);
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
601 } 672 }
602 673
603 filp->private_data = fid; 674 filp->private_data = fid;
675#ifdef CONFIG_9P_FSCACHE
676 if (v9ses->cache)
677 v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
678#endif
604 } else 679 } else
605 p9_client_clunk(fid); 680 p9_client_clunk(fid);
606 681
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
625{ 700{
626 int err; 701 int err;
627 u32 perm; 702 u32 perm;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid; 703 struct p9_fid *fid;
704 struct v9fs_session_info *v9ses;
630 705
631 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 706 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
632 err = 0; 707 err = 0;
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
636 if (IS_ERR(fid)) { 711 if (IS_ERR(fid)) {
637 err = PTR_ERR(fid); 712 err = PTR_ERR(fid);
638 fid = NULL; 713 fid = NULL;
714 } else {
715 inc_nlink(dir);
716 v9fs_invalidate_inode_attr(dir);
639 } 717 }
640 718
641 if (fid) 719 if (fid)
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
687 return ERR_PTR(result); 765 return ERR_PTR(result);
688 } 766 }
689 767
690 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 768 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
691 if (IS_ERR(inode)) { 769 if (IS_ERR(inode)) {
692 result = PTR_ERR(inode); 770 result = PTR_ERR(inode);
693 inode = NULL; 771 inode = NULL;
@@ -747,17 +825,19 @@ int
747v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 825v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
748 struct inode *new_dir, struct dentry *new_dentry) 826 struct inode *new_dir, struct dentry *new_dentry)
749{ 827{
828 int retval;
750 struct inode *old_inode; 829 struct inode *old_inode;
830 struct inode *new_inode;
751 struct v9fs_session_info *v9ses; 831 struct v9fs_session_info *v9ses;
752 struct p9_fid *oldfid; 832 struct p9_fid *oldfid;
753 struct p9_fid *olddirfid; 833 struct p9_fid *olddirfid;
754 struct p9_fid *newdirfid; 834 struct p9_fid *newdirfid;
755 struct p9_wstat wstat; 835 struct p9_wstat wstat;
756 int retval;
757 836
758 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 837 P9_DPRINTK(P9_DEBUG_VFS, "\n");
759 retval = 0; 838 retval = 0;
760 old_inode = old_dentry->d_inode; 839 old_inode = old_dentry->d_inode;
840 new_inode = new_dentry->d_inode;
761 v9ses = v9fs_inode2v9ses(old_inode); 841 v9ses = v9fs_inode2v9ses(old_inode);
762 oldfid = v9fs_fid_lookup(old_dentry); 842 oldfid = v9fs_fid_lookup(old_dentry);
763 if (IS_ERR(oldfid)) 843 if (IS_ERR(oldfid))
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
798 retval = p9_client_wstat(oldfid, &wstat); 878 retval = p9_client_wstat(oldfid, &wstat);
799 879
800clunk_newdir: 880clunk_newdir:
801 if (!retval) 881 if (!retval) {
882 if (new_inode) {
883 if (S_ISDIR(new_inode->i_mode))
884 clear_nlink(new_inode);
885 else
886 drop_nlink(new_inode);
887 /*
888 * Work around vfs rename rehash bug with
889 * FS_RENAME_DOES_D_MOVE
890 */
891 v9fs_invalidate_inode_attr(new_inode);
892 }
893 if (S_ISDIR(old_inode->i_mode)) {
894 if (!new_inode)
895 inc_nlink(new_dir);
896 drop_nlink(old_dir);
897 }
898 v9fs_invalidate_inode_attr(old_inode);
899 v9fs_invalidate_inode_attr(old_dir);
900 v9fs_invalidate_inode_attr(new_dir);
901
802 /* successful rename */ 902 /* successful rename */
803 d_move(old_dentry, new_dentry); 903 d_move(old_dentry, new_dentry);
904 }
804 up_write(&v9ses->rename_sem); 905 up_write(&v9ses->rename_sem);
805 p9_client_clunk(newdirfid); 906 p9_client_clunk(newdirfid);
806 907
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
831 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 932 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
832 err = -EPERM; 933 err = -EPERM;
833 v9ses = v9fs_inode2v9ses(dentry->d_inode); 934 v9ses = v9fs_inode2v9ses(dentry->d_inode);
834 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 935 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
835 return simple_getattr(mnt, dentry, stat); 936 generic_fillattr(dentry->d_inode, stat);
836 937 return 0;
938 }
837 fid = v9fs_fid_lookup(dentry); 939 fid = v9fs_fid_lookup(dentry);
838 if (IS_ERR(fid)) 940 if (IS_ERR(fid))
839 return PTR_ERR(fid); 941 return PTR_ERR(fid);
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
891 if (iattr->ia_valid & ATTR_GID) 993 if (iattr->ia_valid & ATTR_GID)
892 wstat.n_gid = iattr->ia_gid; 994 wstat.n_gid = iattr->ia_gid;
893 } 995 }
894
895 retval = p9_client_wstat(fid, &wstat);
896 if (retval < 0)
897 return retval;
898
899 if ((iattr->ia_valid & ATTR_SIZE) && 996 if ((iattr->ia_valid & ATTR_SIZE) &&
900 iattr->ia_size != i_size_read(dentry->d_inode)) { 997 iattr->ia_size != i_size_read(dentry->d_inode)) {
901 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 998 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
902 if (retval) 999 if (retval)
903 return retval; 1000 return retval;
904 } 1001 }
1002 /* Write all dirty data */
1003 if (S_ISREG(dentry->d_inode->i_mode))
1004 filemap_write_and_wait(dentry->d_inode->i_mapping);
1005
1006 retval = p9_client_wstat(fid, &wstat);
1007 if (retval < 0)
1008 return retval;
1009 v9fs_invalidate_inode_attr(dentry->d_inode);
905 1010
906 setattr_copy(dentry->d_inode, iattr); 1011 setattr_copy(dentry->d_inode, iattr);
907 mark_inode_dirty(dentry->d_inode); 1012 mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
924 char tag_name[14]; 1029 char tag_name[14];
925 unsigned int i_nlink; 1030 unsigned int i_nlink;
926 struct v9fs_session_info *v9ses = sb->s_fs_info; 1031 struct v9fs_session_info *v9ses = sb->s_fs_info;
1032 struct v9fs_inode *v9inode = V9FS_I(inode);
927 1033
928 inode->i_nlink = 1; 1034 inode->i_nlink = 1;
929 1035
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
983 1089
984 /* not real number of blocks, but 512 byte ones ... */ 1090 /* not real number of blocks, but 512 byte ones ... */
985 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 1091 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
1092 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
986} 1093}
987 1094
988/** 1095/**
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1115 int mode, const char *extension) 1222 int mode, const char *extension)
1116{ 1223{
1117 u32 perm; 1224 u32 perm;
1118 struct v9fs_session_info *v9ses;
1119 struct p9_fid *fid; 1225 struct p9_fid *fid;
1226 struct v9fs_session_info *v9ses;
1120 1227
1121 v9ses = v9fs_inode2v9ses(dir); 1228 v9ses = v9fs_inode2v9ses(dir);
1122 if (!v9fs_proto_dotu(v9ses)) { 1229 if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1130 if (IS_ERR(fid)) 1237 if (IS_ERR(fid))
1131 return PTR_ERR(fid); 1238 return PTR_ERR(fid);
1132 1239
1240 v9fs_invalidate_inode_attr(dir);
1133 p9_client_clunk(fid); 1241 p9_client_clunk(fid);
1134 return 0; 1242 return 0;
1135} 1243}
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1166 struct dentry *dentry) 1274 struct dentry *dentry)
1167{ 1275{
1168 int retval; 1276 int retval;
1169 struct p9_fid *oldfid;
1170 char *name; 1277 char *name;
1278 struct p9_fid *oldfid;
1171 1279
1172 P9_DPRINTK(P9_DEBUG_VFS, 1280 P9_DPRINTK(P9_DEBUG_VFS,
1173 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1281 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1186 sprintf(name, "%d\n", oldfid->fid); 1294 sprintf(name, "%d\n", oldfid->fid);
1187 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); 1295 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
1188 __putname(name); 1296 __putname(name);
1189 1297 if (!retval) {
1298 v9fs_refresh_inode(oldfid, old_dentry->d_inode);
1299 v9fs_invalidate_inode_attr(dir);
1300 }
1190clunk_fid: 1301clunk_fid:
1191 p9_client_clunk(oldfid); 1302 p9_client_clunk(oldfid);
1192 return retval; 1303 return retval;
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1237 return retval; 1348 return retval;
1238} 1349}
1239 1350
1351int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
1352{
1353 loff_t i_size;
1354 struct p9_wstat *st;
1355 struct v9fs_session_info *v9ses;
1356
1357 v9ses = v9fs_inode2v9ses(inode);
1358 st = p9_client_stat(fid);
1359 if (IS_ERR(st))
1360 return PTR_ERR(st);
1361
1362 spin_lock(&inode->i_lock);
1363 /*
1364 * We don't want to refresh inode->i_size,
1365 * because we may have cached data
1366 */
1367 i_size = inode->i_size;
1368 v9fs_stat2inode(st, inode, inode->i_sb);
1369 if (v9ses->cache)
1370 inode->i_size = i_size;
1371 spin_unlock(&inode->i_lock);
1372 p9stat_free(st);
1373 kfree(st);
1374 return 0;
1375}
1376
1240static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1377static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1241 .create = v9fs_vfs_create, 1378 .create = v9fs_vfs_create,
1242 .lookup = v9fs_vfs_lookup, 1379 .lookup = v9fs_vfs_lookup,
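
v9fs_qid_iget above replaces the unconditional inode allocation with iget_locked keyed on the qid-derived inode number, so repeated lookups of the same server object share one in-core inode and only a freshly inserted (I_NEW) inode is initialized from the stat. The following toy userspace model shows just that lookup-or-create shape; toy_iget(), the linked-list cache and the field names are inventions for illustration, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy stand-in for the inode cache keyed by the qid-derived number.
 * toy_iget() either finds an existing entry or inserts a new one and
 * reports it as new, mimicking iget_locked()'s I_NEW contract: only
 * the caller that sees is_new == 1 fills the entry in from the stat.
 */
struct toy_inode {
	unsigned long ino;
	long long size;
	struct toy_inode *next;
};

static struct toy_inode *cache;

static struct toy_inode *toy_iget(unsigned long ino, int *is_new)
{
	struct toy_inode *i;

	for (i = cache; i; i = i->next) {
		if (i->ino == ino) {
			*is_new = 0;
			return i;	/* cache hit: reuse the same object */
		}
	}
	i = calloc(1, sizeof(*i));
	if (!i)
		return NULL;
	i->ino = ino;
	i->next = cache;
	cache = i;
	*is_new = 1;			/* caller must initialize it */
	return i;
}

int main(void)
{
	int is_new;
	struct toy_inode *a, *b;

	a = toy_iget(42, &is_new);
	if (!a)
		return 1;
	if (is_new)
		a->size = 4096;		/* "v9fs_init_inode + v9fs_stat2inode" step */

	b = toy_iget(42, &is_new);	/* second lookup of the same qid */
	printf("same object: %d, size: %lld\n", a == b, b->size);
	return 0;
}

The error path in the patch (unlock_new_inode plus iput) has no userspace analogue and is left out.
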
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9aace4..67c138e94feb 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
86 return dentry; 86 return dentry;
87} 87}
88 88
89static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
90 struct p9_qid *qid,
91 struct p9_fid *fid,
92 struct p9_stat_dotl *st)
93{
94 int retval;
95 unsigned long i_ino;
96 struct inode *inode;
97 struct v9fs_session_info *v9ses = sb->s_fs_info;
98
99 i_ino = v9fs_qid2ino(qid);
100 inode = iget_locked(sb, i_ino);
101 if (!inode)
102 return ERR_PTR(-ENOMEM);
103 if (!(inode->i_state & I_NEW))
104 return inode;
105 /*
106 * initialize the inode with the stat info
107 * FIXME!! we may need support for stale inodes
108 * later.
109 */
110 retval = v9fs_init_inode(v9ses, inode, st->st_mode);
111 if (retval)
112 goto error;
113
114 v9fs_stat2inode_dotl(st, inode);
115#ifdef CONFIG_9P_FSCACHE
116 v9fs_fscache_set_key(inode, &st->qid);
117 v9fs_cache_inode_get_cookie(inode);
118#endif
119 retval = v9fs_get_acl(inode, fid);
120 if (retval)
121 goto error;
122
123 unlock_new_inode(inode);
124 return inode;
125error:
126 unlock_new_inode(inode);
127 iput(inode);
128 return ERR_PTR(retval);
129
130}
131
89struct inode * 132struct inode *
90v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, 133v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
91 struct super_block *sb) 134 struct super_block *sb)
92{ 135{
93 struct inode *ret = NULL;
94 int err;
95 struct p9_stat_dotl *st; 136 struct p9_stat_dotl *st;
137 struct inode *inode = NULL;
96 138
97 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 139 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
98 if (IS_ERR(st)) 140 if (IS_ERR(st))
99 return ERR_CAST(st); 141 return ERR_CAST(st);
100 142
101 ret = v9fs_get_inode(sb, st->st_mode); 143 inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
102 if (IS_ERR(ret)) {
103 err = PTR_ERR(ret);
104 goto error;
105 }
106
107 v9fs_stat2inode_dotl(st, ret);
108 ret->i_ino = v9fs_qid2ino(&st->qid);
109#ifdef CONFIG_9P_FSCACHE
110 v9fs_vcookie_set_qid(ret, &st->qid);
111 v9fs_cache_inode_get_cookie(ret);
112#endif
113 err = v9fs_get_acl(ret, fid);
114 if (err) {
115 iput(ret);
116 goto error;
117 }
118 kfree(st);
119 return ret;
120error:
121 kfree(st); 144 kfree(st);
122 return ERR_PTR(err); 145 return inode;
123} 146}
124 147
125/** 148/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
136 struct nameidata *nd) 159 struct nameidata *nd)
137{ 160{
138 int err = 0; 161 int err = 0;
139 char *name = NULL;
140 gid_t gid; 162 gid_t gid;
141 int flags; 163 int flags;
142 mode_t mode; 164 mode_t mode;
143 struct v9fs_session_info *v9ses; 165 char *name = NULL;
144 struct p9_fid *fid = NULL;
145 struct p9_fid *dfid, *ofid;
146 struct file *filp; 166 struct file *filp;
147 struct p9_qid qid; 167 struct p9_qid qid;
148 struct inode *inode; 168 struct inode *inode;
169 struct p9_fid *fid = NULL;
170 struct v9fs_inode *v9inode;
171 struct p9_fid *dfid, *ofid, *inode_fid;
172 struct v9fs_session_info *v9ses;
149 struct posix_acl *pacl = NULL, *dacl = NULL; 173 struct posix_acl *pacl = NULL, *dacl = NULL;
150 174
151 v9ses = v9fs_inode2v9ses(dir); 175 v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
196 err); 220 err);
197 goto error; 221 goto error;
198 } 222 }
223 v9fs_invalidate_inode_attr(dir);
199 224
200 /* instantiate inode and assign the unopened fid to the dentry */ 225 /* instantiate inode and assign the unopened fid to the dentry */
201 fid = p9_client_walk(dfid, 1, &name, 1); 226 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
205 fid = NULL; 230 fid = NULL;
206 goto error; 231 goto error;
207 } 232 }
208 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 233 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
209 if (IS_ERR(inode)) { 234 if (IS_ERR(inode)) {
210 err = PTR_ERR(inode); 235 err = PTR_ERR(inode);
211 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 236 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
219 /* Now set the ACL based on the default value */ 244 /* Now set the ACL based on the default value */
220 v9fs_set_create_acl(dentry, dacl, pacl); 245 v9fs_set_create_acl(dentry, dacl, pacl);
221 246
247 v9inode = V9FS_I(inode);
248 if (v9ses->cache && !v9inode->writeback_fid) {
249 /*
250 * clone a fid and add it to writeback_fid
251 * we do it during open time instead of
252 * page dirty time via write_begin/page_mkwrite
253 * because we want write after unlink usecase
254 * to work.
255 */
256 inode_fid = v9fs_writeback_fid(dentry);
257 if (IS_ERR(inode_fid)) {
258 err = PTR_ERR(inode_fid);
259 goto error;
260 }
261 v9inode->writeback_fid = (void *) inode_fid;
262 }
222 /* Since we are opening a file, assign the open fid to the file */ 263 /* Since we are opening a file, assign the open fid to the file */
223 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 264 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
224 if (IS_ERR(filp)) { 265 if (IS_ERR(filp)) {
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
226 return PTR_ERR(filp); 267 return PTR_ERR(filp);
227 } 268 }
228 filp->private_data = ofid; 269 filp->private_data = ofid;
270#ifdef CONFIG_9P_FSCACHE
271 if (v9ses->cache)
272 v9fs_cache_inode_set_cookie(inode, filp);
273#endif
229 return 0; 274 return 0;
230 275
231error: 276error:
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
300 goto error; 345 goto error;
301 } 346 }
302 347
303 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 348 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
304 if (IS_ERR(inode)) { 349 if (IS_ERR(inode)) {
305 err = PTR_ERR(inode); 350 err = PTR_ERR(inode);
306 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 351 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
327 } 372 }
328 /* Now set the ACL based on the default value */ 373 /* Now set the ACL based on the default value */
329 v9fs_set_create_acl(dentry, dacl, pacl); 374 v9fs_set_create_acl(dentry, dacl, pacl);
330 375 inc_nlink(dir);
376 v9fs_invalidate_inode_attr(dir);
331error: 377error:
332 if (fid) 378 if (fid)
333 p9_client_clunk(fid); 379 p9_client_clunk(fid);
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
346 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 392 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
347 err = -EPERM; 393 err = -EPERM;
348 v9ses = v9fs_inode2v9ses(dentry->d_inode); 394 v9ses = v9fs_inode2v9ses(dentry->d_inode);
349 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 395 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
350 return simple_getattr(mnt, dentry, stat); 396 generic_fillattr(dentry->d_inode, stat);
351 397 return 0;
398 }
352 fid = v9fs_fid_lookup(dentry); 399 fid = v9fs_fid_lookup(dentry);
353 if (IS_ERR(fid)) 400 if (IS_ERR(fid))
354 return PTR_ERR(fid); 401 return PTR_ERR(fid);
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
406 if (IS_ERR(fid)) 453 if (IS_ERR(fid))
407 return PTR_ERR(fid); 454 return PTR_ERR(fid);
408 455
409 retval = p9_client_setattr(fid, &p9attr);
410 if (retval < 0)
411 return retval;
412
413 if ((iattr->ia_valid & ATTR_SIZE) && 456 if ((iattr->ia_valid & ATTR_SIZE) &&
414 iattr->ia_size != i_size_read(dentry->d_inode)) { 457 iattr->ia_size != i_size_read(dentry->d_inode)) {
415 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 458 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
416 if (retval) 459 if (retval)
417 return retval; 460 return retval;
418 } 461 }
462 /* Write all dirty data */
463 if (S_ISREG(dentry->d_inode->i_mode))
464 filemap_write_and_wait(dentry->d_inode->i_mapping);
465
466 retval = p9_client_setattr(fid, &p9attr);
467 if (retval < 0)
468 return retval;
469 v9fs_invalidate_inode_attr(dentry->d_inode);
419 470
420 setattr_copy(dentry->d_inode, iattr); 471 setattr_copy(dentry->d_inode, iattr);
421 mark_inode_dirty(dentry->d_inode); 472 mark_inode_dirty(dentry->d_inode);
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
439void 490void
440v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 491v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
441{ 492{
493 struct v9fs_inode *v9inode = V9FS_I(inode);
442 494
443 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { 495 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
444 inode->i_atime.tv_sec = stat->st_atime_sec; 496 inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
497 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION 549 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
498 * because the inode structure does not have fields for them. 550 * because the inode structure does not have fields for them.
499 */ 551 */
552 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
500} 553}
501 554
502static int 555static int
503v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, 556v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
504 const char *symname) 557 const char *symname)
505{ 558{
506 struct v9fs_session_info *v9ses;
507 struct p9_fid *dfid;
508 struct p9_fid *fid = NULL;
509 struct inode *inode;
510 struct p9_qid qid;
511 char *name;
512 int err; 559 int err;
513 gid_t gid; 560 gid_t gid;
561 char *name;
562 struct p9_qid qid;
563 struct inode *inode;
564 struct p9_fid *dfid;
565 struct p9_fid *fid = NULL;
566 struct v9fs_session_info *v9ses;
514 567
515 name = (char *) dentry->d_name.name; 568 name = (char *) dentry->d_name.name;
516 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", 569 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
534 goto error; 587 goto error;
535 } 588 }
536 589
590 v9fs_invalidate_inode_attr(dir);
537 if (v9ses->cache) { 591 if (v9ses->cache) {
538 /* Now walk from the parent so we can get an unopened fid. */ 592 /* Now walk from the parent so we can get an unopened fid. */
539 fid = p9_client_walk(dfid, 1, &name, 1); 593 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
546 } 600 }
547 601
548 /* instantiate inode and assign the unopened fid to dentry */ 602 /* instantiate inode and assign the unopened fid to dentry */
549 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 603 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
550 if (IS_ERR(inode)) { 604 if (IS_ERR(inode)) {
551 err = PTR_ERR(inode); 605 err = PTR_ERR(inode);
552 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 606 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
588 struct dentry *dentry) 642 struct dentry *dentry)
589{ 643{
590 int err; 644 int err;
591 struct p9_fid *dfid, *oldfid;
592 char *name; 645 char *name;
593 struct v9fs_session_info *v9ses;
594 struct dentry *dir_dentry; 646 struct dentry *dir_dentry;
647 struct p9_fid *dfid, *oldfid;
648 struct v9fs_session_info *v9ses;
595 649
596 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", 650 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
597 dir->i_ino, old_dentry->d_name.name, 651 dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
616 return err; 670 return err;
617 } 671 }
618 672
673 v9fs_invalidate_inode_attr(dir);
619 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 674 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
620 /* Get the latest stat info from server. */ 675 /* Get the latest stat info from server. */
621 struct p9_fid *fid; 676 struct p9_fid *fid;
622 struct p9_stat_dotl *st;
623
624 fid = v9fs_fid_lookup(old_dentry); 677 fid = v9fs_fid_lookup(old_dentry);
625 if (IS_ERR(fid)) 678 if (IS_ERR(fid))
626 return PTR_ERR(fid); 679 return PTR_ERR(fid);
627 680
628 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 681 v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
629 if (IS_ERR(st))
630 return PTR_ERR(st);
631
632 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
633
634 kfree(st);
635 } else {
636 /* Caching disabled. No need to get upto date stat info.
637 * This dentry will be released immediately. So, just hold the
638 * inode
639 */
640 ihold(old_dentry->d_inode);
641 } 682 }
683 ihold(old_dentry->d_inode);
642 d_instantiate(dentry, old_dentry->d_inode); 684 d_instantiate(dentry, old_dentry->d_inode);
643 685
644 return err; 686 return err;
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
657 dev_t rdev) 699 dev_t rdev)
658{ 700{
659 int err; 701 int err;
702 gid_t gid;
660 char *name; 703 char *name;
661 mode_t mode; 704 mode_t mode;
662 struct v9fs_session_info *v9ses; 705 struct v9fs_session_info *v9ses;
663 struct p9_fid *fid = NULL, *dfid = NULL; 706 struct p9_fid *fid = NULL, *dfid = NULL;
664 struct inode *inode; 707 struct inode *inode;
665 gid_t gid;
666 struct p9_qid qid; 708 struct p9_qid qid;
667 struct dentry *dir_dentry; 709 struct dentry *dir_dentry;
668 struct posix_acl *dacl = NULL, *pacl = NULL; 710 struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
699 if (err < 0) 741 if (err < 0)
700 goto error; 742 goto error;
701 743
744 v9fs_invalidate_inode_attr(dir);
702 /* instantiate inode and assign the unopened fid to the dentry */ 745 /* instantiate inode and assign the unopened fid to the dentry */
703 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 746 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
704 fid = p9_client_walk(dfid, 1, &name, 1); 747 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
710 goto error; 753 goto error;
711 } 754 }
712 755
713 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 756 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
714 if (IS_ERR(inode)) { 757 if (IS_ERR(inode)) {
715 err = PTR_ERR(inode); 758 err = PTR_ERR(inode);
716 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 759 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +825,31 @@ ndset:
782 return NULL; 825 return NULL;
783} 826}
784 827
828int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
829{
830 loff_t i_size;
831 struct p9_stat_dotl *st;
832 struct v9fs_session_info *v9ses;
833
834 v9ses = v9fs_inode2v9ses(inode);
835 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
836 if (IS_ERR(st))
837 return PTR_ERR(st);
838
839 spin_lock(&inode->i_lock);
840 /*
841 * We don't want to refresh inode->i_size,
842 * because we may have cached data
843 */
844 i_size = inode->i_size;
845 v9fs_stat2inode_dotl(st, inode);
846 if (v9ses->cache)
847 inode->i_size = i_size;
848 spin_unlock(&inode->i_lock);
849 kfree(st);
850 return 0;
851}
852
785const struct inode_operations v9fs_dir_inode_operations_dotl = { 853const struct inode_operations v9fs_dir_inode_operations_dotl = {
786 .create = v9fs_vfs_create_dotl, 854 .create = v9fs_vfs_create_dotl,
787 .lookup = v9fs_vfs_lookup, 855 .lookup = v9fs_vfs_lookup,
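
v9fs_refresh_inode_dotl above, like v9fs_refresh_inode in vfs_inode.c, re-reads attributes from the server but deliberately keeps the locally cached i_size when caching is enabled, because dirty page cache data can make the client's size newer than the server's. A compact userspace sketch of that save-and-restore step follows; struct demo_inode, refresh_attrs() and the struct copy standing in for v9fs_stat2inode_dotl() are assumptions made for this example.

#include <stdio.h>

struct demo_inode {
	long long i_size;
	long long i_mtime;
	/* ... other attributes ... */
};

/*
 * Model of v9fs_refresh_inode{,_dotl}: copy fresh attributes from the
 * server's stat, but when caching is on put the locally known size
 * back, because not-yet-written-back page cache data may extend past
 * what the server has seen.
 */
static void refresh_attrs(struct demo_inode *inode,
			  const struct demo_inode *server_stat, int caching)
{
	long long cached_size = inode->i_size;

	*inode = *server_stat;		/* stands in for v9fs_stat2inode_dotl() */
	if (caching)
		inode->i_size = cached_size;
}

int main(void)
{
	struct demo_inode ino = { .i_size = 8192, .i_mtime = 100 };
	struct demo_inode st  = { .i_size = 4096, .i_mtime = 200 };

	refresh_attrs(&ino, &st, 1);
	printf("size=%lld mtime=%lld\n", ino.i_size, ino.i_mtime);	/* 8192 200 */
	return 0;
}

The real helpers also take inode->i_lock around the update and free the stat buffer afterwards; those details are omitted.
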
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3b8131..09fd08d1606f 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
86 } else 86 } else
87 sb->s_op = &v9fs_super_ops; 87 sb->s_op = &v9fs_super_ops;
88 sb->s_bdi = &v9ses->bdi; 88 sb->s_bdi = &v9ses->bdi;
89 if (v9ses->cache)
90 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
89 91
90 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 92 sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
91 MS_NOATIME; 93 if (!v9ses->cache)
94 sb->s_flags |= MS_SYNCHRONOUS;
92 95
93#ifdef CONFIG_9P_FS_POSIX_ACL 96#ifdef CONFIG_9P_FS_POSIX_ACL
94 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT) 97 if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
95 sb->s_flags |= MS_POSIXACL; 98 sb->s_flags |= MS_POSIXACL;
96#endif 99#endif
97 100
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
151 retval = PTR_ERR(inode); 154 retval = PTR_ERR(inode);
152 goto release_sb; 155 goto release_sb;
153 } 156 }
154
155 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
156 if (!root) { 158 if (!root) {
157 iput(inode); 159 iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
166 retval = PTR_ERR(st); 168 retval = PTR_ERR(st);
167 goto release_sb; 169 goto release_sb;
168 } 170 }
169 171 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
170 v9fs_stat2inode_dotl(st, root->d_inode); 172 v9fs_stat2inode_dotl(st, root->d_inode);
171 kfree(st); 173 kfree(st);
172 } else { 174 } else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
183 p9stat_free(st); 185 p9stat_free(st);
184 kfree(st); 186 kfree(st);
185 } 187 }
188 v9fs_fid_add(root, fid);
186 retval = v9fs_get_acl(inode, fid); 189 retval = v9fs_get_acl(inode, fid);
187 if (retval) 190 if (retval)
188 goto release_sb; 191 goto release_sb;
189 v9fs_fid_add(root, fid); 192 /*
193 * Add the root fid to session info. This is used
194 * for file system sync. We want a cloned fid here
195 * so that we can do a sync_filesystem after a
196 * shrink_dcache_for_umount
197 */
198 v9ses->root_fid = v9fs_fid_clone(root);
199 if (IS_ERR(v9ses->root_fid)) {
200 retval = PTR_ERR(v9ses->root_fid);
201 goto release_sb;
202 }
190 203
191 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 204 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
192 return dget(sb->s_root); 205 return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
197 v9fs_session_close(v9ses); 210 v9fs_session_close(v9ses);
198 kfree(v9ses); 211 kfree(v9ses);
199 return ERR_PTR(retval); 212 return ERR_PTR(retval);
200
201release_sb: 213release_sb:
202 /* 214 /*
203 * we will do the session_close and root dentry release 215 * we will do the session_close and root dentry
204 * in the below call. But we need to clunk fid, because we haven't 216 * release in the below call.
205 * attached the fid to dentry so it won't get clunked
206 * automatically.
207 */ 217 */
208 p9_client_clunk(fid);
209 deactivate_locked_super(sb); 218 deactivate_locked_super(sb);
210 return ERR_PTR(retval); 219 return ERR_PTR(retval);
211} 220}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
223 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 232 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
224 233
225 kill_anon_super(s); 234 kill_anon_super(s);
226 235 p9_client_clunk(v9ses->root_fid);
227 v9fs_session_cancel(v9ses); 236 v9fs_session_cancel(v9ses);
228 v9fs_session_close(v9ses); 237 v9fs_session_close(v9ses);
229 kfree(v9ses); 238 kfree(v9ses);
@@ -276,11 +285,31 @@ done:
276 return res; 285 return res;
277} 286}
278 287
288static int v9fs_sync_fs(struct super_block *sb, int wait)
289{
290 struct v9fs_session_info *v9ses = sb->s_fs_info;
291
292 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
293 return p9_client_sync_fs(v9ses->root_fid);
294}
295
296static int v9fs_drop_inode(struct inode *inode)
297{
298 struct v9fs_session_info *v9ses;
299 v9ses = v9fs_inode2v9ses(inode);
300 if (v9ses->cache)
301 return generic_drop_inode(inode);
302 /*
 303	 * in case of non-cached mode always drop the
 304	 * inode because we want the inode attributes
305 * to always match that on the server.
306 */
307 return 1;
308}
309
279static const struct super_operations v9fs_super_ops = { 310static const struct super_operations v9fs_super_ops = {
280#ifdef CONFIG_9P_FSCACHE
281 .alloc_inode = v9fs_alloc_inode, 311 .alloc_inode = v9fs_alloc_inode,
282 .destroy_inode = v9fs_destroy_inode, 312 .destroy_inode = v9fs_destroy_inode,
283#endif
284 .statfs = simple_statfs, 313 .statfs = simple_statfs,
285 .evict_inode = v9fs_evict_inode, 314 .evict_inode = v9fs_evict_inode,
286 .show_options = generic_show_options, 315 .show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
288}; 317};
289 318
290static const struct super_operations v9fs_super_ops_dotl = { 319static const struct super_operations v9fs_super_ops_dotl = {
291#ifdef CONFIG_9P_FSCACHE
292 .alloc_inode = v9fs_alloc_inode, 320 .alloc_inode = v9fs_alloc_inode,
293 .destroy_inode = v9fs_destroy_inode, 321 .destroy_inode = v9fs_destroy_inode,
294#endif 322 .sync_fs = v9fs_sync_fs,
295 .statfs = v9fs_statfs, 323 .statfs = v9fs_statfs,
324 .drop_inode = v9fs_drop_inode,
296 .evict_inode = v9fs_evict_inode, 325 .evict_inode = v9fs_evict_inode,
297 .show_options = generic_show_options, 326 .show_options = generic_show_options,
298 .umount_begin = v9fs_umount_begin, 327 .umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
303 .mount = v9fs_mount, 332 .mount = v9fs_mount,
304 .kill_sb = v9fs_kill_super, 333 .kill_sb = v9fs_kill_super,
305 .owner = THIS_MODULE, 334 .owner = THIS_MODULE,
306 .fs_flags = FS_RENAME_DOES_D_MOVE, 335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
307}; 336};
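The super.c changes above give the dotl super_operations a drop_inode hook: cached mounts keep the generic behaviour, while uncached mounts always drop the inode at the final iput so the next lookup re-reads attributes from the server. A small sketch of that decision follows; the function names and the simplified view of generic_drop_inode() are illustrative.

```c
#include <stdbool.h>
#include <stdio.h>

/*
 * Decide whether an inode should be dropped from the cache when its
 * last reference goes away.  Returning true ("drop") means the next
 * access must re-fetch attributes from the server.
 */
static bool should_drop_inode(bool mount_is_cached, bool inode_unhashed)
{
    if (mount_is_cached)
        return inode_unhashed;   /* roughly what generic_drop_inode() looks at */
    return true;                 /* uncached: always drop, stay coherent */
}

int main(void)
{
    printf("cached mount, hashed inode   -> drop=%d\n",
           should_drop_inode(true, false));
    printf("uncached mount, hashed inode -> drop=%d\n",
           should_drop_inode(false, false));
    return 0;
}
```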
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..f3aa9b08b228 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
@@ -187,6 +187,7 @@ source "fs/omfs/Kconfig"
187source "fs/hpfs/Kconfig" 187source "fs/hpfs/Kconfig"
188source "fs/qnx4/Kconfig" 188source "fs/qnx4/Kconfig"
189source "fs/romfs/Kconfig" 189source "fs/romfs/Kconfig"
190source "fs/pstore/Kconfig"
190source "fs/sysv/Kconfig" 191source "fs/sysv/Kconfig"
191source "fs/ufs/Kconfig" 192source "fs/ufs/Kconfig"
192source "fs/exofs/Kconfig" 193source "fs/exofs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..fb68c2b8cf8a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
@@ -121,3 +123,4 @@ obj-$(CONFIG_BTRFS_FS) += btrfs/
121obj-$(CONFIG_GFS2_FS) += gfs2/ 123obj-$(CONFIG_GFS2_FS) += gfs2/
122obj-$(CONFIG_EXOFS_FS) += exofs/ 124obj-$(CONFIG_EXOFS_FS) += exofs/
123obj-$(CONFIG_CEPH_FS) += ceph/ 125obj-$(CONFIG_CEPH_FS) += ceph/
126obj-$(CONFIG_PSTORE) += pstore/
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index 1dd5f34b3cf2..e55182a74605 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,7 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support (EXPERIMENTAL)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK && EXPERIMENTAL
4 depends on BKL # need to fix
5 help 4 help
6 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
7 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 3b4a764ed780..3d83075aaa2e 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -9,7 +9,6 @@
9 * 9 *
10 * Common directory handling for ADFS 10 * Common directory handling for ADFS
11 */ 11 */
12#include <linux/smp_lock.h>
13#include "adfs.h" 12#include "adfs.h"
14 13
15/* 14/*
@@ -27,8 +26,6 @@ adfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
27 struct adfs_dir dir; 26 struct adfs_dir dir;
28 int ret = 0; 27 int ret = 0;
29 28
30 lock_kernel();
31
32 if (filp->f_pos >> 32) 29 if (filp->f_pos >> 32)
33 goto out; 30 goto out;
34 31
@@ -70,7 +67,6 @@ free_out:
70 ops->free(&dir); 67 ops->free(&dir);
71 68
72out: 69out:
73 unlock_kernel();
74 return ret; 70 return ret;
75} 71}
76 72
@@ -276,7 +272,6 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
276 struct object_info obj; 272 struct object_info obj;
277 int error; 273 int error;
278 274
279 lock_kernel();
280 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); 275 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
281 if (error == 0) { 276 if (error == 0) {
282 error = -EACCES; 277 error = -EACCES;
@@ -288,7 +283,6 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
288 if (inode) 283 if (inode)
289 error = 0; 284 error = 0;
290 } 285 }
291 unlock_kernel();
292 d_add(dentry, inode); 286 d_add(dentry, inode);
293 return ERR_PTR(error); 287 return ERR_PTR(error);
294} 288}
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 65794b8fe79e..09fe40198d1c 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -7,7 +7,6 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/smp_lock.h>
11#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
12#include <linux/writeback.h> 11#include <linux/writeback.h>
13#include "adfs.h" 12#include "adfs.h"
@@ -316,8 +315,6 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
316 unsigned int ia_valid = attr->ia_valid; 315 unsigned int ia_valid = attr->ia_valid;
317 int error; 316 int error;
318 317
319 lock_kernel();
320
321 error = inode_change_ok(inode, attr); 318 error = inode_change_ok(inode, attr);
322 319
323 /* 320 /*
@@ -359,7 +356,6 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
359 if (ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MODE)) 356 if (ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MODE))
360 mark_inode_dirty(inode); 357 mark_inode_dirty(inode);
361out: 358out:
362 unlock_kernel();
363 return error; 359 return error;
364} 360}
365 361
@@ -374,7 +370,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
374 struct object_info obj; 370 struct object_info obj;
375 int ret; 371 int ret;
376 372
377 lock_kernel();
378 obj.file_id = inode->i_ino; 373 obj.file_id = inode->i_ino;
379 obj.name_len = 0; 374 obj.name_len = 0;
380 obj.parent_id = ADFS_I(inode)->parent_id; 375 obj.parent_id = ADFS_I(inode)->parent_id;
@@ -384,6 +379,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
384 obj.size = inode->i_size; 379 obj.size = inode->i_size;
385 380
386 ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); 381 ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
387 unlock_kernel();
388 return ret; 382 return ret;
389} 383}
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 2d7954049fbe..06d7388b477b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -14,7 +14,6 @@
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/smp_lock.h>
18#include <linux/statfs.h> 17#include <linux/statfs.h>
19#include "adfs.h" 18#include "adfs.h"
20#include "dir_f.h" 19#include "dir_f.h"
@@ -120,15 +119,11 @@ static void adfs_put_super(struct super_block *sb)
120 int i; 119 int i;
121 struct adfs_sb_info *asb = ADFS_SB(sb); 120 struct adfs_sb_info *asb = ADFS_SB(sb);
122 121
123 lock_kernel();
124
125 for (i = 0; i < asb->s_map_size; i++) 122 for (i = 0; i < asb->s_map_size; i++)
126 brelse(asb->s_map[i].dm_bh); 123 brelse(asb->s_map[i].dm_bh);
127 kfree(asb->s_map); 124 kfree(asb->s_map);
128 kfree(asb); 125 kfree(asb);
129 sb->s_fs_info = NULL; 126 sb->s_fs_info = NULL;
130
131 unlock_kernel();
132} 127}
133 128
134static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 129static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
@@ -359,15 +354,11 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
359 struct adfs_sb_info *asb; 354 struct adfs_sb_info *asb;
360 struct inode *root; 355 struct inode *root;
361 356
362 lock_kernel();
363
364 sb->s_flags |= MS_NODIRATIME; 357 sb->s_flags |= MS_NODIRATIME;
365 358
366 asb = kzalloc(sizeof(*asb), GFP_KERNEL); 359 asb = kzalloc(sizeof(*asb), GFP_KERNEL);
367 if (!asb) { 360 if (!asb)
368 unlock_kernel();
369 return -ENOMEM; 361 return -ENOMEM;
370 }
371 sb->s_fs_info = asb; 362 sb->s_fs_info = asb;
372 363
373 /* set default options */ 364 /* set default options */
@@ -485,7 +476,6 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
485 adfs_error(sb, "get root inode failed\n"); 476 adfs_error(sb, "get root inode failed\n");
486 goto error; 477 goto error;
487 } 478 }
488 unlock_kernel();
489 return 0; 479 return 0;
490 480
491error_free_bh: 481error_free_bh:
@@ -493,7 +483,6 @@ error_free_bh:
493error: 483error:
494 sb->s_fs_info = NULL; 484 sb->s_fs_info = NULL;
495 kfree(asb); 485 kfree(asb);
496 unlock_kernel();
497 return -EINVAL; 486 return -EINVAL;
498} 487}
499 488
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..7f54f43b8f7c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 87
88 aio_wq = create_workqueue("aio"); 88 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
90 BUG_ON(!aio_wq || !abe_pool); 90 BUG_ON(!aio_wq || !abe_pool);
91 91
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
239 call_rcu(&ctx->rcu_head, ctx_rcu_free); 239 call_rcu(&ctx->rcu_head, ctx_rcu_free);
240} 240}
241 241
242#define get_ioctx(kioctx) do { \ 242static inline void get_ioctx(struct kioctx *kioctx)
243 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 243{
244 atomic_inc(&(kioctx)->users); \ 244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245} while (0) 245 atomic_inc(&kioctx->users);
246#define put_ioctx(kioctx) do { \ 246}
247 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 247
248 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 248static inline int try_get_ioctx(struct kioctx *kioctx)
249 __put_ioctx(kioctx); \ 249{
250} while (0) 250 return atomic_inc_not_zero(&kioctx->users);
251}
252
253static inline void put_ioctx(struct kioctx *kioctx)
254{
255 BUG_ON(atomic_read(&kioctx->users) <= 0);
256 if (unlikely(atomic_dec_and_test(&kioctx->users)))
257 __put_ioctx(kioctx);
258}
251 259
252/* ioctx_alloc 260/* ioctx_alloc
253 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 261 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -569,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
569 spin_lock(&fput_lock); 577 spin_lock(&fput_lock);
570 list_add(&req->ki_list, &fput_head); 578 list_add(&req->ki_list, &fput_head);
571 spin_unlock(&fput_lock); 579 spin_unlock(&fput_lock);
572 queue_work(aio_wq, &fput_work); 580 schedule_work(&fput_work);
573 } else { 581 } else {
574 req->ki_filp = NULL; 582 req->ki_filp = NULL;
575 really_put_req(ctx, req); 583 really_put_req(ctx, req);
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
601 rcu_read_lock(); 609 rcu_read_lock();
602 610
603 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 611 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
604 if (ctx->user_id == ctx_id && !ctx->dead) { 612 /*
605 get_ioctx(ctx); 613 * RCU protects us against accessing freed memory but
614 * we have to be careful not to get a reference when the
615 * reference count already dropped to 0 (ctx->dead test
616 * is unreliable because of races).
617 */
618 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
606 ret = ctx; 619 ret = ctx;
607 break; 620 break;
608 } 621 }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1629 goto out_put_req; 1642 goto out_put_req;
1630 1643
1631 spin_lock_irq(&ctx->ctx_lock); 1644 spin_lock_irq(&ctx->ctx_lock);
1645 /*
1646 * We could have raced with io_destroy() and are currently holding a
1647 * reference to ctx which should be destroyed. We cannot submit IO
1648 * since ctx gets freed as soon as io_submit() puts its reference. The
1649 * check here is reliable: io_destroy() sets ctx->dead before waiting
1650 * for outstanding IO and the barrier between these two is realized by
1651 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1652 * increment ctx->reqs_active before checking for ctx->dead and the
1653 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1654 * don't see ctx->dead set here, io_destroy() waits for our IO to
1655 * finish.
1656 */
1657 if (ctx->dead) {
1658 spin_unlock_irq(&ctx->ctx_lock);
1659 ret = -EINVAL;
1660 goto out_put_req;
1661 }
1632 aio_run_iocb(req); 1662 aio_run_iocb(req);
1633 if (!list_empty(&ctx->run_list)) { 1663 if (!list_empty(&ctx->run_list)) {
1634 /* drain the run list */ 1664 /* drain the run list */
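The aio.c hunks above replace the get_ioctx/put_ioctx macros with inline functions and make the RCU lookup take its reference with atomic_inc_not_zero(), so a context whose count already hit zero is never resurrected. Here is a compact userspace sketch of that lookup-side rule using C11 atomics instead of the kernel primitives; the struct and function names are invented for the example.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx { atomic_int users; };

/*
 * Only take a reference if the count is still non-zero; a reader that
 * races with the final put must not bring the object back to life.
 */
static bool try_get_ctx(struct ctx *c)
{
    int old = atomic_load(&c->users);
    while (old > 0) {
        if (atomic_compare_exchange_weak(&c->users, &old, old + 1))
            return true;
    }
    return false;   /* count already dropped to zero: lookup fails */
}

static bool put_ctx(struct ctx *c)
{
    return atomic_fetch_sub(&c->users, 1) == 1;  /* true: caller frees */
}

int main(void)
{
    struct ctx c = { .users = 1 };

    printf("first lookup: %s\n", try_get_ctx(&c) ? "got ref" : "missed");
    put_ctx(&c);                    /* drop the lookup's reference */
    put_ctx(&c);                    /* drop the original reference: now 0 */
    printf("late lookup:  %s\n", try_get_ctx(&c) ? "got ref" : "missed");
    return 0;
}
```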
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4fb8a3431531..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1600,7 +1607,7 @@ fail:
1600} 1607}
1601EXPORT_SYMBOL(lookup_bdev); 1608EXPORT_SYMBOL(lookup_bdev);
1602 1609
1603int __invalidate_device(struct block_device *bdev) 1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1604{ 1611{
1605 struct super_block *sb = get_super(bdev); 1612 struct super_block *sb = get_super(bdev);
1606 int res = 0; 1613 int res = 0;
@@ -1613,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
1613 * hold). 1620 * hold).
1614 */ 1621 */
1615 shrink_dcache_sb(sb); 1622 shrink_dcache_sb(sb);
1616 res = invalidate_inodes(sb); 1623 res = invalidate_inodes(sb, kill_dirty);
1617 drop_super(sb); 1624 drop_super(sb);
1618 } 1625 }
1619 invalidate_bdev(bdev); 1626 invalidate_bdev(bdev);
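flush_disk() now carries a kill_dirty flag down through __invalidate_device() to invalidate_inodes(): a media change discards even dirty inodes (the medium is gone), while a pure capacity change keeps them. The following is a tiny sketch of that flag propagation with invented data structures, not the kernel's inode-invalidation code.

```c
#include <stdbool.h>
#include <stdio.h>

/* pretend inode table for one block device */
static bool inode_dirty[3] = { false, true, false };
static bool inode_valid[3] = { true, true, true };

/* drop cached inodes; only touch dirty ones when kill_dirty is set */
static int invalidate_inodes_sketch(bool kill_dirty)
{
    int busy = 0;
    for (int i = 0; i < 3; i++) {
        if (inode_dirty[i] && !kill_dirty) {
            busy++;              /* leave dirty data alone on a resize */
            continue;
        }
        inode_valid[i] = false;  /* discard the cached inode */
    }
    return busy;
}

int main(void)
{
    /* size change keeps dirty inodes; media change kills them too */
    printf("resize:       busy=%d\n", invalidate_inodes_sketch(false));
    printf("media change: busy=%d\n", invalidate_inodes_sketch(true));
    return 0;
}
```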
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1263#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1264#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1265#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1266#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1267
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1268#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1269#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2228 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2229int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2230 u64 num_bytes);
2231int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root, u64 type);
2221 2233
2222/* ctree.c */ 2234/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2235int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
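The export.c change makes btrfs_encode_fh() report the required handle size back through *max_len instead of silently failing, so callers can retry with a big enough buffer. Below is a generic sketch of that convention; the size constants and return codes are placeholders that only mirror the shape of the exportfs interface.

```c
#include <stdio.h>

#define FID_SIZE_NON_CONNECTABLE 3   /* illustrative sizes, in 32-bit words */
#define FID_SIZE_CONNECTABLE     6

/*
 * Returns a handle type on success.  On a too-small buffer it writes
 * the size that would have been needed into *max_len and returns 255,
 * mirroring the convention the diff adopts.
 */
static int encode_fh_sketch(int *max_len, int connectable)
{
    if (connectable && *max_len < FID_SIZE_CONNECTABLE) {
        *max_len = FID_SIZE_CONNECTABLE;
        return 255;
    } else if (*max_len < FID_SIZE_NON_CONNECTABLE) {
        *max_len = FID_SIZE_NON_CONNECTABLE;
        return 255;
    }
    *max_len = connectable ? FID_SIZE_CONNECTABLE : FID_SIZE_NON_CONNECTABLE;
    return 1;   /* stand-in for a real FILEID_* type */
}

int main(void)
{
    int len = 2;
    int type = encode_fh_sketch(&len, 1);
    printf("first try: type=%d, need %d words\n", type, len);
    type = encode_fh_sketch(&len, 1);
    printf("retry:     type=%d, used %d words\n", type, len);
    return 0;
}
```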
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc01439..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
@@ -5376,7 +5387,7 @@ again:
5376 num_bytes, data, 1); 5387 num_bytes, data, 1);
5377 goto again; 5388 goto again;
5378 } 5389 }
5379 if (ret == -ENOSPC) { 5390 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5380 struct btrfs_space_info *sinfo; 5391 struct btrfs_space_info *sinfo;
5381 5392
5382 sinfo = __find_space_info(root->fs_info, data); 5393 sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8076,13 @@ out:
8065 return ret; 8076 return ret;
8066} 8077}
8067 8078
8079int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8080 struct btrfs_root *root, u64 type)
8081{
8082 u64 alloc_flags = get_alloc_profile(root, type);
8083 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8084}
8085
8068/* 8086/*
8069 * helper to account the unused space of all the readonly block group in the 8087 * helper to account the unused space of all the readonly block group in the
8070 * list. takes mirrors into account. 8088 * list. takes mirrors into account.
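The extent-tree.c hunks above change shrink_delalloc() to sample space_info->reservation_progress up front and, after a few passes, stop as soon as the counter has moved, because any movement means some reservation was released elsewhere. A minimal sketch of that "exit once a shared progress counter changes" loop is below; the thread that bumps the counter is simulated and all names are invented.

```c
#include <stdio.h>

static unsigned long reservation_progress;  /* bumped by the release path */

/* simulate another thread releasing reservations now and then */
static void maybe_release(int iteration)
{
    if (iteration == 4)
        reservation_progress++;
}

static int shrink_sketch(void)
{
    unsigned long progress = reservation_progress;  /* sample once */
    int loops = 0;

    for (;;) {
        maybe_release(loops);          /* stands in for kicking writeback */
        loops++;

        /*
         * after a few attempts, stop as soon as anyone has made
         * progress instead of spinning here for a long time
         */
        if (loops > 3 && progress != reservation_progress)
            return loops;
        if (loops > 32)
            return -1;                 /* give up entirely */
    }
}

int main(void)
{
    printf("stopped after %d loops\n", shrink_sketch());
    return 0;
}
```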
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac5192c518..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -2912,6 +2918,46 @@ out:
2912 return sector; 2918 return sector;
2913} 2919}
2914 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2915int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2916 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2917{ 2963{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2921 u32 flags = 0; 2967 u32 flags = 0;
2922 u32 found_type; 2968 u32 found_type;
2923 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2924 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2925 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2926 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2927 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2928 struct btrfs_path *path; 2976 struct btrfs_path *path;
2929 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2930 int end = 0; 2978 int end = 0;
2931 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2932 unsigned long emflags; 2982 unsigned long emflags;
2933 int hole = 0;
2934 2983
2935 if (len == 0) 2984 if (len == 0)
2936 return -EINVAL; 2985 return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2940 return -ENOMEM; 2989 return -ENOMEM;
2941 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2942 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2943 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2944 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2945 if (ret < 0) { 2998 if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2954 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2955 3008
2956 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2957 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2958 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2959 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2960 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2961 } 3023 }
2962 last = found_key.offset;
2963 btrfs_free_path(path); 3024 btrfs_free_path(path);
2964 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2965 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2966 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2967 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2968 if (!em) 3041 if (!em)
2969 goto out; 3042 goto out;
2970 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2973,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2973 } 3046 }
2974 3047
2975 while (!end) { 3048 while (!end) {
2976 hole = 0; 3049 u64 offset_in_extent;
2977 off = em->start + em->len;
2978 if (off >= max)
2979 end = 1;
2980 3050
2981 if (em->block_start == EXTENT_MAP_HOLE) { 3051 /* break if the extent we found is outside the range */
2982 hole = 1; 3052 if (em->start >= max || extent_map_end(em) < off)
2983 goto next; 3053 break;
2984 }
2985 3054
2986 em_start = em->start; 3055 /*
2987 em_len = em->len; 3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
2988 3062
3063 /*
3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3070 emflags = em->flags;
2989 disko = 0; 3071 disko = 0;
2990 flags = 0; 3072 flags = 0;
2991 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
2992 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2993 end = 1; 3082 end = 1;
2994 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -2999,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2999 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
3000 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
3001 } else { 3090 } else {
3002 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
3003 } 3092 }
3004 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3005 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
3006 3095
3007next:
3008 emflags = em->flags;
3009 free_extent_map(em); 3096 free_extent_map(em);
3010 em = NULL; 3097 em = NULL;
3011 if (!end) { 3098 if ((em_start >= last) || em_len == (u64)-1 ||
3012 em = get_extent(inode, NULL, 0, off, max - off, 0); 3099 (last == (u64)-1 && isize <= em_end)) {
3013 if (!em)
3014 goto out;
3015 if (IS_ERR(em)) {
3016 ret = PTR_ERR(em);
3017 goto out;
3018 }
3019 emflags = em->flags;
3020 }
3021
3022 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3023 flags |= FIEMAP_EXTENT_LAST; 3100 flags |= FIEMAP_EXTENT_LAST;
3024 end = 1; 3101 end = 1;
3025 } 3102 }
3026 3103
3027 if (em_start == last) { 3104 /* now scan forward to see if this is really the last extent. */
3105 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3106 get_extent);
3107 if (IS_ERR(em)) {
3108 ret = PTR_ERR(em);
3109 goto out;
3110 }
3111 if (!em) {
3028 flags |= FIEMAP_EXTENT_LAST; 3112 flags |= FIEMAP_EXTENT_LAST;
3029 end = 1; 3113 end = 1;
3030 } 3114 }
3031 3115 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3032 if (!hole) { 3116 em_len, flags);
3033 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3117 if (ret)
3034 em_len, flags); 3118 goto out_free;
3035 if (ret)
3036 goto out_free;
3037 }
3038 } 3119 }
3039out_free: 3120out_free:
3040 free_extent_map(em); 3121 free_extent_map(em);
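count_range_bits() gains a contig flag above so the fiemap path can measure only a contiguous run of matching bytes, stopping at the first gap instead of summing every matching range. The sketch below reproduces that stop-at-the-first-hole rule over a flat array of flagged ranges; the types are made up and stand in for the extent-state tree.

```c
#include <stdio.h>

struct range { long long start, end; int flagged; };

/*
 * Sum flagged bytes starting at *start.  With contig set, stop at the
 * first gap or unflagged range, the way the fiemap caller wants it.
 */
static long long count_bits_sketch(const struct range *r, int n,
                                   long long *start, int contig)
{
    long long total = 0, last = -1;
    int found = 0;

    for (int i = 0; i < n; i++) {
        if (r[i].end < *start)
            continue;
        if (contig && found && r[i].start > last + 1)
            break;                       /* hole between ranges */
        if (r[i].flagged) {
            if (!found) { *start = r[i].start; found = 1; }
            total += r[i].end - r[i].start + 1;
            last = r[i].end;
        } else if (contig && found) {
            break;                       /* flags stopped matching */
        }
    }
    return total;
}

int main(void)
{
    struct range r[] = { {0, 9, 1}, {10, 19, 1}, {30, 39, 1} };
    long long start = 0;
    long long n = count_bits_sketch(r, 3, &start, 1);
    printf("contig run starts at %lld, %lld bytes\n", start, n);  /* 0, 20 */
    return 0;
}
```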
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -763,6 +776,27 @@ out:
763} 776}
764 777
765/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
766 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
767 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
768 * modified. 802 * modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
777 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
778 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 813 int err = 0;
814 int faili = 0;
780 u64 start_pos; 815 u64 start_pos;
781 u64 last_pos; 816 u64 last_pos;
782 817
@@ -794,15 +829,24 @@ again:
794 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
795 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
796 if (!pages[i]) { 831 if (!pages[i]) {
797 int c; 832 faili = i - 1;
798 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
799 unlock_page(pages[c]); 834 goto fail;
800 page_cache_release(pages[c]); 835 }
801 } 836
802 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
803 } 846 }
804 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
805 } 848 }
849 err = 0;
806 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
807 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
808 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
842 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
843 } 887 }
844 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
845} 897}
846 898
847static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
851 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
852 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
853 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
854 struct page *pinned[2];
855 struct page **pages = NULL; 906 struct page **pages = NULL;
856 struct iov_iter i; 907 struct iov_iter i;
857 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
872 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
873 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
874 925
875 pinned[0] = NULL;
876 pinned[1] = NULL;
877
878 start_pos = pos; 926 start_pos = pos;
879 927
880 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
962 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
963 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
964 1012
965 /*
966 * there are lots of better ways to do this, but this code
967 * makes sure the first and last page in the file range are
968 * up to date and ready for cow
969 */
970 if ((pos & (PAGE_CACHE_SIZE - 1))) {
971 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
972 if (!PageUptodate(pinned[0])) {
973 ret = btrfs_readpage(NULL, pinned[0]);
974 BUG_ON(ret);
975 wait_on_page_locked(pinned[0]);
976 } else {
977 unlock_page(pinned[0]);
978 }
979 }
980 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
981 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
982 if (!PageUptodate(pinned[1])) {
983 ret = btrfs_readpage(NULL, pinned[1]);
984 BUG_ON(ret);
985 wait_on_page_locked(pinned[1]);
986 } else {
987 unlock_page(pinned[1]);
988 }
989 }
990
991 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
992 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
993 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1024 1046
1025 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1026 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1028 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1029 1063
1030 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1031 if (copied > 0) 1065 if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
1069 err = ret; 1103 err = ret;
1070 1104
1071 kfree(pages); 1105 kfree(pages);
1072 if (pinned[0])
1073 page_cache_release(pinned[0]);
1074 if (pinned[1])
1075 page_cache_release(pinned[1]);
1076 *ppos = pos; 1106 *ppos = pos;
1077 1107
1078 /* 1108 /*
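The file.c changes above reject a short copy_from_user() into a page that is not yet up to date (copied is forced back to 0) and shrink the batch to a single page for the next iteration, so the write path never has to deal with partially up-to-date pages. A userspace sketch of that fallback policy follows; the function and parameter names are illustrative only.

```c
#include <stdio.h>

#define PAGE_SIZE 4096

/*
 * Simulate one write iteration: if the user copy came up short while
 * the page was not already up to date, pretend nothing was copied and
 * drop to a single page per batch for the next round.
 */
static long write_iteration(long want, long copied, int page_uptodate,
                            int *batch_pages)
{
    if (!page_uptodate && copied < want)
        copied = 0;              /* retry rather than leave a partial page */
    if (copied < want)
        *batch_pages = 1;        /* fault-in trouble: go page at a time */
    return copied;
}

int main(void)
{
    int batch = 8;
    long got;

    got = write_iteration(3 * PAGE_SIZE, 2 * PAGE_SIZE, 0, &batch);
    printf("short copy into cold page: copied=%ld, next batch=%d pages\n",
           got, batch);

    got = write_iteration(PAGE_SIZE, PAGE_SIZE, 1, &batch);
    printf("full copy: copied=%ld, next batch=%d pages\n", got, batch);
    return 0;
}
```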
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd7832b6d..512c3d1da083 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -90,13 +90,14 @@ static noinline int cow_file_range(struct inode *inode,
90 unsigned long *nr_written, int unlock); 90 unsigned long *nr_written, int unlock);
91 91
92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
93 struct inode *inode, struct inode *dir) 93 struct inode *inode, struct inode *dir,
94 const struct qstr *qstr)
94{ 95{
95 int err; 96 int err;
96 97
97 err = btrfs_init_acl(trans, inode, dir); 98 err = btrfs_init_acl(trans, inode, dir);
98 if (!err) 99 if (!err)
99 err = btrfs_xattr_security_init(trans, inode, dir); 100 err = btrfs_xattr_security_init(trans, inode, dir, qstr);
100 return err; 101 return err;
101} 102}
102 103
@@ -1913,7 +1914,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1913 1914
1914 private = 0; 1915 private = 0;
1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1916 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1916 (u64)-1, 1, EXTENT_DIRTY)) { 1917 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1918 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1918 start, &private_failure); 1919 start, &private_failure);
1919 if (ret == 0) { 1920 if (ret == 0) {
@@ -4704,7 +4705,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4704 if (IS_ERR(inode)) 4705 if (IS_ERR(inode))
4705 goto out_unlock; 4706 goto out_unlock;
4706 4707
4707 err = btrfs_init_inode_security(trans, inode, dir); 4708 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4708 if (err) { 4709 if (err) {
4709 drop_inode = 1; 4710 drop_inode = 1;
4710 goto out_unlock; 4711 goto out_unlock;
@@ -4765,7 +4766,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4765 if (IS_ERR(inode)) 4766 if (IS_ERR(inode))
4766 goto out_unlock; 4767 goto out_unlock;
4767 4768
4768 err = btrfs_init_inode_security(trans, inode, dir); 4769 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4769 if (err) { 4770 if (err) {
4770 drop_inode = 1; 4771 drop_inode = 1;
4771 goto out_unlock; 4772 goto out_unlock;
@@ -4806,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4807 int err;
4807 int drop_inode = 0; 4808 int drop_inode = 0;
4808 4809
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4810 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4811 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4812 return -EPERM;
@@ -4821,10 +4819,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4821 goto fail; 4819 goto fail;
4822 4820
4823 /* 4821 /*
4824 * 1 item for inode ref 4822 * 2 items for inode and inode ref
4825 * 2 items for dir items 4823 * 2 items for dir items
4824 * 1 item for parent inode
4826 */ 4825 */
4827 trans = btrfs_start_transaction(root, 3); 4826 trans = btrfs_start_transaction(root, 5);
4828 if (IS_ERR(trans)) { 4827 if (IS_ERR(trans)) {
4829 err = PTR_ERR(trans); 4828 err = PTR_ERR(trans);
4830 goto fail; 4829 goto fail;
@@ -4893,7 +4892,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4893 4892
4894 drop_on_err = 1; 4893 drop_on_err = 1;
4895 4894
4896 err = btrfs_init_inode_security(trans, inode, dir); 4895 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4897 if (err) 4896 if (err)
4898 goto out_fail; 4897 goto out_fail;
4899 4898
@@ -5280,6 +5279,128 @@ out:
 	return em;
 }
 
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+					   size_t pg_offset, u64 start, u64 len,
+					   int create)
+{
+	struct extent_map *em;
+	struct extent_map *hole_em = NULL;
+	u64 range_start = start;
+	u64 end;
+	u64 found;
+	u64 found_end;
+	int err = 0;
+
+	em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+	if (IS_ERR(em))
+		return em;
+	if (em) {
+		/*
+		 * if our em maps to a hole, there might
+		 * actually be delalloc bytes behind it
+		 */
+		if (em->block_start != EXTENT_MAP_HOLE)
+			return em;
+		else
+			hole_em = em;
+	}
+
+	/* check to see if we've wrapped (len == -1 or similar) */
+	end = start + len;
+	if (end < start)
+		end = (u64)-1;
+	else
+		end -= 1;
+
+	em = NULL;
+
+	/* ok, we didn't find anything, lets look for delalloc */
+	found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+				 end, len, EXTENT_DELALLOC, 1);
+	found_end = range_start + found;
+	if (found_end < range_start)
+		found_end = (u64)-1;
+
+	/*
+	 * we didn't find anything useful, return
+	 * the original results from get_extent()
+	 */
+	if (range_start > end || found_end <= start) {
+		em = hole_em;
+		hole_em = NULL;
+		goto out;
+	}
+
+	/* adjust the range_start to make sure it doesn't
+	 * go backwards from the start they passed in
+	 */
+	range_start = max(start,range_start);
+	found = found_end - range_start;
+
+	if (found > 0) {
+		u64 hole_start = start;
+		u64 hole_len = len;
+
+		em = alloc_extent_map(GFP_NOFS);
+		if (!em) {
+			err = -ENOMEM;
+			goto out;
+		}
+		/*
+		 * when btrfs_get_extent can't find anything it
+		 * returns one huge hole
+		 *
+		 * make sure what it found really fits our range, and
+		 * adjust to make sure it is based on the start from
+		 * the caller
+		 */
+		if (hole_em) {
+			u64 calc_end = extent_map_end(hole_em);
+
+			if (calc_end <= start || (hole_em->start > end)) {
+				free_extent_map(hole_em);
+				hole_em = NULL;
+			} else {
+				hole_start = max(hole_em->start, start);
+				hole_len = calc_end - hole_start;
+			}
+		}
+		em->bdev = NULL;
+		if (hole_em && range_start > hole_start) {
+			/* our hole starts before our delalloc, so we
+			 * have to return just the parts of the hole
+			 * that go until the delalloc starts
+			 */
+			em->len = min(hole_len,
+				      range_start - hole_start);
+			em->start = hole_start;
+			em->orig_start = hole_start;
+			/*
+			 * don't adjust block start at all,
+			 * it is fixed at EXTENT_MAP_HOLE
+			 */
+			em->block_start = hole_em->block_start;
+			em->block_len = hole_len;
+		} else {
+			em->start = range_start;
+			em->len = found;
+			em->orig_start = range_start;
+			em->block_start = EXTENT_MAP_DELALLOC;
+			em->block_len = found;
+		}
+	} else if (hole_em) {
+		return hole_em;
+	}
+out:
+
+	free_extent_map(hole_em);
+	if (err) {
+		free_extent_map(em);
+		return ERR_PTR(err);
+	}
+	return em;
+}
+
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 						  u64 start, u64 len)
 {
@@ -5934,6 +6055,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 	if (!skip_sum) {
 		dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
 		if (!dip->csums) {
+			kfree(dip);
 			ret = -ENOMEM;
 			goto free_ordered;
 		}
@@ -6102,7 +6224,7 @@ out:
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
-	return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+	return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 
 int btrfs_readpage(struct file *file, struct page *page)
@@ -6982,7 +7104,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(inode))
 		goto out_unlock;
 
-	err = btrfs_init_inode_security(trans, inode, dir);
+	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err) {
 		drop_inode = 1;
 		goto out_unlock;
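The effect of wiring btrfs_fiemap() to the new btrfs_get_extent_fiemap() is visible to any FIEMAP caller: delalloc ranges that still live only in memory get reported instead of appearing as holes. A rough userspace probe of that behaviour, using only the generic FIEMAP ioctl (nothing btrfs-specific is assumed here):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
    struct fiemap *fm;
    unsigned int i;
    int fd;

    if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
        return 1;

    /* header plus room for 32 extent records */
    fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
    if (!fm)
        return 1;
    fm->fm_start = 0;
    fm->fm_length = ~0ULL;        /* map the whole file */
    fm->fm_extent_count = 32;     /* leave FIEMAP_FLAG_SYNC off so delalloc is not flushed first */

    if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
        perror("FS_IOC_FIEMAP");
        return 1;
    }
    for (i = 0; i < fm->fm_mapped_extents; i++)
        printf("extent %u: logical %llu len %llu flags 0x%x\n", i,
               (unsigned long long)fm->fm_extents[i].fe_logical,
               (unsigned long long)fm->fm_extents[i].fe_length,
               fm->fm_extents[i].fe_flags);
    free(fm);
    return 0;
}

With the patch applied, data written but not yet allocated should be reported with the DELALLOC/UNKNOWN bits set in fe_flags rather than being silently skipped.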
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5e..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
 	if (copy_from_user(&flags, arg, sizeof(flags)))
 		return -EFAULT;
 
-	if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+	if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
 		return -EINVAL;
 
 	if (flags & ~BTRFS_SUBVOL_RDONLY)
 		return -EOPNOTSUPP;
 
+	if (!is_owner_or_cap(inode))
+		return -EACCES;
+
 	down_write(&root->fs_info->subvol_sem);
 
 	/* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
 		goto out_reset;
 	}
 
-	ret = btrfs_update_root(trans, root,
+	ret = btrfs_update_root(trans, root->fs_info->tree_root,
 				&root->root_key, &root->root_item);
 
 	btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
 	unsigned long tot_out;
 	unsigned long tot_len;
 	char *buf;
+	bool may_late_unmap, need_unmap;
 
 	data_in = kmap(pages_in[0]);
 	tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
 
 	tot_in += in_len;
 	working_bytes = in_len;
+	may_late_unmap = need_unmap = false;
 
 	/* fast path: avoid using the working buffer */
 	if (in_page_bytes_left >= in_len) {
 		buf = data_in + in_offset;
 		bytes = in_len;
+		may_late_unmap = true;
 		goto cont;
 	}
 
@@ -329,14 +332,17 @@ cont:
 		if (working_bytes == 0 && tot_in >= tot_len)
 			break;
 
-		kunmap(pages_in[page_in_index]);
-		page_in_index++;
-		if (page_in_index >= total_pages_in) {
+		if (page_in_index + 1 >= total_pages_in) {
 			ret = -1;
-			data_in = NULL;
 			goto done;
 		}
-		data_in = kmap(pages_in[page_in_index]);
+
+		if (may_late_unmap)
+			need_unmap = true;
+		else
+			kunmap(pages_in[page_in_index]);
+
+		data_in = kmap(pages_in[++page_in_index]);
 
 		in_page_bytes_left = PAGE_CACHE_SIZE;
 		in_offset = 0;
@@ -346,6 +352,8 @@ cont:
 		out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
 		ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
 					    &out_len);
+		if (need_unmap)
+			kunmap(pages_in[page_in_index - 1]);
 		if (ret != LZO_E_OK) {
 			printk(KERN_WARNING "btrfs decompress failed\n");
 			ret = -1;
@@ -363,8 +371,7 @@ cont:
 		break;
 	}
 done:
-	if (data_in)
-		kunmap(pages_in[page_in_index]);
+	kunmap(pages_in[page_in_index]);
 	return ret;
 }
 
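The bool pair added above implements a simple rule: when the compressed record is consumed straight out of the current page, the unmap of that page must wait until the data has actually been used. A toy userspace rendition of the same bookkeeping (fake 4 KiB pages, map()/unmap() standing in for kmap()/kunmap(), no real decompression):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

static char pages[2][PAGE_SIZE];

static char *map(int i)  { printf("map page %d\n", i); return pages[i]; }
static void unmap(int i) { printf("unmap page %d\n", i); }

/* consume 'len' bytes starting at 'off' in page 'pg' */
static void consume(int pg, size_t off, size_t len, char *bounce)
{
    char *data = map(pg);
    char *buf;
    int need_unmap = 0;

    if (off + len <= PAGE_SIZE) {
        /* fast path: use the data in place, defer the unmap until
         * after it has been consumed */
        buf = data + off;
        need_unmap = 1;
    } else {
        /* slow path: copy into a bounce buffer, so the mappings can
         * be dropped immediately */
        size_t first = PAGE_SIZE - off;
        memcpy(bounce, data + off, first);
        unmap(pg);
        data = map(pg + 1);
        memcpy(bounce + first, data, len - first);
        unmap(pg + 1);
        buf = bounce;
    }

    /* ... decompress/consume buf[0..len) here ... */
    (void)buf;

    if (need_unmap)
        unmap(pg);
}

int main(void)
{
    char bounce[2 * PAGE_SIZE];

    consume(0, 100, 200, bounce);               /* fast path */
    consume(0, PAGE_SIZE - 50, 200, bounce);    /* record spans two pages */
    return 0;
}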
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed9447..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	u32 item_size;
 	int ret;
 	int err = 0;
+	int progress = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	}
 
 	while (1) {
+		progress++;
 		trans = btrfs_start_transaction(rc->extent_root, 0);
 		BUG_ON(IS_ERR(trans));
-
+restart:
 		if (update_backref_cache(trans, &rc->backref_cache)) {
 			btrfs_end_transaction(trans, rc->extent_root);
 			continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 			}
 		}
 	}
+	if (trans && progress && err == -ENOSPC) {
+		ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+					      rc->block_group->flags);
+		if (ret == 0) {
+			err = 0;
+			progress = 0;
+			goto restart;
+		}
+	}
 
 	btrfs_release_path(rc->extent_root, path);
 	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
 	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
 	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
 	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+	Opt_enospc_debug, Opt_err,
 };
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
 	{Opt_space_cache, "space_cache"},
 	{Opt_clear_cache, "clear_cache"},
 	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+	{Opt_enospc_debug, "enospc_debug"},
 	{Opt_err, NULL},
 };
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 		case Opt_user_subvol_rm_allowed:
 			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
 			break;
+		case Opt_enospc_debug:
+			btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+			break;
 		case Opt_err:
 			printk(KERN_INFO "btrfs: unrecognized mount option "
 			       "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca15276..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
 	ret = btrfs_shrink_device(device, 0);
 	if (ret)
-		goto error_brelse;
+		goto error_undo;
 
 	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
 	if (ret)
-		goto error_brelse;
+		goto error_undo;
 
 	device->in_fs_metadata = 0;
 
@@ -1416,6 +1416,13 @@ out:
 	mutex_unlock(&root->fs_info->volume_mutex);
 	mutex_unlock(&uuid_mutex);
 	return ret;
+error_undo:
+	if (device->writeable) {
+		list_add(&device->dev_alloc_list,
+			 &root->fs_info->fs_devices->alloc_list);
+		root->fs_info->fs_devices->rw_devices++;
+	}
+	goto error_brelse;
 }
 
 /*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	device->dev_root = root->fs_info->dev_root;
 	device->bdev = bdev;
 	device->in_fs_metadata = 1;
-	device->mode = 0;
+	device->mode = FMODE_EXCL;
 	set_blocksize(device->bdev, 4096);
 
 	if (seeding_dev) {
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5776531dc2b..d779cefcfd7d 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -370,7 +370,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
 }
 
 int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
-			      struct inode *inode, struct inode *dir)
+			      struct inode *inode, struct inode *dir,
+			      const struct qstr *qstr)
 {
 	int err;
 	size_t len;
@@ -378,7 +379,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
 	char *suffix;
 	char *name;
 
-	err = security_inode_init_security(inode, dir, &suffix, &value, &len);
+	err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+					   &len);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 7a43fd640bbb..b3cc8039134b 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
 extern int btrfs_removexattr(struct dentry *dentry, const char *name);
 
 extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
-				     struct inode *inode, struct inode *dir);
+				     struct inode *inode, struct inode *dir,
+				     const struct qstr *qstr);
 
 #endif /* __XATTR__ */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 42c7fafc8bfe..a0358c2189cb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -275,6 +275,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
275 bool preemptive) 275 bool preemptive)
276{ 276{
277 struct dentry *grave, *trap; 277 struct dentry *grave, *trap;
278 struct path path, path_to_graveyard;
278 char nbuffer[8 + 8 + 1]; 279 char nbuffer[8 + 8 + 1];
279 int ret; 280 int ret;
280 281
@@ -287,10 +288,18 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 	/* non-directories can just be unlinked */
 	if (!S_ISDIR(rep->d_inode->i_mode)) {
 		_debug("unlink stale object");
-		ret = vfs_unlink(dir->d_inode, rep);
 
-		if (preemptive)
-			cachefiles_mark_object_buried(cache, rep);
+		path.mnt = cache->mnt;
+		path.dentry = dir;
+		ret = security_path_unlink(&path, rep);
+		if (ret < 0) {
+			cachefiles_io_error(cache, "Unlink security error");
+		} else {
+			ret = vfs_unlink(dir->d_inode, rep);
+
+			if (preemptive)
+				cachefiles_mark_object_buried(cache, rep);
+		}
 
 		mutex_unlock(&dir->d_inode->i_mutex);
 
@@ -379,12 +388,23 @@ try_again:
 	}
 
 	/* attempt the rename */
-	ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
-	if (ret != 0 && ret != -ENOMEM)
-		cachefiles_io_error(cache, "Rename failed with error %d", ret);
+	path.mnt = cache->mnt;
+	path.dentry = dir;
+	path_to_graveyard.mnt = cache->mnt;
+	path_to_graveyard.dentry = cache->graveyard;
+	ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
+	if (ret < 0) {
+		cachefiles_io_error(cache, "Rename security error %d", ret);
+	} else {
+		ret = vfs_rename(dir->d_inode, rep,
+				 cache->graveyard->d_inode, grave);
+		if (ret != 0 && ret != -ENOMEM)
+			cachefiles_io_error(cache,
+					    "Rename failed with error %d", ret);
 
 		if (preemptive)
 			cachefiles_mark_object_buried(cache, rep);
+	}
 
 	unlock_rename(cache->graveyard, dir);
 	dput(grave);
@@ -448,6 +468,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
448{ 468{
449 struct cachefiles_cache *cache; 469 struct cachefiles_cache *cache;
450 struct dentry *dir, *next = NULL; 470 struct dentry *dir, *next = NULL;
471 struct path path;
451 unsigned long start; 472 unsigned long start;
452 const char *name; 473 const char *name;
453 int ret, nlen; 474 int ret, nlen;
@@ -458,6 +479,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
458 479
459 cache = container_of(parent->fscache.cache, 480 cache = container_of(parent->fscache.cache,
460 struct cachefiles_cache, cache); 481 struct cachefiles_cache, cache);
482 path.mnt = cache->mnt;
461 483
462 ASSERT(parent->dentry); 484 ASSERT(parent->dentry);
463 ASSERT(parent->dentry->d_inode); 485 ASSERT(parent->dentry->d_inode);
@@ -511,6 +533,10 @@ lookup_again:
511 if (ret < 0) 533 if (ret < 0)
512 goto create_error; 534 goto create_error;
513 535
536 path.dentry = dir;
537 ret = security_path_mkdir(&path, next, 0);
538 if (ret < 0)
539 goto create_error;
514 start = jiffies; 540 start = jiffies;
515 ret = vfs_mkdir(dir->d_inode, next, 0); 541 ret = vfs_mkdir(dir->d_inode, next, 0);
516 cachefiles_hist(cachefiles_mkdir_histogram, start); 542 cachefiles_hist(cachefiles_mkdir_histogram, start);
@@ -536,6 +562,10 @@ lookup_again:
536 if (ret < 0) 562 if (ret < 0)
537 goto create_error; 563 goto create_error;
538 564
565 path.dentry = dir;
566 ret = security_path_mknod(&path, next, S_IFREG, 0);
567 if (ret < 0)
568 goto create_error;
539 start = jiffies; 569 start = jiffies;
540 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); 570 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
541 cachefiles_hist(cachefiles_create_histogram, start); 571 cachefiles_hist(cachefiles_create_histogram, start);
@@ -692,6 +722,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
692{ 722{
693 struct dentry *subdir; 723 struct dentry *subdir;
694 unsigned long start; 724 unsigned long start;
725 struct path path;
695 int ret; 726 int ret;
696 727
697 _enter(",,%s", dirname); 728 _enter(",,%s", dirname);
@@ -719,6 +750,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
719 750
720 _debug("attempt mkdir"); 751 _debug("attempt mkdir");
721 752
753 path.mnt = cache->mnt;
754 path.dentry = dir;
755 ret = security_path_mkdir(&path, subdir, 0700);
756 if (ret < 0)
757 goto mkdir_error;
722 ret = vfs_mkdir(dir->d_inode, subdir, 0700); 758 ret = vfs_mkdir(dir->d_inode, subdir, 0700);
723 if (ret < 0) 759 if (ret < 0)
724 goto mkdir_error; 760 goto mkdir_error;
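The pattern this file gains throughout is "consult the security hook, then perform the VFS operation". As a loose userspace analogue of that shape (the policy rule below is invented purely for illustration; only unlink(2) is a real call):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* pretend LSM: refuse to delete anything ending in ".keep" */
static int policy_may_unlink(const char *path)
{
    size_t n = strlen(path);
    return (n >= 5 && strcmp(path + n - 5, ".keep") == 0) ? -EPERM : 0;
}

static int checked_unlink(const char *path)
{
    int ret = policy_may_unlink(path);      /* hook first ... */
    if (ret < 0) {
        fprintf(stderr, "unlink %s: policy error %d\n", path, ret);
        return ret;
    }
    return unlink(path) ? -errno : 0;       /* ... then the real operation */
}

int main(void)
{
    printf("stale-object:   %d\n", checked_unlink("stale-object"));
    printf("important.keep: %d\n", checked_unlink("important.keep"));
    return 0;
}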
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f0aef787a102..ebafa65a29b6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry)
 	}
 	di->dentry = dentry;
 	di->lease_session = NULL;
-	di->parent_inode = igrab(dentry->d_parent->d_inode);
 	dentry->d_fsdata = di;
 	dentry->d_time = jiffies;
 	ceph_dentry_lru_add(dentry);
@@ -410,7 +409,7 @@ more:
 	spin_lock(&inode->i_lock);
 	if (ci->i_release_count == fi->dir_release_count) {
 		dout(" marking %p complete\n", inode);
-		ci->i_ceph_flags |= CEPH_I_COMPLETE;
+		/* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
 		ci->i_max_offset = filp->f_pos;
 	}
 	spin_unlock(&inode->i_lock);
@@ -497,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 
 	/* .snap dir? */
 	if (err == -ENOENT &&
+	    ceph_snap(parent) == CEPH_NOSNAP &&
 	    strcmp(dentry->d_name.name,
 		   fsc->mount_options->snapdir_name) == 0) {
 		struct inode *inode = ceph_get_snapdir(parent);
@@ -993,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *dir;
 
-	if (nd->flags & LOOKUP_RCU)
+	if (nd && nd->flags & LOOKUP_RCU)
 		return -ECHILD;
 
 	dir = dentry->d_parent->d_inode;
@@ -1030,28 +1030,8 @@ out_touch:
1030static void ceph_dentry_release(struct dentry *dentry) 1030static void ceph_dentry_release(struct dentry *dentry)
1031{ 1031{
1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 struct ceph_dentry_info *di = ceph_dentry(dentry);
1033 struct inode *parent_inode = NULL;
1034 u64 snapid = CEPH_NOSNAP;
1035 1033
1036 if (!IS_ROOT(dentry)) { 1034 dout("dentry_release %p\n", dentry);
1037 parent_inode = di->parent_inode;
1038 if (parent_inode)
1039 snapid = ceph_snap(parent_inode);
1040 }
1041 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1042 if (parent_inode && snapid != CEPH_SNAPDIR) {
1043 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1044
1045 spin_lock(&parent_inode->i_lock);
1046 if (ci->i_shared_gen == di->lease_shared_gen ||
1047 snapid <= CEPH_MAXSNAP) {
1048 dout(" clearing %p complete (d_release)\n",
1049 parent_inode);
1050 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1051 ci->i_release_count++;
1052 }
1053 spin_unlock(&parent_inode->i_lock);
1054 }
1055 if (di) { 1035 if (di) {
1056 ceph_dentry_lru_del(dentry); 1036 ceph_dentry_lru_del(dentry);
1057 if (di->lease_session) 1037 if (di->lease_session)
@@ -1059,8 +1039,6 @@ static void ceph_dentry_release(struct dentry *dentry)
1059 kmem_cache_free(ceph_dentry_cachep, di); 1039 kmem_cache_free(ceph_dentry_cachep, di);
1060 dentry->d_fsdata = NULL; 1040 dentry->d_fsdata = NULL;
1061 } 1041 }
1062 if (parent_inode)
1063 iput(parent_inode);
1064} 1042}
1065 1043
1066static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1044static int ceph_snapdir_d_revalidate(struct dentry *dentry,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
 		    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
 		    (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
 			dout(" marking %p complete (empty)\n", inode);
-			ci->i_ceph_flags |= CEPH_I_COMPLETE;
+			/* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
 			ci->i_max_offset = 2;
 		}
 		break;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88fcaa21b801..20b907d76ae2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -207,7 +207,6 @@ struct ceph_dentry_info {
 	struct dentry *dentry;
 	u64 time;
 	u64 offset;
-	struct inode *parent_inode;
 };
 
 struct ceph_inode_xattrs_info {
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
  */
 asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
 {
-	struct path path;
-	int error;
-
-	error = user_path(pathname, &path);
-	if (!error) {
-		struct kstatfs tmp;
-		error = vfs_statfs(&path, &tmp);
-		if (!error)
-			error = put_compat_statfs(buf, &tmp);
-		path_put(&path);
-	}
+	struct kstatfs tmp;
+	int error = user_statfs(pathname, &tmp);
+	if (!error)
+		error = put_compat_statfs(buf, &tmp);
 	return error;
 }
 
 asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
 {
-	struct file * file;
 	struct kstatfs tmp;
-	int error;
-
-	error = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-	error = vfs_statfs(&file->f_path, &tmp);
+	int error = fd_statfs(fd, &tmp);
 	if (!error)
 		error = put_compat_statfs(buf, &tmp);
-	fput(file);
-out:
 	return error;
 }
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 
 asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
 {
-	struct path path;
+	struct kstatfs tmp;
 	int error;
 
 	if (sz != sizeof(*buf))
 		return -EINVAL;
 
-	error = user_path(pathname, &path);
-	if (!error) {
-		struct kstatfs tmp;
-		error = vfs_statfs(&path, &tmp);
-		if (!error)
-			error = put_compat_statfs64(buf, &tmp);
-		path_put(&path);
-	}
+	error = user_statfs(pathname, &tmp);
+	if (!error)
+		error = put_compat_statfs64(buf, &tmp);
 	return error;
 }
 
 asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
 {
-	struct file * file;
 	struct kstatfs tmp;
 	int error;
 
 	if (sz != sizeof(*buf))
 		return -EINVAL;
 
-	error = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-	error = vfs_statfs(&file->f_path, &tmp);
+	error = fd_statfs(fd, &tmp);
 	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
-	fput(file);
-out:
 	return error;
 }
369 341
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
 	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
-	ret = compat_readv(file, vec, vlen, &pos);
+	ret = -ESPIPE;
+	if (file->f_mode & FMODE_PREAD)
+		ret = compat_readv(file, vec, vlen, &pos);
 	fput_light(file, fput_needed);
 	return ret;
 }
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
 	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
-	ret = compat_writev(file, vec, vlen, &pos);
+	ret = -ESPIPE;
+	if (file->f_mode & FMODE_PWRITE)
+		ret = compat_writev(file, vec, vlen, &pos);
 	fput_light(file, fput_needed);
 	return ret;
 }
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
 }
 
 #endif /* CONFIG_TIMERFD */
+
+#ifdef CONFIG_FHANDLE
+/*
+ * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
+ * doesn't set the O_LARGEFILE flag.
+ */
+asmlinkage long
+compat_sys_open_by_handle_at(int mountdirfd,
+			     struct file_handle __user *handle, int flags)
+{
+	return do_handle_open(mountdirfd, handle, flags);
+}
+#endif
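The statfs conversion above boils down to "one helper fills the native buffer, a second converts it for the caller". The sketch below mirrors that shape with plain statfs(2)/fstatfs(2) standing in for the kernel-internal user_statfs()/fd_statfs() helpers, and a made-up reduced struct in place of the real compat layout:

#include <fcntl.h>
#include <stdio.h>
#include <sys/vfs.h>
#include <unistd.h>

struct compat_statfs { unsigned int f_type, f_bsize, f_blocks, f_bfree; };

static int put_compat(struct compat_statfs *dst, const struct statfs *src)
{
    dst->f_type   = src->f_type;
    dst->f_bsize  = src->f_bsize;
    dst->f_blocks = src->f_blocks;   /* may truncate on huge filesystems */
    dst->f_bfree  = src->f_bfree;
    return 0;
}

static int compat_statfs_path(const char *path, struct compat_statfs *buf)
{
    struct statfs tmp;
    int error = statfs(path, &tmp);      /* was: user_path + vfs_statfs */
    if (!error)
        error = put_compat(buf, &tmp);
    return error;
}

static int compat_statfs_fd(int fd, struct compat_statfs *buf)
{
    struct statfs tmp;
    int error = fstatfs(fd, &tmp);       /* was: fget + vfs_statfs + fput */
    if (!error)
        error = put_compat(buf, &tmp);
    return error;
}

int main(void)
{
    struct compat_statfs b;
    int fd = open("/", O_RDONLY);

    if (!compat_statfs_path("/", &b))
        printf("path: type 0x%x bsize %u\n", b.f_type, b.f_bsize);
    if (fd >= 0 && !compat_statfs_fd(fd, &b))
        printf("fd:   blocks %u free %u\n", b.f_blocks, b.f_bfree);
    return 0;
}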
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..a39fe47c466f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	__releases(parent->d_lock)
 	__releases(dentry->d_inode->i_lock)
 {
-	dentry->d_parent = NULL;
 	list_del(&dentry->d_u.d_child);
+	/*
+	 * Inform try_to_ascend() that we are no longer attached to the
+	 * dentry tree
+	 */
+	dentry->d_flags |= DCACHE_DISCONNECTED;
 	if (parent)
 		spin_unlock(&parent->d_lock);
 	dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
1012} 1016}
1013 1017
1014/* 1018/*
1019 * This tries to ascend one level of parenthood, but
1020 * we can race with renaming, so we need to re-check
1021 * the parenthood after dropping the lock and check
1022 * that the sequence number still matches.
1023 */
1024static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1025{
1026 struct dentry *new = old->d_parent;
1027
1028 rcu_read_lock();
1029 spin_unlock(&old->d_lock);
1030 spin_lock(&new->d_lock);
1031
1032 /*
1033 * might go back up the wrong parent if we have had a rename
1034 * or deletion
1035 */
1036 if (new != old->d_parent ||
1037 (old->d_flags & DCACHE_DISCONNECTED) ||
1038 (!locked && read_seqretry(&rename_lock, seq))) {
1039 spin_unlock(&new->d_lock);
1040 new = NULL;
1041 }
1042 rcu_read_unlock();
1043 return new;
1044}
1045
1046
1047/*
1015 * Search for at least 1 mount point in the dentry's subdirs. 1048 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs 1049 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching. 1050 * list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		struct dentry *tmp;
-		struct dentry *child;
-
-		tmp = this_parent->d_parent;
-		rcu_read_lock();
-		spin_unlock(&this_parent->d_lock);
-		child = this_parent;
-		this_parent = tmp;
-		spin_lock(&this_parent->d_lock);
-		/* might go back up the wrong parent if we have had a rename
-		 * or deletion */
-		if (this_parent != child->d_parent ||
-		    (!locked && read_seqretry(&rename_lock, seq))) {
-			spin_unlock(&this_parent->d_lock);
-			rcu_read_unlock();
+		struct dentry *child = this_parent;
+		this_parent = try_to_ascend(this_parent, locked, seq);
+		if (!this_parent)
 			goto rename_retry;
-		}
-		rcu_read_unlock();
 		next = child->d_u.d_child.next;
 		goto resume;
 	}
@@ -1181,24 +1200,10 @@ resume:
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		struct dentry *tmp;
-		struct dentry *child;
-
-		tmp = this_parent->d_parent;
-		rcu_read_lock();
-		spin_unlock(&this_parent->d_lock);
-		child = this_parent;
-		this_parent = tmp;
-		spin_lock(&this_parent->d_lock);
-		/* might go back up the wrong parent if we have had a rename
-		 * or deletion */
-		if (this_parent != child->d_parent ||
-		    (!locked && read_seqretry(&rename_lock, seq))) {
-			spin_unlock(&this_parent->d_lock);
-			rcu_read_unlock();
+		struct dentry *child = this_parent;
+		this_parent = try_to_ascend(this_parent, locked, seq);
+		if (!this_parent)
 			goto rename_retry;
-		}
-		rcu_read_unlock();
 		next = child->d_u.d_child.next;
 		goto resume;
 	}
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1523} 1528}
1524EXPORT_SYMBOL(d_alloc_root); 1529EXPORT_SYMBOL(d_alloc_root);
1525 1530
1531static struct dentry * __d_find_any_alias(struct inode *inode)
1532{
1533 struct dentry *alias;
1534
1535 if (list_empty(&inode->i_dentry))
1536 return NULL;
1537 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1538 __dget(alias);
1539 return alias;
1540}
1541
1542static struct dentry * d_find_any_alias(struct inode *inode)
1543{
1544 struct dentry *de;
1545
1546 spin_lock(&inode->i_lock);
1547 de = __d_find_any_alias(inode);
1548 spin_unlock(&inode->i_lock);
1549 return de;
1550}
1551
1552
1526/** 1553/**
1527 * d_obtain_alias - find or allocate a dentry for a given inode 1554 * d_obtain_alias - find or allocate a dentry for a given inode
1528 * @inode: inode to allocate the dentry for 1555 * @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 
-	res = d_find_alias(inode);
+	res = d_find_any_alias(inode);
 	if (res)
 		goto out_iput;
 
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 
 
 	spin_lock(&inode->i_lock);
-	res = __d_find_alias(inode, 0);
+	res = __d_find_any_alias(inode);
 	if (res) {
 		spin_unlock(&inode->i_lock);
 		dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
 		spin_unlock(&dentry->d_lock);
 	}
 	if (this_parent != root) {
-		struct dentry *tmp;
-		struct dentry *child;
-
-		tmp = this_parent->d_parent;
+		struct dentry *child = this_parent;
 		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
 			this_parent->d_flags |= DCACHE_GENOCIDE;
 			this_parent->d_count--;
 		}
-		rcu_read_lock();
-		spin_unlock(&this_parent->d_lock);
-		child = this_parent;
-		this_parent = tmp;
-		spin_lock(&this_parent->d_lock);
-		/* might go back up the wrong parent if we have had a rename
-		 * or deletion */
-		if (this_parent != child->d_parent ||
-		    (!locked && read_seqretry(&rename_lock, seq))) {
-			spin_unlock(&this_parent->d_lock);
-			rcu_read_unlock();
+		this_parent = try_to_ascend(this_parent, locked, seq);
+		if (!this_parent)
 			goto rename_retry;
-		}
-		rcu_read_unlock();
 		next = child->d_u.d_child.next;
 		goto resume;
 	}
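Stripped of the locking, the idea behind try_to_ascend() is "remember a generation number, walk one level up, and restart the whole walk if anything moved or the node was detached while we looked away". A toy C11 sketch of just that validation step (the node type and the rename_gen counter are invented for illustration; the real code additionally re-checks d_parent after re-taking the parent's lock):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
    struct node *parent;
    bool disconnected;          /* set when the node leaves the tree */
};

static atomic_uint rename_gen;  /* bumped by every rename */

/* Walk one level up, but only trust the result if nothing changed. */
static struct node *try_to_ascend(struct node *n, unsigned int seq)
{
    if (n->disconnected || seq != atomic_load(&rename_gen))
        return NULL;            /* caller must restart the walk */
    return n->parent;
}

int main(void)
{
    struct node root = { NULL, false }, child = { &root, false };
    unsigned int seq = atomic_load(&rename_gen);

    printf("ascend ok:    %s\n", try_to_ascend(&child, seq) ? "yes" : "no");

    atomic_fetch_add(&rename_gen, 1);   /* pretend a rename happened */
    printf("after rename: %s\n",
           try_to_ascend(&child, seq) ? "yes" : "no (retry)");
    return 0;
}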
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 37a8ca7c1222..e7a7a2f07324 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -13,9 +13,6 @@
13 * 13 *
14 */ 14 */
15 15
16/* uncomment to get debug messages from the debug filesystem, ah the irony. */
17/* #define DEBUG */
18
19#include <linux/module.h> 16#include <linux/module.h>
20#include <linux/fs.h> 17#include <linux/fs.h>
21#include <linux/mount.h> 18#include <linux/mount.h>
@@ -310,7 +307,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_symlink);
 
-static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
+static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
 {
 	int ret = 0;
 
@@ -333,6 +330,7 @@ static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
 			dput(dentry);
 		}
 	}
+	return ret;
 }
 
338/** 336/**
@@ -351,7 +349,8 @@ static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
351void debugfs_remove(struct dentry *dentry) 349void debugfs_remove(struct dentry *dentry)
352{ 350{
353 struct dentry *parent; 351 struct dentry *parent;
354 352 int ret;
353
355 if (!dentry) 354 if (!dentry)
356 return; 355 return;
357 356
@@ -360,9 +359,10 @@ void debugfs_remove(struct dentry *dentry)
 		return;
 
 	mutex_lock(&parent->d_inode->i_mutex);
-	__debugfs_remove(dentry, parent);
+	ret = __debugfs_remove(dentry, parent);
 	mutex_unlock(&parent->d_inode->i_mutex);
-	simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+	if (!ret)
+		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
 
@@ -540,17 +540,5 @@ static int __init debugfs_init(void)
540 540
541 return retval; 541 return retval;
542} 542}
543
544static void __exit debugfs_exit(void)
545{
546 debugfs_registered = false;
547
548 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
549 unregister_filesystem(&debug_fs_type);
550 kobject_put(debug_kobj);
551}
552
553core_initcall(debugfs_init); 543core_initcall(debugfs_init);
554module_exit(debugfs_exit);
555MODULE_LICENSE("GPL");
556 544
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
  * cleanup path and it is also acquired by eventpoll_release_file()
  * if a file has been pushed inside an epoll set and it is then
  * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
 /* Used for safe wake up implementation */
 static struct nested_calls poll_safewake_ncalls;
 
@@ -1198,6 +1208,62 @@ retry:
1198 return res; 1208 return res;
1199} 1209}
1200 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current dept of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1201/* 1267/*
1202 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1203 */ 1269 */
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		struct epoll_event __user *, event)
 {
 	int error;
+	int did_lock_epmutex = 0;
 	struct file *file, *tfile;
 	struct eventpoll *ep;
 	struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 */
 	ep = file->private_data;
 
+	/*
+	 * When we insert an epoll file descriptor, inside another epoll file
+	 * descriptor, there is the change of creating closed loops, which are
+	 * better be handled here, than in more critical paths.
+	 *
+	 * We hold epmutex across the loop check and the insert in this case, in
+	 * order to prevent two separate inserts from racing and each doing the
+	 * insert "at the same time" such that ep_loop_check passes on both
+	 * before either one does the insert, thereby creating a cycle.
+	 */
+	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+		mutex_lock(&epmutex);
+		did_lock_epmutex = 1;
+		error = -ELOOP;
+		if (ep_loop_check(ep, tfile) != 0)
+			goto error_tgt_fput;
+	}
+
+
 	mutex_lock(&ep->mtx);
 
 	/*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
+	if (unlikely(did_lock_epmutex))
+		mutex_unlock(&epmutex);
+
 	fput(tfile);
 error_fput:
 	fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
 		EP_ITEM_COST;
 	BUG_ON(max_user_watches < 0);
 
+	/*
+	 * Initialize the structure used to perform epoll file descriptor
+	 * inclusion loops checks.
+	 */
+	ep_nested_calls_init(&poll_loop_ncalls);
+
 	/* Initialize the structure used to perform safe poll wait head wake ups */
 	ep_nested_calls_init(&poll_safewake_ncalls);
 
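The new loop check is directly observable from userspace: once an epoll fd has been added to another, trying to close the cycle in the other direction should now fail at EPOLL_CTL_ADD time. A small stand-alone probe (plain epoll API, no kernel internals assumed):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>

int main(void)
{
    int a = epoll_create1(0);
    int b = epoll_create1(0);
    struct epoll_event ev = { .events = EPOLLIN };

    if (a < 0 || b < 0) {
        perror("epoll_create1");
        return 1;
    }

    ev.data.fd = a;
    if (epoll_ctl(b, EPOLL_CTL_ADD, a, &ev) < 0)    /* A inside B: fine */
        perror("add a->b");

    ev.data.fd = b;
    if (epoll_ctl(a, EPOLL_CTL_ADD, b, &ev) < 0)    /* would close the loop */
        printf("add b->a rejected: %s (expect ELOOP)\n", strerror(errno));
    else
        printf("add b->a accepted (kernel without the loop check)\n");
    return 0;
}

On kernels predating this change the second add is accepted, which is exactly the cycle the epmutex-protected walk is meant to rule out.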
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
 {
 	struct file *file;
 	int err;
+	static const struct open_flags open_exec_flags = {
+		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+		.acc_mode = MAY_EXEC | MAY_OPEN,
+		.intent = LOOKUP_OPEN
+	};
 
-	file = do_filp_open(AT_FDCWD, name,
-				O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
-				MAY_EXEC | MAY_OPEN);
+	file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
 	if (IS_ERR(file))
 		goto out;
 
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			if (new_dir->i_nlink >= EXOFS_LINK_MAX)
 				goto out_dir;
 		}
-		inode_inc_link_count(old_inode);
 		err = exofs_add_link(new_dentry, old_inode);
-		if (err) {
-			inode_dec_link_count(old_inode);
+		if (err)
 			goto out_dir;
-		}
 		if (dir_de)
 			inode_inc_link_count(new_dir);
 	}
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	old_inode->i_ctime = CURRENT_TIME;
 
 	exofs_delete_entry(old_de, old_page);
-	inode_dec_link_count(old_inode);
+	mark_inode_dirty(old_inode);
 
 	if (dir_de) {
 		err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
 	struct inode * inode = dentry->d_inode;
 	int len = *max_len;
 	int type = FILEID_INO32_GEN;
 
-	if (len < 2 || (connectable && len < 4))
+	if (connectable && (len < 4)) {
+		*max_len = 4;
+		return 255;
+	} else if (len < 2) {
+		*max_len = 2;
 		return 255;
+	}
 
 	len = 2;
 	fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
 	/*
 	 * Try to get any dentry for the given file handle from the filesystem.
 	 */
+	if (!nop || !nop->fh_to_dentry)
+		return ERR_PTR(-ESTALE);
 	result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
 	if (!result)
 		result = ERR_PTR(-ESTALE);
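The *max_len handshake above (report the size actually needed and return 255) is what ultimately surfaces to userspace as the EOVERFLOW retry loop of name_to_handle_at(2), the syscall added elsewhere in this series (fs/fhandle.c). A sketch of that loop, assuming a libc new enough to expose the wrapper:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "/";
    struct file_handle probe = { .handle_bytes = 0 };
    struct file_handle *fh;
    int mount_id;

    /* first call is only a size probe: it must fail with EOVERFLOW */
    if (name_to_handle_at(AT_FDCWD, path, &probe, &mount_id, 0) == 0 ||
        errno != EOVERFLOW) {
        perror("name_to_handle_at(probe)");
        return 1;
    }

    /* the kernel reported how big the handle really is; retry */
    fh = malloc(sizeof(*fh) + probe.handle_bytes);
    if (!fh)
        return 1;
    fh->handle_bytes = probe.handle_bytes;

    if (name_to_handle_at(AT_FDCWD, path, fh, &mount_id, 0) < 0) {
        perror("name_to_handle_at");
        return 1;
    }
    printf("%s: handle type %d, %u bytes, mount id %d\n",
           path, fh->handle_type, fh->handle_bytes, mount_id);
    free(fh);
    return 0;
}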
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 6346a2acf326..1b48c3370872 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); 110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
111 111
112/* ialloc.c */ 112/* ialloc.c */
113extern struct inode * ext2_new_inode (struct inode *, int); 113extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *);
114extern void ext2_free_inode (struct inode *); 114extern void ext2_free_inode (struct inode *);
115extern unsigned long ext2_count_free_inodes (struct super_block *); 115extern unsigned long ext2_count_free_inodes (struct super_block *);
116extern void ext2_check_inodes_bitmap (struct super_block *); 116extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad70479aabff..ee9ed31948e1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,8 @@ found:
429 return group; 429 return group;
430} 430}
431 431
432struct inode *ext2_new_inode(struct inode *dir, int mode) 432struct inode *ext2_new_inode(struct inode *dir, int mode,
433 const struct qstr *qstr)
433{ 434{
434 struct super_block *sb; 435 struct super_block *sb;
435 struct buffer_head *bitmap_bh = NULL; 436 struct buffer_head *bitmap_bh = NULL;
@@ -585,7 +586,7 @@ got:
585 if (err) 586 if (err)
586 goto fail_free_drop; 587 goto fail_free_drop;
587 588
588 err = ext2_init_security(inode,dir); 589 err = ext2_init_security(inode, dir, qstr);
589 if (err) 590 if (err)
590 goto fail_free_drop; 591 goto fail_free_drop;
591 592
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..ed5c5d496ee9 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,7 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
104 104
105 dquot_initialize(dir); 105 dquot_initialize(dir);
106 106
107 inode = ext2_new_inode(dir, mode); 107 inode = ext2_new_inode(dir, mode, &dentry->d_name);
108 if (IS_ERR(inode)) 108 if (IS_ERR(inode))
109 return PTR_ERR(inode); 109 return PTR_ERR(inode);
110 110
@@ -133,7 +133,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
133 133
134 dquot_initialize(dir); 134 dquot_initialize(dir);
135 135
136 inode = ext2_new_inode (dir, mode); 136 inode = ext2_new_inode (dir, mode, &dentry->d_name);
137 err = PTR_ERR(inode); 137 err = PTR_ERR(inode);
138 if (!IS_ERR(inode)) { 138 if (!IS_ERR(inode)) {
139 init_special_inode(inode, inode->i_mode, rdev); 139 init_special_inode(inode, inode->i_mode, rdev);
@@ -159,7 +159,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
159 159
160 dquot_initialize(dir); 160 dquot_initialize(dir);
161 161
162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); 162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name);
163 err = PTR_ERR(inode); 163 err = PTR_ERR(inode);
164 if (IS_ERR(inode)) 164 if (IS_ERR(inode))
165 goto out; 165 goto out;
@@ -230,7 +230,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
230 230
231 inode_inc_link_count(dir); 231 inode_inc_link_count(dir);
232 232
233 inode = ext2_new_inode (dir, S_IFDIR | mode); 233 inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
234 err = PTR_ERR(inode); 234 err = PTR_ERR(inode);
235 if (IS_ERR(inode)) 235 if (IS_ERR(inode))
236 goto out_dir; 236 goto out_dir;
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 		new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
 		if (!new_de)
 			goto out_dir;
-		inode_inc_link_count(old_inode);
 		ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 			if (new_dir->i_nlink >= EXT2_LINK_MAX)
 				goto out_dir;
 		}
-		inode_inc_link_count(old_inode);
 		err = ext2_add_link(new_dentry, old_inode);
-		if (err) {
-			inode_dec_link_count(old_inode);
+		if (err)
 			goto out_dir;
-		}
 		if (dir_de)
 			inode_inc_link_count(new_dir);
 	}
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	/*
 	 * Like most other Unix systems, set the ctime for inodes on a
 	 * rename.
-	 * inode_dec_link_count() will mark the inode dirty.
 	 */
 	old_inode->i_ctime = CURRENT_TIME_SEC;
+	mark_inode_dirty(old_inode);
 
 	ext2_delete_entry (old_de, old_page);
-	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
 		if (old_dir != new_dir)
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index a1a1c2184616..5e41cccff762 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,9 +116,11 @@ exit_ext2_xattr(void)
116# endif /* CONFIG_EXT2_FS_XATTR */ 116# endif /* CONFIG_EXT2_FS_XATTR */
117 117
118#ifdef CONFIG_EXT2_FS_SECURITY 118#ifdef CONFIG_EXT2_FS_SECURITY
119extern int ext2_init_security(struct inode *inode, struct inode *dir); 119extern int ext2_init_security(struct inode *inode, struct inode *dir,
120 const struct qstr *qstr);
120#else 121#else
121static inline int ext2_init_security(struct inode *inode, struct inode *dir) 122static inline int ext2_init_security(struct inode *inode, struct inode *dir,
123 const struct qstr *qstr)
122{ 124{
123 return 0; 125 return 0;
124} 126}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 3004e15d5da5..5d979b4347b0 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -47,14 +47,15 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
47} 47}
48 48
49int 49int
50ext2_init_security(struct inode *inode, struct inode *dir) 50ext2_init_security(struct inode *inode, struct inode *dir,
51 const struct qstr *qstr)
51{ 52{
52 int err; 53 int err;
53 size_t len; 54 size_t len;
54 void *value; 55 void *value;
55 char *name; 56 char *name;
56 57
57 err = security_inode_init_security(inode, dir, &name, &value, &len); 58 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
58 if (err) { 59 if (err) {
59 if (err == -EOPNOTSUPP) 60 if (err == -EOPNOTSUPP)
60 return 0; 61 return 0;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 045995c8ce5a..153242187fce 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1991,6 +1991,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
 		spin_unlock(sb_bgl_lock(sbi, group));
 		percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
 
+		free_blocks -= next - start;
 		/* Do not issue a TRIM on extents smaller than minblocks */
 		if ((next - start) < minblocks)
 			goto free_extent;
@@ -2040,7 +2041,7 @@ free_extent:
 		cond_resched();
 
 		/* No more suitable extents */
-		if ((free_blocks - count) < minblocks)
+		if (free_blocks < minblocks)
 			break;
 	}
 
@@ -2090,7 +2091,8 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
 	int ret = 0;
 
-	start = range->start >> sb->s_blocksize_bits;
+	start = (range->start >> sb->s_blocksize_bits) +
+		le32_to_cpu(es->s_first_data_block);
 	len = range->len >> sb->s_blocksize_bits;
 	minlen = range->minlen >> sb->s_blocksize_bits;
 	trimmed = 0;
@@ -2099,10 +2101,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2099 return -EINVAL; 2101 return -EINVAL;
2100 if (start >= max_blks) 2102 if (start >= max_blks)
2101 goto out; 2103 goto out;
2102 if (start < le32_to_cpu(es->s_first_data_block)) {
2103 len -= le32_to_cpu(es->s_first_data_block) - start;
2104 start = le32_to_cpu(es->s_first_data_block);
2105 }
2106 if (start + len > max_blks) 2104 if (start + len > max_blks)
2107 len = max_blks - start; 2105 len = max_blks - start;
2108 2106
@@ -2129,10 +2127,15 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2129 if (free_blocks < minlen) 2127 if (free_blocks < minlen)
2130 continue; 2128 continue;
2131 2129
2132 if (len >= EXT3_BLOCKS_PER_GROUP(sb)) 2130 /*
2133 len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block); 2131 * For all the groups except the last one, last block will
2134 else 2132 * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to
2133 * change it for the last group in which case first_block +
2134 * len < EXT3_BLOCKS_PER_GROUP(sb).
2135 */
2136 if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
2135 last_block = first_block + len; 2137 last_block = first_block + len;
2138 len -= last_block - first_block;
2136 2139
2137 ret = ext3_trim_all_free(sb, group, first_block, 2140 ret = ext3_trim_all_free(sb, group, first_block,
2138 last_block, minlen); 2141 last_block, minlen);
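
The ext3_trim_fs()/ext3_trim_all_free() fixes above adjust how the byte-based FITRIM range supplied by userspace is mapped onto block groups (start is now taken relative to s_first_data_block, and free_blocks is tracked as extents are trimmed). For reference, a minimal userspace sketch of the ioctl being serviced; the mount-point path is a placeholder, and kernels or filesystems without FITRIM support simply return EOPNOTSUPP.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FITRIM, struct fstrim_range */

int main(void)
{
        struct fstrim_range range;
        int fd = open("/mnt/ext3", O_RDONLY);   /* placeholder mount point */

        if (fd < 0)
                return 1;
        memset(&range, 0, sizeof(range));
        range.start = 0;                /* bytes, relative to the filesystem */
        range.len = UINT64_MAX;         /* trim everything that is free */
        range.minlen = 1024 * 1024;     /* skip free extents smaller than 1 MiB */
        if (ioctl(fd, FITRIM, &range) < 0) {
                perror("FITRIM");
                return 1;
        }
        /* the kernel writes back how many bytes were actually discarded */
        printf("trimmed %llu bytes\n", (unsigned long long)range.len);
        return 0;
}
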
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9724aef22460..bfc2dc43681d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -404,7 +404,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
404 * For other inodes, search forward from the parent directory's block 404 * For other inodes, search forward from the parent directory's block
405 * group to find a free inode. 405 * group to find a free inode.
406 */ 406 */
407struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) 407struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
408 const struct qstr *qstr, int mode)
408{ 409{
409 struct super_block *sb; 410 struct super_block *sb;
410 struct buffer_head *bitmap_bh = NULL; 411 struct buffer_head *bitmap_bh = NULL;
@@ -589,7 +590,7 @@ got:
589 if (err) 590 if (err)
590 goto fail_free_drop; 591 goto fail_free_drop;
591 592
592 err = ext3_init_security(handle,inode, dir); 593 err = ext3_init_security(handle, inode, dir, qstr);
593 if (err) 594 if (err)
594 goto fail_free_drop; 595 goto fail_free_drop;
595 596
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..32f3b8695859 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1540,8 +1540,8 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1540 goto cleanup; 1540 goto cleanup;
1541 node2 = (struct dx_node *)(bh2->b_data); 1541 node2 = (struct dx_node *)(bh2->b_data);
1542 entries2 = node2->entries; 1542 entries2 = node2->entries;
1543 memset(&node2->fake, 0, sizeof(struct fake_dirent));
1543 node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize); 1544 node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
1544 node2->fake.inode = 0;
1545 BUFFER_TRACE(frame->bh, "get_write_access"); 1545 BUFFER_TRACE(frame->bh, "get_write_access");
1546 err = ext3_journal_get_write_access(handle, frame->bh); 1546 err = ext3_journal_get_write_access(handle, frame->bh);
1547 if (err) 1547 if (err)
@@ -1710,7 +1710,7 @@ retry:
1710 if (IS_DIRSYNC(dir)) 1710 if (IS_DIRSYNC(dir))
1711 handle->h_sync = 1; 1711 handle->h_sync = 1;
1712 1712
1713 inode = ext3_new_inode (handle, dir, mode); 1713 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1714 err = PTR_ERR(inode); 1714 err = PTR_ERR(inode);
1715 if (!IS_ERR(inode)) { 1715 if (!IS_ERR(inode)) {
1716 inode->i_op = &ext3_file_inode_operations; 1716 inode->i_op = &ext3_file_inode_operations;
@@ -1746,7 +1746,7 @@ retry:
1746 if (IS_DIRSYNC(dir)) 1746 if (IS_DIRSYNC(dir))
1747 handle->h_sync = 1; 1747 handle->h_sync = 1;
1748 1748
1749 inode = ext3_new_inode (handle, dir, mode); 1749 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1750 err = PTR_ERR(inode); 1750 err = PTR_ERR(inode);
1751 if (!IS_ERR(inode)) { 1751 if (!IS_ERR(inode)) {
1752 init_special_inode(inode, inode->i_mode, rdev); 1752 init_special_inode(inode, inode->i_mode, rdev);
@@ -1784,7 +1784,7 @@ retry:
1784 if (IS_DIRSYNC(dir)) 1784 if (IS_DIRSYNC(dir))
1785 handle->h_sync = 1; 1785 handle->h_sync = 1;
1786 1786
1787 inode = ext3_new_inode (handle, dir, S_IFDIR | mode); 1787 inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
1788 err = PTR_ERR(inode); 1788 err = PTR_ERR(inode);
1789 if (IS_ERR(inode)) 1789 if (IS_ERR(inode))
1790 goto out_stop; 1790 goto out_stop;
@@ -2206,7 +2206,7 @@ retry:
2206 if (IS_DIRSYNC(dir)) 2206 if (IS_DIRSYNC(dir))
2207 handle->h_sync = 1; 2207 handle->h_sync = 1;
2208 2208
2209 inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); 2209 inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO);
2210 err = PTR_ERR(inode); 2210 err = PTR_ERR(inode);
2211 if (IS_ERR(inode)) 2211 if (IS_ERR(inode))
2212 goto out_stop; 2212 goto out_stop;
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..071689f86e18 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1464,6 +1464,13 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1464 return; 1464 return;
1465 } 1465 }
1466 1466
1467 /* Check if feature set allows readwrite operations */
1468 if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
1469 ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
1470 "unknown ROCOMPAT features");
1471 return;
1472 }
1473
1467 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1474 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1468 if (es->s_last_orphan) 1475 if (es->s_last_orphan)
1469 jbd_debug(1, "Errors on filesystem, " 1476 jbd_debug(1, "Errors on filesystem, "
@@ -1936,6 +1943,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1943 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1944 sb->dq_op = &ext3_quota_operations;
1938#endif 1945#endif
1946 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1947 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1948 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1949 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 377fe7201169..2be4f69bfa64 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -128,10 +128,10 @@ exit_ext3_xattr(void)
128 128
129#ifdef CONFIG_EXT3_FS_SECURITY 129#ifdef CONFIG_EXT3_FS_SECURITY
130extern int ext3_init_security(handle_t *handle, struct inode *inode, 130extern int ext3_init_security(handle_t *handle, struct inode *inode,
131 struct inode *dir); 131 struct inode *dir, const struct qstr *qstr);
132#else 132#else
133static inline int ext3_init_security(handle_t *handle, struct inode *inode, 133static inline int ext3_init_security(handle_t *handle, struct inode *inode,
134 struct inode *dir) 134 struct inode *dir, const struct qstr *qstr)
135{ 135{
136 return 0; 136 return 0;
137} 137}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 03a99bfc59f9..b8d9f83aa5c5 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -49,14 +49,15 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
49} 49}
50 50
51int 51int
52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) 52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
53 const struct qstr *qstr)
53{ 54{
54 int err; 55 int err;
55 size_t len; 56 size_t len;
56 void *value; 57 void *value;
57 char *name; 58 char *name;
58 59
59 err = security_inode_init_security(inode, dir, &name, &value, &len); 60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
60 if (err) { 61 if (err) {
61 if (err == -EOPNOTSUPP) 62 if (err == -EOPNOTSUPP)
62 return 0; 63 return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index eb9097aec6f0..78b79e1bd7ed 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1042,7 +1042,7 @@ got:
1042 if (err) 1042 if (err)
1043 goto fail_free_drop; 1043 goto fail_free_drop;
1044 1044
1045 err = ext4_init_security(handle, inode, dir); 1045 err = ext4_init_security(handle, inode, dir, qstr);
1046 if (err) 1046 if (err)
1047 goto fail_free_drop; 1047 goto fail_free_drop;
1048 1048
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..203f9e4a70be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3509 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3511 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
3510 3512
3511no_journal: 3513no_journal:
3512 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3514 /*
3515 * The maximum number of concurrent works can be high and
3516 * concurrency isn't really necessary. Limit it to 1.
3517 */
3518 EXT4_SB(sb)->dio_unwritten_wq =
3519 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
3513 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3520 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3514 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3521 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3515 goto failed_mount_wq; 3522 goto failed_mount_wq;
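
The no_journal hunk above replaces create_workqueue() with alloc_workqueue() so the DIO-unwritten queue is capped at one concurrent work item while keeping the rescuer needed for memory reclaim. A hedged sketch of that API shape, with a made-up queue name:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int __init example_init(void)
{
        /* WQ_MEM_RECLAIM guarantees forward progress under memory pressure;
         * max_active = 1 allows at most one work item to run at a time. */
        example_wq = alloc_workqueue("example-wq", WQ_MEM_RECLAIM, 1);
        if (!example_wq)
                return -ENOMEM;
        return 0;
}
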
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1ef16520b950..25b7387ff183 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -145,10 +145,10 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
145 145
146#ifdef CONFIG_EXT4_FS_SECURITY 146#ifdef CONFIG_EXT4_FS_SECURITY
147extern int ext4_init_security(handle_t *handle, struct inode *inode, 147extern int ext4_init_security(handle_t *handle, struct inode *inode,
148 struct inode *dir); 148 struct inode *dir, const struct qstr *qstr);
149#else 149#else
150static inline int ext4_init_security(handle_t *handle, struct inode *inode, 150static inline int ext4_init_security(handle_t *handle, struct inode *inode,
151 struct inode *dir) 151 struct inode *dir, const struct qstr *qstr)
152{ 152{
153 return 0; 153 return 0;
154} 154}
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 9b21268e121c..007c3bfbf094 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -49,14 +49,15 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
49} 49}
50 50
51int 51int
52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) 52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
53 const struct qstr *qstr)
53{ 54{
54 int err; 55 int err;
55 size_t len; 56 size_t len;
56 void *value; 57 void *value;
57 char *name; 58 char *name;
58 59
59 err = security_inode_init_security(inode, dir, &name, &value, &len); 60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
60 if (err) { 61 if (err) {
61 if (err == -EOPNOTSUPP) 62 if (err == -EOPNOTSUPP)
62 return 0; 63 return 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
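
This fat_encode_fh() change (and the matching fuse and gfs2 hunks further down) adopts the convention that an ->encode_fh() which runs out of room reports the required size back through *lenp before returning 255, so callers such as the new name_to_handle_at() can tell userspace how big a handle buffer is needed. A hedged sketch of the pattern, using an invented examplefs that only encodes two dwords:

#include <linux/exportfs.h>
#include <linux/fs.h>

static int examplefs_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
                               int connectable)
{
        const int needed = 2;   /* dwords this made-up filesystem needs */

        if (*lenp < needed) {
                *lenp = needed; /* report the minimum size to the caller */
                return 255;     /* historical "no room" return value */
        }
        fh[0] = de->d_inode->i_ino;             /* low 32 bits of inode number */
        fh[1] = de->d_inode->i_generation;
        *lenp = needed;
        /* connectable parent info omitted in this sketch */
        return 1;               /* filesystem-specific handle type */
}
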
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU) 46 if (nd && nd->flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
56{ 56{
57 if (nd->flags & LOOKUP_RCU) 57 if (nd && nd->flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
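
Both vfat revalidate hunks (and the fuse and gfs2 ones below) guard against a NULL nameidata, since some lookup paths now call ->d_revalidate() without one. A minimal sketch of the resulting pattern, with an invented examplefs:

#include <linux/dcache.h>
#include <linux/errno.h>
#include <linux/namei.h>

static int examplefs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
        /* nd may be NULL; only trust nd->flags after checking it */
        if (nd && (nd->flags & LOOKUP_RCU))
                return -ECHILD; /* no blocking work in RCU-walk mode */

        /* normal, possibly sleeping, revalidation would go here */
        return 1;               /* dentry is still valid */
}
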
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
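
The fcntl changes above switch to fget_raw() and whitelist, via check_fcntl_cmd(), the commands usable on O_PATH descriptors: F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_SETFD and F_GETFL. A userspace sketch of the resulting behaviour follows; the path is a placeholder and the O_PATH fallback define assumes the common octal value, since headers of the time may not carry it yet.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef O_PATH
#define O_PATH 010000000        /* octal value used on most architectures */
#endif

int main(void)
{
        char buf[16];
        int fd = open("/etc/hostname", O_PATH); /* placeholder path */

        if (fd < 0)
                return 1;
        printf("F_GETFL -> %#x\n", fcntl(fd, F_GETFL)); /* allowed */
        if (read(fd, buf, sizeof(buf)) < 0)             /* not allowed */
                printf("read: %s (expected EBADF)\n", strerror(errno));
        close(fd);
        return 0;
}
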
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need to check whether the file system
24 * supports decoding of the file handle

25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow
56 * But file systems have always returned 255, so handle
57 * both values
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_bytes indicates the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With a handle we don't look at the execute bit on the
175 * directory. Ideally we would like CAP_DAC_SEARCH,
176 * but we don't have that
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flag: open flags.
248 *
249 * @mountdirfd indicates the directory file descriptor
250 * of the mount point. The file handle is decoded relative
251 * to the vfsmount pointed to by @mountdirfd. The @flags
252 * value is the same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
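
fs/fhandle.c above implements the two new syscalls. A userspace sketch of how they pair up follows; the direct syscall(2) invocations assume headers new enough to define __NR_name_to_handle_at and __NR_open_by_handle_at (a libc wrapper can be used instead where available), and the struct definition is only a fallback for older fcntl.h.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef MAX_HANDLE_SZ
#define MAX_HANDLE_SZ 128
struct file_handle {
        unsigned int handle_bytes;      /* in: buffer size, out: bytes used */
        int handle_type;                /* out: filesystem-specific type */
        unsigned char f_handle[0];      /* out: opaque handle data */
};
#endif

int main(int argc, char **argv)
{
        struct file_handle *fh;
        int mount_id, fd;

        if (argc < 2)
                return 1;
        fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
        fh->handle_bytes = MAX_HANDLE_SZ;

        if (syscall(SYS_name_to_handle_at, AT_FDCWD, argv[1],
                    fh, &mount_id, 0) < 0) {
                perror("name_to_handle_at");
                return 1;
        }
        printf("mount %d, %u handle bytes, type %d\n",
               mount_id, fh->handle_bytes, fh->handle_type);

        /* Reopening needs CAP_DAC_READ_SEARCH; AT_FDCWD works only when the
         * current directory is on the same mount as the file, otherwise pass
         * a descriptor opened on the mount point. */
        fd = syscall(SYS_open_by_handle_at, AT_FDCWD, fh, O_RDONLY);
        if (fd < 0)
                perror("open_by_handle_at");
        else
                close(fd);
        free(fh);
        return 0;
}
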
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..01e4c1e8e6b6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -190,7 +190,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
190 file_take_write(file); 190 file_take_write(file);
191 WARN_ON(mnt_clone_write(path->mnt)); 191 WARN_ON(mnt_clone_write(path->mnt));
192 } 192 }
193 ima_counts_get(file); 193 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
194 i_readcount_inc(path->dentry->d_inode);
194 return file; 195 return file;
195} 196}
196EXPORT_SYMBOL(alloc_file); 197EXPORT_SYMBOL(alloc_file);
@@ -246,11 +247,15 @@ static void __fput(struct file *file)
246 file->f_op->release(inode, file); 247 file->f_op->release(inode, file);
247 security_file_free(file); 248 security_file_free(file);
248 ima_file_free(file); 249 ima_file_free(file);
249 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 250 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
251 !(file->f_mode & FMODE_PATH))) {
250 cdev_put(inode->i_cdev); 252 cdev_put(inode->i_cdev);
253 }
251 fops_put(file->f_op); 254 fops_put(file->f_op);
252 put_pid(file->f_owner.pid); 255 put_pid(file->f_owner.pid);
253 file_sb_list_del(file); 256 file_sb_list_del(file);
257 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
258 i_readcount_dec(inode);
254 if (file->f_mode & FMODE_WRITE) 259 if (file->f_mode & FMODE_WRITE)
255 drop_file_write_access(file); 260 drop_file_write_access(file);
256 file->f_path.dentry = NULL; 261 file->f_path.dentry = NULL;
@@ -276,11 +281,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 281 rcu_read_lock();
277 file = fcheck_files(files, fd); 282 file = fcheck_files(files, fd);
278 if (file) { 283 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 284 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 285 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 286 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 287 file = NULL;
283 }
284 } 288 }
285 rcu_read_unlock(); 289 rcu_read_unlock();
286 290
@@ -289,6 +293,25 @@ struct file *fget(unsigned int fd)
289 293
290EXPORT_SYMBOL(fget); 294EXPORT_SYMBOL(fget);
291 295
296struct file *fget_raw(unsigned int fd)
297{
298 struct file *file;
299 struct files_struct *files = current->files;
300
301 rcu_read_lock();
302 file = fcheck_files(files, fd);
303 if (file) {
304 /* File object ref couldn't be taken */
305 if (!atomic_long_inc_not_zero(&file->f_count))
306 file = NULL;
307 }
308 rcu_read_unlock();
309
310 return file;
311}
312
313EXPORT_SYMBOL(fget_raw);
314
292/* 315/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 316 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 317 *
@@ -313,6 +336,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 336 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 337 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 338 file = fcheck_files(files, fd);
339 if (file && (file->f_mode & FMODE_PATH))
340 file = NULL;
341 } else {
342 rcu_read_lock();
343 file = fcheck_files(files, fd);
344 if (file) {
345 if (!(file->f_mode & FMODE_PATH) &&
346 atomic_long_inc_not_zero(&file->f_count))
347 *fput_needed = 1;
348 else
349 /* Didn't get the reference, someone's freed */
350 file = NULL;
351 }
352 rcu_read_unlock();
353 }
354
355 return file;
356}
357
358struct file *fget_raw_light(unsigned int fd, int *fput_needed)
359{
360 struct file *file;
361 struct files_struct *files = current->files;
362
363 *fput_needed = 0;
364 if (atomic_read(&files->count) == 1) {
365 file = fcheck_files(files, fd);
316 } else { 366 } else {
317 rcu_read_lock(); 367 rcu_read_lock();
318 file = fcheck_files(files, fd); 368 file = fcheck_files(files, fd);
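
The file_table.c hunks make fget()/fget_light() refuse FMODE_PATH (O_PATH) files while adding fget_raw()/fget_raw_light() for callers that only need the struct file itself. An illustrative kernel-style sketch, not part of the patch, with a hypothetical caller:

#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>

/* hypothetical syscall body that never touches file->f_op */
static long example_metadata_syscall(unsigned int fd)
{
        struct file *file = fget_raw(fd);       /* O_PATH descriptors allowed */
        long err = -EBADF;

        if (!file)
                return err;
        /* operate on file->f_path only; data paths keep using fget(),
         * which still returns NULL for FMODE_PATH files */
        err = 0;
        fput(file);
        return err;
}
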
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..8bd0ef9286c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU) 161 if (nd && nd->flags & LOOKUP_RCU)
162 return -ECHILD; 162 return -ECHILD;
163 163
164 inode = entry->d_inode; 164 inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount;
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a780a9..cbc07155b1a0 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU) 83 if (flags & IPERM_FLAG_RCU) {
84 return -ECHILD; 84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
85 return -ECHILD;
86 return -EAGAIN;
87 }
85 88
86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 89 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
87 if (IS_ERR(acl)) 90 if (IS_ERR(acl))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9b..aad77e4f61b5 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
695 if (error == 0) 695 if (error == 0)
696 return 0; 696 return 0;
697 697
698 unlock_page(page);
698 page_cache_release(page); 699 page_cache_release(page);
699 700
700 gfs2_trans_end(sdp); 701 gfs2_trans_end(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d5eef1..ef3dc4b9fae2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
21#include "meta_io.h" 21#include "meta_io.h"
22#include "quota.h" 22#include "quota.h"
23#include "rgrp.h" 23#include "rgrp.h"
24#include "super.h"
24#include "trans.h" 25#include "trans.h"
25#include "dir.h" 26#include "dir.h"
26#include "util.h" 27#include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
757 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 758 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
758 struct gfs2_rgrp_list rlist; 759 struct gfs2_rgrp_list rlist;
759 u64 bn, bstart; 760 u64 bn, bstart;
760 u32 blen; 761 u32 blen, btotal;
761 __be64 *p; 762 __be64 *p;
762 unsigned int rg_blocks = 0; 763 unsigned int rg_blocks = 0;
763 int metadata; 764 int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
839 840
840 bstart = 0; 841 bstart = 0;
841 blen = 0; 842 blen = 0;
843 btotal = 0;
842 844
843 for (p = top; p < bottom; p++) { 845 for (p = top; p < bottom; p++) {
844 if (!*p) 846 if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
851 else { 853 else {
852 if (bstart) { 854 if (bstart) {
853 if (metadata) 855 if (metadata)
854 gfs2_free_meta(ip, bstart, blen); 856 __gfs2_free_meta(ip, bstart, blen);
855 else 857 else
856 gfs2_free_data(ip, bstart, blen); 858 __gfs2_free_data(ip, bstart, blen);
859
860 btotal += blen;
857 } 861 }
858 862
859 bstart = bn; 863 bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
865 } 869 }
866 if (bstart) { 870 if (bstart) {
867 if (metadata) 871 if (metadata)
868 gfs2_free_meta(ip, bstart, blen); 872 __gfs2_free_meta(ip, bstart, blen);
869 else 873 else
870 gfs2_free_data(ip, bstart, blen); 874 __gfs2_free_data(ip, bstart, blen);
875
876 btotal += blen;
871 } 877 }
872 878
879 gfs2_statfs_change(sdp, 0, +btotal, 0);
880 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
881 ip->i_inode.i_gid);
882
873 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 883 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
874 884
875 gfs2_dinode_out(ip, dibh->b_data); 885 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd->flags & LOOKUP_RCU) 47 if (nd && nd->flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb913363..4074b952b059 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
448{ 448{
449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
450 450
451 if (!(file->f_flags & O_NOATIME)) { 451 if (!(file->f_flags & O_NOATIME) &&
452 !IS_NOATIME(&ip->i_inode)) {
452 struct gfs2_holder i_gh; 453 struct gfs2_holder i_gh;
453 int error; 454 int error;
454 455
455 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 456 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
456 error = gfs2_glock_nq(&i_gh); 457 error = gfs2_glock_nq(&i_gh);
457 file_accessed(file); 458 if (error == 0) {
458 if (error == 0) 459 file_accessed(file);
459 gfs2_glock_dq_uninit(&i_gh); 460 gfs2_glock_dq(&i_gh);
461 }
462 gfs2_holder_uninit(&i_gh);
463 if (error)
464 return error;
460 } 465 }
461 vma->vm_ops = &gfs2_vm_ops; 466 vma->vm_ops = &gfs2_vm_ops;
462 vma->vm_flags |= VM_CAN_NONLINEAR; 467 vma->vm_flags |= VM_CAN_NONLINEAR;
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
617{ 622{
618 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 623 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
619 624
620 page_zero_new_buffers(page, from, to); 625 zero_user(page, from, to-from);
621 flush_dcache_page(page);
622 mark_page_accessed(page); 626 mark_page_accessed(page);
623 627
624 if (!gfs2_is_writeback(ip)) 628 if (!gfs2_is_writeback(ip))
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
627 block_commit_write(page, from, to); 631 block_commit_write(page, from, to);
628} 632}
629 633
630static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 634static int needs_empty_write(sector_t block, struct inode *inode)
631{ 635{
632 unsigned start, end, next;
633 struct buffer_head *bh, *head;
634 int error; 636 int error;
637 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
635 638
636 if (!page_has_buffers(page)) { 639 bh_map.b_size = 1 << inode->i_blkbits;
637 error = __block_write_begin(page, from, to - from, gfs2_block_map); 640 error = gfs2_block_map(inode, block, &bh_map, 0);
638 if (unlikely(error)) 641 if (unlikely(error))
639 return error; 642 return error;
643 return !buffer_mapped(&bh_map);
644}
640 645
641 empty_write_end(page, from, to); 646static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
642 return 0; 647{
643 } 648 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize;
650 sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
651 int ret;
644 652
645 bh = head = page_buffers(page); 653 blksize = 1 << inode->i_blkbits;
646 next = end = 0; 654 next = end = 0;
647 while (next < from) { 655 while (next < from) {
648 next += bh->b_size; 656 next += blksize;
649 bh = bh->b_this_page; 657 block++;
650 } 658 }
651 start = next; 659 start = next;
652 do { 660 do {
653 next += bh->b_size; 661 next += blksize;
654 if (buffer_mapped(bh)) { 662 ret = needs_empty_write(block, inode);
663 if (unlikely(ret < 0))
664 return ret;
665 if (ret == 0) {
655 if (end) { 666 if (end) {
656 error = __block_write_begin(page, start, end - start, 667 ret = __block_write_begin(page, start, end - start,
657 gfs2_block_map); 668 gfs2_block_map);
658 if (unlikely(error)) 669 if (unlikely(ret))
659 return error; 670 return ret;
660 empty_write_end(page, start, end); 671 empty_write_end(page, start, end);
661 end = 0; 672 end = 0;
662 } 673 }
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
664 } 675 }
665 else 676 else
666 end = next; 677 end = next;
667 bh = bh->b_this_page; 678 block++;
668 } while (next < to); 679 } while (next < to);
669 680
670 if (end) { 681 if (end) {
671 error = __block_write_begin(page, start, end - start, gfs2_block_map); 682 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
672 if (unlikely(error)) 683 if (unlikely(ret))
673 return error; 684 return ret;
674 empty_write_end(page, start, end); 685 empty_write_end(page, start, end);
675 } 686 }
676 687
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
976 987
977 mutex_lock(&fp->f_fl_mutex); 988 mutex_lock(&fp->f_fl_mutex);
978 flock_lock_file_wait(file, fl); 989 flock_lock_file_wait(file, fl);
979 if (fl_gh->gh_gl) 990 if (fl_gh->gh_gl) {
980 gfs2_glock_dq_uninit(fl_gh); 991 gfs2_glock_dq_wait(fl_gh);
992 gfs2_holder_uninit(fl_gh);
993 }
981 mutex_unlock(&fp->f_fl_mutex); 994 mutex_unlock(&fp->f_fl_mutex);
982} 995}
983 996
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7cd9a5a68d59..e2431313491f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
26#include <linux/freezer.h> 26#include <linux/freezer.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/jiffies.h> 28#include <linux/jiffies.h>
29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h>
29 32
30#include "gfs2.h" 33#include "gfs2.h"
31#include "incore.h" 34#include "incore.h"
@@ -41,10 +44,6 @@
41#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
42#include "trace_gfs2.h" 45#include "trace_gfs2.h"
43 46
44struct gfs2_gl_hash_bucket {
45 struct hlist_head hb_list;
46};
47
48struct gfs2_glock_iter { 47struct gfs2_glock_iter {
49 int hash; /* hash bucket index */ 48 int hash; /* hash bucket index */
50 struct gfs2_sbd *sdp; /* incore superblock */ 49 struct gfs2_sbd *sdp; /* incore superblock */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
54 53
55typedef void (*glock_examiner) (struct gfs2_glock * gl); 54typedef void (*glock_examiner) (struct gfs2_glock * gl);
56 55
57static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
58static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 56static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 57#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 58static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
70#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 68#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
71#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) 69#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
72 70
73static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; 71static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
74static struct dentry *gfs2_root; 72static struct dentry *gfs2_root;
75 73
76/*
77 * Despite what you might think, the numbers below are not arbitrary :-)
78 * They are taken from the ipv4 routing hash code, which is well tested
79 * and thus should be nearly optimal. Later on we might tweek the numbers
80 * but for now this should be fine.
81 *
82 * The reason for putting the locks in a separate array from the list heads
83 * is that we can have fewer locks than list heads and save memory. We use
84 * the same hash function for both, but with a different hash mask.
85 */
86#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
87 defined(CONFIG_PROVE_LOCKING)
88
89#ifdef CONFIG_LOCKDEP
90# define GL_HASH_LOCK_SZ 256
91#else
92# if NR_CPUS >= 32
93# define GL_HASH_LOCK_SZ 4096
94# elif NR_CPUS >= 16
95# define GL_HASH_LOCK_SZ 2048
96# elif NR_CPUS >= 8
97# define GL_HASH_LOCK_SZ 1024
98# elif NR_CPUS >= 4
99# define GL_HASH_LOCK_SZ 512
100# else
101# define GL_HASH_LOCK_SZ 256
102# endif
103#endif
104
105/* We never want more locks than chains */
106#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
107# undef GL_HASH_LOCK_SZ
108# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
109#endif
110
111static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
112
113static inline rwlock_t *gl_lock_addr(unsigned int x)
114{
115 return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
116}
117#else /* not SMP, so no spinlocks required */
118static inline rwlock_t *gl_lock_addr(unsigned int x)
119{
120 return NULL;
121}
122#endif
123
124/** 74/**
125 * gl_hash() - Turn glock number into hash bucket number 75 * gl_hash() - Turn glock number into hash bucket number
126 * @lock: The glock number 76 * @lock: The glock number
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
141 return h; 91 return h;
142} 92}
143 93
144/** 94static inline void spin_lock_bucket(unsigned int hash)
145 * glock_free() - Perform a few checks and then release struct gfs2_glock 95{
146 * @gl: The glock to release 96 struct hlist_bl_head *bl = &gl_hash_table[hash];
147 * 97 bit_spin_lock(0, (unsigned long *)bl);
148 * Also calls lock module to release its internal structure for this glock. 98}
149 *
150 */
151 99
152static void glock_free(struct gfs2_glock *gl) 100static inline void spin_unlock_bucket(unsigned int hash)
101{
102 struct hlist_bl_head *bl = &gl_hash_table[hash];
103 __bit_spin_unlock(0, (unsigned long *)bl);
104}
105
106static void gfs2_glock_dealloc(struct rcu_head *rcu)
107{
108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
109
110 if (gl->gl_ops->go_flags & GLOF_ASPACE)
111 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
112 else
113 kmem_cache_free(gfs2_glock_cachep, gl);
114}
115
116void gfs2_glock_free(struct gfs2_glock *gl)
153{ 117{
154 struct gfs2_sbd *sdp = gl->gl_sbd; 118 struct gfs2_sbd *sdp = gl->gl_sbd;
155 struct address_space *mapping = gfs2_glock2aspace(gl);
156 struct kmem_cache *cachep = gfs2_glock_cachep;
157 119
158 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 120 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
159 trace_gfs2_glock_put(gl); 121 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
160 if (mapping) 122 wake_up(&sdp->sd_glock_wait);
161 cachep = gfs2_glock_aspace_cachep;
162 sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
163} 123}
164 124
165/** 125/**
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl)
185{ 145{
186 const struct gfs2_glock_operations *glops = gl->gl_ops; 146 const struct gfs2_glock_operations *glops = gl->gl_ops;
187 147
148 /* assert_spin_locked(&gl->gl_spin); */
149
188 if (gl->gl_state == LM_ST_UNLOCKED) 150 if (gl->gl_state == LM_ST_UNLOCKED)
189 return 0; 151 return 0;
190 if (!list_empty(&gl->gl_holders)) 152 if (test_bit(GLF_LFLUSH, &gl->gl_flags))
153 return 0;
154 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
155 !list_empty(&gl->gl_holders))
191 return 0; 156 return 0;
192 if (glops->go_demote_ok) 157 if (glops->go_demote_ok)
193 return glops->go_demote_ok(gl); 158 return glops->go_demote_ok(gl);
194 return 1; 159 return 1;
195} 160}
196 161
162
197/** 163/**
198 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 164 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
199 * @gl: the glock 165 * @gl: the glock
200 * 166 *
167 * If the glock is demotable, then we add it (or move it) to the end
168 * of the glock LRU list.
201 */ 169 */
202 170
203static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 171static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
204{ 172{
205 int may_reclaim; 173 if (demote_ok(gl)) {
206 may_reclaim = (demote_ok(gl) && 174 spin_lock(&lru_lock);
207 (atomic_read(&gl->gl_ref) == 1 || 175
208 (gl->gl_name.ln_type == LM_TYPE_INODE && 176 if (!list_empty(&gl->gl_lru))
209 atomic_read(&gl->gl_ref) <= 2))); 177 list_del_init(&gl->gl_lru);
210 spin_lock(&lru_lock); 178 else
211 if (list_empty(&gl->gl_lru) && may_reclaim) { 179 atomic_inc(&lru_count);
180
212 list_add_tail(&gl->gl_lru, &lru_list); 181 list_add_tail(&gl->gl_lru, &lru_list);
213 atomic_inc(&lru_count); 182 spin_unlock(&lru_lock);
214 } 183 }
215 spin_unlock(&lru_lock); 184}
185
186void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
187{
188 spin_lock(&gl->gl_spin);
189 __gfs2_glock_schedule_for_reclaim(gl);
190 spin_unlock(&gl->gl_spin);
216} 191}
217 192
218/** 193/**
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
227{ 202{
228 if (atomic_dec_and_test(&gl->gl_ref)) 203 if (atomic_dec_and_test(&gl->gl_ref))
229 GLOCK_BUG_ON(gl, 1); 204 GLOCK_BUG_ON(gl, 1);
230 gfs2_glock_schedule_for_reclaim(gl);
231} 205}
232 206
233/** 207/**
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
236 * 210 *
237 */ 211 */
238 212
239int gfs2_glock_put(struct gfs2_glock *gl) 213void gfs2_glock_put(struct gfs2_glock *gl)
240{ 214{
241 int rv = 0; 215 struct gfs2_sbd *sdp = gl->gl_sbd;
216 struct address_space *mapping = gfs2_glock2aspace(gl);
242 217
243 write_lock(gl_lock_addr(gl->gl_hash)); 218 if (atomic_dec_and_test(&gl->gl_ref)) {
244 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { 219 spin_lock_bucket(gl->gl_hash);
245 hlist_del(&gl->gl_list); 220 hlist_bl_del_rcu(&gl->gl_list);
221 spin_unlock_bucket(gl->gl_hash);
222 spin_lock(&lru_lock);
246 if (!list_empty(&gl->gl_lru)) { 223 if (!list_empty(&gl->gl_lru)) {
247 list_del_init(&gl->gl_lru); 224 list_del_init(&gl->gl_lru);
248 atomic_dec(&lru_count); 225 atomic_dec(&lru_count);
249 } 226 }
250 spin_unlock(&lru_lock); 227 spin_unlock(&lru_lock);
251 write_unlock(gl_lock_addr(gl->gl_hash));
252 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 228 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
253 glock_free(gl); 229 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
254 rv = 1; 230 trace_gfs2_glock_put(gl);
255 goto out; 231 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
256 } 232 }
257 spin_lock(&gl->gl_spin);
258 gfs2_glock_schedule_for_reclaim(gl);
259 spin_unlock(&gl->gl_spin);
260 write_unlock(gl_lock_addr(gl->gl_hash));
261out:
262 return rv;
263} 233}
264 234
265/** 235/**
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
275 const struct lm_lockname *name) 245 const struct lm_lockname *name)
276{ 246{
277 struct gfs2_glock *gl; 247 struct gfs2_glock *gl;
278 struct hlist_node *h; 248 struct hlist_bl_node *h;
279 249
280 hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { 250 hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
281 if (!lm_name_equal(&gl->gl_name, name)) 251 if (!lm_name_equal(&gl->gl_name, name))
282 continue; 252 continue;
283 if (gl->gl_sbd != sdp) 253 if (gl->gl_sbd != sdp)
284 continue; 254 continue;
285 255 if (atomic_inc_not_zero(&gl->gl_ref))
286 atomic_inc(&gl->gl_ref); 256 return gl;
287
288 return gl;
289 } 257 }
290 258
291 return NULL; 259 return NULL;
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
743 struct gfs2_glock *gl, *tmp; 711 struct gfs2_glock *gl, *tmp;
744 unsigned int hash = gl_hash(sdp, &name); 712 unsigned int hash = gl_hash(sdp, &name);
745 struct address_space *mapping; 713 struct address_space *mapping;
714 struct kmem_cache *cachep;
746 715
747 read_lock(gl_lock_addr(hash)); 716 rcu_read_lock();
748 gl = search_bucket(hash, sdp, &name); 717 gl = search_bucket(hash, sdp, &name);
749 read_unlock(gl_lock_addr(hash)); 718 rcu_read_unlock();
750 719
751 *glp = gl; 720 *glp = gl;
752 if (gl) 721 if (gl)
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
755 return -ENOENT; 724 return -ENOENT;
756 725
757 if (glops->go_flags & GLOF_ASPACE) 726 if (glops->go_flags & GLOF_ASPACE)
758 gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); 727 cachep = gfs2_glock_aspace_cachep;
759 else 728 else
760 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 729 cachep = gfs2_glock_cachep;
730 gl = kmem_cache_alloc(cachep, GFP_KERNEL);
761 if (!gl) 731 if (!gl)
762 return -ENOMEM; 732 return -ENOMEM;
763 733
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
790 mapping->writeback_index = 0; 760 mapping->writeback_index = 0;
791 } 761 }
792 762
793 write_lock(gl_lock_addr(hash)); 763 spin_lock_bucket(hash);
794 tmp = search_bucket(hash, sdp, &name); 764 tmp = search_bucket(hash, sdp, &name);
795 if (tmp) { 765 if (tmp) {
796 write_unlock(gl_lock_addr(hash)); 766 spin_unlock_bucket(hash);
797 glock_free(gl); 767 kmem_cache_free(cachep, gl);
768 atomic_dec(&sdp->sd_glock_disposal);
798 gl = tmp; 769 gl = tmp;
799 } else { 770 } else {
800 hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); 771 hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
801 write_unlock(gl_lock_addr(hash)); 772 spin_unlock_bucket(hash);
802 } 773 }
803 774
804 *glp = gl; 775 *glp = gl;
@@ -1007,13 +978,13 @@ fail:
1007 insert_pt = &gh2->gh_list; 978 insert_pt = &gh2->gh_list;
1008 } 979 }
1009 set_bit(GLF_QUEUED, &gl->gl_flags); 980 set_bit(GLF_QUEUED, &gl->gl_flags);
981 trace_gfs2_glock_queue(gh, 1);
1010 if (likely(insert_pt == NULL)) { 982 if (likely(insert_pt == NULL)) {
1011 list_add_tail(&gh->gh_list, &gl->gl_holders); 983 list_add_tail(&gh->gh_list, &gl->gl_holders);
1012 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 984 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1013 goto do_cancel; 985 goto do_cancel;
1014 return; 986 return;
1015 } 987 }
1016 trace_gfs2_glock_queue(gh, 1);
1017 list_add_tail(&gh->gh_list, insert_pt); 988 list_add_tail(&gh->gh_list, insert_pt);
1018do_cancel: 989do_cancel:
1019 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 990 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1113 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1084 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1114 fast_path = 1; 1085 fast_path = 1;
1115 } 1086 }
1087 __gfs2_glock_schedule_for_reclaim(gl);
1116 trace_gfs2_glock_queue(gh, 0); 1088 trace_gfs2_glock_queue(gh, 0);
1117 spin_unlock(&gl->gl_spin); 1089 spin_unlock(&gl->gl_spin);
1118 if (likely(fast_path)) 1090 if (likely(fast_path))
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1276 1248
1277void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1249void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1278{ 1250{
1279 unsigned int x; 1251 while (num_gh--)
1280 1252 gfs2_glock_dq(&ghs[num_gh]);
1281 for (x = 0; x < num_gh; x++)
1282 gfs2_glock_dq(&ghs[x]);
1283} 1253}
1284 1254
1285/** 1255/**
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1291 1261
1292void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) 1262void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1293{ 1263{
1294 unsigned int x; 1264 while (num_gh--)
1295 1265 gfs2_glock_dq_uninit(&ghs[num_gh]);
1296 for (x = 0; x < num_gh; x++)
1297 gfs2_glock_dq_uninit(&ghs[x]);
1298} 1266}
1299 1267
1300void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) 1268void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = {
1440 * @sdp: the filesystem 1408 * @sdp: the filesystem
1441 * @bucket: the bucket 1409 * @bucket: the bucket
1442 * 1410 *
1443 * Returns: 1 if the bucket has entries
1444 */ 1411 */
1445 1412
1446static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, 1413static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
1447 unsigned int hash) 1414 unsigned int hash)
1448{ 1415{
1449 struct gfs2_glock *gl, *prev = NULL; 1416 struct gfs2_glock *gl;
1450 int has_entries = 0; 1417 struct hlist_bl_head *head = &gl_hash_table[hash];
1451 struct hlist_head *head = &gl_hash_table[hash].hb_list; 1418 struct hlist_bl_node *pos;
1452 1419
1453 read_lock(gl_lock_addr(hash)); 1420 rcu_read_lock();
1454 /* Can't use hlist_for_each_entry - don't want prefetch here */ 1421 hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
1455 if (hlist_empty(head)) 1422 if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
1456 goto out;
1457 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1458 while(1) {
1459 if (!sdp || gl->gl_sbd == sdp) {
1460 gfs2_glock_hold(gl);
1461 read_unlock(gl_lock_addr(hash));
1462 if (prev)
1463 gfs2_glock_put(prev);
1464 prev = gl;
1465 examiner(gl); 1423 examiner(gl);
1466 has_entries = 1;
1467 read_lock(gl_lock_addr(hash));
1468 }
1469 if (gl->gl_list.next == NULL)
1470 break;
1471 gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
1472 } 1424 }
1473out: 1425 rcu_read_unlock();
1474 read_unlock(gl_lock_addr(hash));
1475 if (prev)
1476 gfs2_glock_put(prev);
1477 cond_resched(); 1426 cond_resched();
1478 return has_entries; 1427}
1428
1429static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1430{
1431 unsigned x;
1432
1433 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1434 examine_bucket(examiner, sdp, x);
1479} 1435}
1480 1436
1481 1437
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl)
1529 1485
1530void gfs2_glock_thaw(struct gfs2_sbd *sdp) 1486void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1531{ 1487{
1532 unsigned x; 1488 glock_hash_walk(thaw_glock, sdp);
1489}
1533 1490
1534 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) 1491static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1535 examine_bucket(thaw_glock, sdp, x); 1492{
1493 int ret;
1494 spin_lock(&gl->gl_spin);
1495 ret = __dump_glock(seq, gl);
1496 spin_unlock(&gl->gl_spin);
1497 return ret;
1498}
1499
1500static void dump_glock_func(struct gfs2_glock *gl)
1501{
1502 dump_glock(NULL, gl);
1536} 1503}
1537 1504
1538/** 1505/**
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1545 1512
1546void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1513void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1547{ 1514{
1548 unsigned int x; 1515 glock_hash_walk(clear_glock, sdp);
1549
1550 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1551 examine_bucket(clear_glock, sdp, x);
1552 flush_workqueue(glock_workqueue); 1516 flush_workqueue(glock_workqueue);
1553 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); 1517 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
1554 gfs2_dump_lockstate(sdp); 1518 glock_hash_walk(dump_glock_func, sdp);
1555} 1519}
1556 1520
1557void gfs2_glock_finish_truncate(struct gfs2_inode *ip) 1521void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,66 +1681,15 @@ out:
1717 return error; 1681 return error;
1718} 1682}
1719 1683
1720static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1721{
1722 int ret;
1723 spin_lock(&gl->gl_spin);
1724 ret = __dump_glock(seq, gl);
1725 spin_unlock(&gl->gl_spin);
1726 return ret;
1727}
1728 1684
1729/**
1730 * gfs2_dump_lockstate - print out the current lockstate
1731 * @sdp: the filesystem
1732 * @ub: the buffer to copy the information into
1733 *
1734 * If @ub is NULL, dump the lockstate to the console.
1735 *
1736 */
1737
1738static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
1739{
1740 struct gfs2_glock *gl;
1741 struct hlist_node *h;
1742 unsigned int x;
1743 int error = 0;
1744
1745 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
1746
1747 read_lock(gl_lock_addr(x));
1748
1749 hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
1750 if (gl->gl_sbd != sdp)
1751 continue;
1752
1753 error = dump_glock(NULL, gl);
1754 if (error)
1755 break;
1756 }
1757
1758 read_unlock(gl_lock_addr(x));
1759
1760 if (error)
1761 break;
1762 }
1763
1764
1765 return error;
1766}
1767 1685
1768 1686
1769int __init gfs2_glock_init(void) 1687int __init gfs2_glock_init(void)
1770{ 1688{
1771 unsigned i; 1689 unsigned i;
1772 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { 1690 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
1773 INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); 1691 INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
1774 }
1775#ifdef GL_HASH_LOCK_SZ
1776 for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
1777 rwlock_init(&gl_hash_locks[i]);
1778 } 1692 }
1779#endif
1780 1693
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1694 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZABLE, 0); 1695 WQ_HIGHPRI | WQ_FREEZABLE, 0);
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void)
1802 destroy_workqueue(gfs2_delete_workqueue); 1715 destroy_workqueue(gfs2_delete_workqueue);
1803} 1716}
1804 1717
1718static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
1719{
1720 return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
1721 struct gfs2_glock, gl_list);
1722}
1723
1724static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
1725{
1726 return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
1727 struct gfs2_glock, gl_list);
1728}
1729
1805static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) 1730static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1806{ 1731{
1807 struct gfs2_glock *gl; 1732 struct gfs2_glock *gl;
1808 1733
1809restart: 1734 do {
1810 read_lock(gl_lock_addr(gi->hash)); 1735 gl = gi->gl;
1811 gl = gi->gl; 1736 if (gl) {
1812 if (gl) { 1737 gi->gl = glock_hash_next(gl);
1813 gi->gl = hlist_entry(gl->gl_list.next, 1738 } else {
1814 struct gfs2_glock, gl_list); 1739 gi->gl = glock_hash_chain(gi->hash);
1815 } else { 1740 }
1816 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, 1741 while (gi->gl == NULL) {
1817 struct gfs2_glock, gl_list); 1742 gi->hash++;
1818 } 1743 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1819 if (gi->gl) 1744 rcu_read_unlock();
1820 gfs2_glock_hold(gi->gl); 1745 return 1;
1821 read_unlock(gl_lock_addr(gi->hash)); 1746 }
1822 if (gl) 1747 gi->gl = glock_hash_chain(gi->hash);
1823 gfs2_glock_put(gl); 1748 }
1824 while (gi->gl == NULL) { 1749 /* Skip entries for other sb and dead entries */
1825 gi->hash++; 1750 } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
1826 if (gi->hash >= GFS2_GL_HASH_SIZE)
1827 return 1;
1828 read_lock(gl_lock_addr(gi->hash));
1829 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
1830 struct gfs2_glock, gl_list);
1831 if (gi->gl)
1832 gfs2_glock_hold(gi->gl);
1833 read_unlock(gl_lock_addr(gi->hash));
1834 }
1835
1836 if (gi->sdp != gi->gl->gl_sbd)
1837 goto restart;
1838 1751
1839 return 0; 1752 return 0;
1840} 1753}
1841 1754
1842static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
1843{
1844 if (gi->gl)
1845 gfs2_glock_put(gi->gl);
1846 gi->gl = NULL;
1847}
1848
1849static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 1755static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1850{ 1756{
1851 struct gfs2_glock_iter *gi = seq->private; 1757 struct gfs2_glock_iter *gi = seq->private;
1852 loff_t n = *pos; 1758 loff_t n = *pos;
1853 1759
1854 gi->hash = 0; 1760 gi->hash = 0;
1761 rcu_read_lock();
1855 1762
1856 do { 1763 do {
1857 if (gfs2_glock_iter_next(gi)) { 1764 if (gfs2_glock_iter_next(gi))
1858 gfs2_glock_iter_free(gi);
1859 return NULL; 1765 return NULL;
1860 }
1861 } while (n--); 1766 } while (n--);
1862 1767
1863 return gi->gl; 1768 return gi->gl;
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1870 1775
1871 (*pos)++; 1776 (*pos)++;
1872 1777
1873 if (gfs2_glock_iter_next(gi)) { 1778 if (gfs2_glock_iter_next(gi))
1874 gfs2_glock_iter_free(gi);
1875 return NULL; 1779 return NULL;
1876 }
1877 1780
1878 return gi->gl; 1781 return gi->gl;
1879} 1782}
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1881static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 1784static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1882{ 1785{
1883 struct gfs2_glock_iter *gi = seq->private; 1786 struct gfs2_glock_iter *gi = seq->private;
1884 gfs2_glock_iter_free(gi); 1787
1788 if (gi->gl)
1789 rcu_read_unlock();
1790 gi->gl = NULL;
1885} 1791}
1886 1792
1887static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1793static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
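The glock.c hunks above replace the per-bucket rwlocks with a bit-locked, RCU-protected hash table: lookups walk the chain under rcu_read_lock() and take a reference only via atomic_inc_not_zero(), so a glock whose count has already reached zero is skipped rather than resurrected, while writers serialize on the bucket bit lock (spin_lock_bucket/spin_unlock_bucket). A minimal sketch of that lookup pattern follows, using the same hlist_bl/RCU primitives; the struct obj type and obj_find() are illustrative names, not part of the patch.

/*
 * Lockless lookup sketch: the chain is an RCU-protected hlist_bl and
 * the reference count doubles as the liveness test.
 */
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <asm/atomic.h>

struct obj {
	struct hlist_bl_node o_list;
	atomic_t o_ref;
	unsigned long o_key;
};

static struct obj *obj_find(struct hlist_bl_head *head, unsigned long key)
{
	struct obj *o;
	struct hlist_bl_node *pos;

	rcu_read_lock();
	hlist_bl_for_each_entry_rcu(o, pos, head, o_list) {
		if (o->o_key != key)
			continue;
		/* Entries whose refcount already hit zero are being freed */
		if (atomic_inc_not_zero(&o->o_ref)) {
			rcu_read_unlock();
			return o;
		}
	}
	rcu_read_unlock();
	return NULL;
}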
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb615..aea160690e94 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
119 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
120 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct gfs2_glock *gl);
122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
123 unsigned int flags); 123 unsigned int flags);
124 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
174 int create, struct gfs2_glock **glp); 174 int create, struct gfs2_glock **glp);
175void gfs2_glock_hold(struct gfs2_glock *gl); 175void gfs2_glock_hold(struct gfs2_glock *gl);
176void gfs2_glock_put_nolock(struct gfs2_glock *gl); 176void gfs2_glock_put_nolock(struct gfs2_glock *gl);
177int gfs2_glock_put(struct gfs2_glock *gl); 177void gfs2_glock_put(struct gfs2_glock *gl);
178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
179 struct gfs2_holder *gh); 179 struct gfs2_holder *gh);
180void gfs2_holder_reinit(unsigned int state, unsigned flags, 180void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
223 return error; 223 return error;
224} 224}
225 225
226/* Lock Value Block functions */ 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228int gfs2_lvb_hold(struct gfs2_glock *gl); 228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229void gfs2_lvb_unhold(struct gfs2_glock *gl); 229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230 230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
233void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 233extern void gfs2_glock_free(struct gfs2_glock *gl);
234void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 234
235void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 235extern int __init gfs2_glock_init(void);
236void gfs2_glock_thaw(struct gfs2_sbd *sdp); 236extern void gfs2_glock_exit(void);
237 237
238int __init gfs2_glock_init(void); 238extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
239void gfs2_glock_exit(void); 239extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
240 240extern int gfs2_register_debugfs(void);
241int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 241extern void gfs2_unregister_debugfs(void);
242void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
243int gfs2_register_debugfs(void);
244void gfs2_unregister_debugfs(void);
245 242
246extern const struct lm_lockops gfs2_dlm_ops; 243extern const struct lm_lockops gfs2_dlm_ops;
247 244
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a50..3754e3cbf02b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
56 BUG_ON(current->journal_info); 56 BUG_ON(current->journal_info);
57 current->journal_info = &tr; 57 current->journal_info = &tr;
58 58
59 gfs2_log_lock(sdp); 59 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 60 while (!list_empty(head)) {
61 bd = list_entry(head->next, struct gfs2_bufdata, 61 bd = list_entry(head->next, struct gfs2_bufdata,
62 bd_ail_gl_list); 62 bd_ail_gl_list);
63 bh = bd->bd_bh; 63 bh = bd->bd_bh;
64 gfs2_remove_from_ail(bd); 64 gfs2_remove_from_ail(bd);
65 spin_unlock(&sdp->sd_ail_lock);
66
65 bd->bd_bh = NULL; 67 bd->bd_bh = NULL;
66 bh->b_private = NULL; 68 bh->b_private = NULL;
67 bd->bd_blkno = bh->b_blocknr; 69 bd->bd_blkno = bh->b_blocknr;
70 gfs2_log_lock(sdp);
68 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 71 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
69 gfs2_trans_add_revoke(sdp, bd); 72 gfs2_trans_add_revoke(sdp, bd);
73 gfs2_log_unlock(sdp);
74
75 spin_lock(&sdp->sd_ail_lock);
70 } 76 }
71 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
72 gfs2_log_unlock(sdp); 78 spin_unlock(&sdp->sd_ail_lock);
73 79
74 gfs2_trans_end(sdp); 80 gfs2_trans_end(sdp);
75 gfs2_log_flush(sdp, NULL); 81 gfs2_log_flush(sdp, NULL);
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
206static int inode_go_demote_ok(const struct gfs2_glock *gl) 212static int inode_go_demote_ok(const struct gfs2_glock *gl)
207{ 213{
208 struct gfs2_sbd *sdp = gl->gl_sbd; 214 struct gfs2_sbd *sdp = gl->gl_sbd;
215 struct gfs2_holder *gh;
216
209 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 217 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
210 return 0; 218 return 0;
219
220 if (!list_empty(&gl->gl_holders)) {
221 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
222 if (gh->gh_list.next != &gl->gl_holders)
223 return 0;
224 }
225
211 return 1; 226 return 1;
212} 227}
213 228
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
272} 287}
273 288
274/** 289/**
275 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
276 * @gl: the glock
277 *
278 * Returns: 1 if it's ok
279 */
280
281static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
282{
283 const struct address_space *mapping = (const struct address_space *)(gl + 1);
284 return !mapping->nrpages;
285}
286
287/**
288 * rgrp_go_lock - operation done after an rgrp lock is locked by 290 * rgrp_go_lock - operation done after an rgrp lock is locked by
289 * a first holder on this node. 291 * a first holder on this node.
290 * @gl: the glock 292 * @gl: the glock
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
410const struct gfs2_glock_operations gfs2_rgrp_glops = { 412const struct gfs2_glock_operations gfs2_rgrp_glops = {
411 .go_xmote_th = rgrp_go_sync, 413 .go_xmote_th = rgrp_go_sync,
412 .go_inval = rgrp_go_inval, 414 .go_inval = rgrp_go_inval,
413 .go_demote_ok = rgrp_go_demote_ok,
414 .go_lock = rgrp_go_lock, 415 .go_lock = rgrp_go_lock,
415 .go_unlock = rgrp_go_unlock, 416 .go_unlock = rgrp_go_unlock,
416 .go_dump = gfs2_rgrp_dump, 417 .go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c06275..870a89d6d4dc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
15#include <linux/workqueue.h> 15#include <linux/workqueue.h>
16#include <linux/dlm.h> 16#include <linux/dlm.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/rcupdate.h>
19#include <linux/rculist_bl.h>
18 20
19#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
20#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
@@ -201,7 +203,7 @@ enum {
201}; 203};
202 204
203struct gfs2_glock { 205struct gfs2_glock {
204 struct hlist_node gl_list; 206 struct hlist_bl_node gl_list;
205 unsigned long gl_flags; /* GLF_... */ 207 unsigned long gl_flags; /* GLF_... */
206 struct lm_lockname gl_name; 208 struct lm_lockname gl_name;
207 atomic_t gl_ref; 209 atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
234 atomic_t gl_ail_count; 236 atomic_t gl_ail_count;
235 struct delayed_work gl_work; 237 struct delayed_work gl_work;
236 struct work_struct gl_delete; 238 struct work_struct gl_delete;
239 struct rcu_head gl_rcu;
237}; 240};
238 241
239#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 242#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -314,6 +317,7 @@ enum {
314 QDF_USER = 0, 317 QDF_USER = 0,
315 QDF_CHANGE = 1, 318 QDF_CHANGE = 1,
316 QDF_LOCKED = 2, 319 QDF_LOCKED = 2,
320 QDF_REFRESH = 3,
317}; 321};
318 322
319struct gfs2_quota_data { 323struct gfs2_quota_data {
@@ -647,6 +651,7 @@ struct gfs2_sbd {
647 unsigned int sd_log_flush_head; 651 unsigned int sd_log_flush_head;
648 u64 sd_log_flush_wrapped; 652 u64 sd_log_flush_wrapped;
649 653
654 spinlock_t sd_ail_lock;
650 struct list_head sd_ail1_list; 655 struct list_head sd_ail1_list;
651 struct list_head sd_ail2_list; 656 struct list_head sd_ail2_list;
652 u64 sd_ail_sync_gen; 657 u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7aa7d4f8984a..97d54a28776a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -763,14 +763,15 @@ fail:
763 return error; 763 return error;
764} 764}
765 765
766static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 766static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
767 const struct qstr *qstr)
767{ 768{
768 int err; 769 int err;
769 size_t len; 770 size_t len;
770 void *value; 771 void *value;
771 char *name; 772 char *name;
772 773
773 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 774 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
774 &name, &value, &len); 775 &name, &value, &len);
775 776
776 if (err) { 777 if (err) {
@@ -854,7 +855,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
854 if (error) 855 if (error)
855 goto fail_gunlock2; 856 goto fail_gunlock2;
856 857
857 error = gfs2_security_init(dip, GFS2_I(inode)); 858 error = gfs2_security_init(dip, GFS2_I(inode), name);
858 if (error) 859 if (error)
859 goto fail_gunlock2; 860 goto fail_gunlock2;
860 861
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f8..98c80d8c2a62 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
22{ 22{
23 struct gfs2_glock *gl = arg; 23 struct gfs2_glock *gl = arg;
24 unsigned ret = gl->gl_state; 24 unsigned ret = gl->gl_state;
25 struct gfs2_sbd *sdp = gl->gl_sbd;
26 25
27 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 26 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
28 27
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
31 30
32 switch (gl->gl_lksb.sb_status) { 31 switch (gl->gl_lksb.sb_status) {
33 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
34 if (gl->gl_ops->go_flags & GLOF_ASPACE) 33 gfs2_glock_free(gl);
35 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
36 else
37 kmem_cache_free(gfs2_glock_cachep, gl);
38 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
39 wake_up(&sdp->sd_glock_wait);
40 return; 34 return;
41 case -DLM_ECANCEL: /* Cancel while getting lock */ 35 case -DLM_ECANCEL: /* Cancel while getting lock */
42 ret |= LM_OUT_CANCELED; 36 ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 158 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
165} 159}
166 160
167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 161static void gdlm_put_lock(struct gfs2_glock *gl)
168{ 162{
169 struct gfs2_sbd *sdp = gl->gl_sbd; 163 struct gfs2_sbd *sdp = gl->gl_sbd;
170 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 164 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
171 int error; 165 int error;
172 166
173 if (gl->gl_lksb.sb_lkid == 0) { 167 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl); 168 gfs2_glock_free(gl);
175 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
176 wake_up(&sdp->sd_glock_wait);
177 return; 169 return;
178 } 170 }
179 171
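With the DLM-specific freeing removed, both the sb_lkid == 0 path and the -DLM_EUNLOCK completion now go through gfs2_glock_free(), whose body is not shown in these hunks. Judging from the gl_rcu field added to struct gfs2_glock in incore.h and the rcu_barrier() added to module exit in main.c, it presumably defers the kmem_cache_free() through call_rcu() and keeps the sd_glock_disposal accounting that used to be open-coded here. A sketch under that assumption, not the literal definition from glock.c:

/*
 * Assumed shape of gfs2_glock_free(): free the glock only after an RCU
 * grace period so lockless hash walkers never touch freed memory, then
 * drop the per-sb disposal count that unmount waits on.
 */
static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

	if (gl->gl_ops->go_flags & GLOF_ASPACE)
		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
	else
		kmem_cache_free(gfs2_glock_cachep, gl);
}

void gfs2_glock_free(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_glock_wait);
}

This is also why exit_gfs2_fs() in the main.c hunk below calls rcu_barrier() before destroying the caches: any call_rcu() callbacks still in flight must complete before their kmem_caches go away.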
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e10..e7ed31f858dd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
67 * @mapping: The associated mapping (maybe NULL) 67 * @mapping: The associated mapping (maybe NULL)
68 * @bd: The gfs2_bufdata to remove 68 * @bd: The gfs2_bufdata to remove
69 * 69 *
70 * The log lock _must_ be held when calling this function 70 * The ail lock _must_ be held when calling this function
71 * 71 *
72 */ 72 */
73 73
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
88 */ 88 */
89 89
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
91__releases(&sdp->sd_log_lock) 91__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_log_lock) 92__acquires(&sdp->sd_ail_lock)
93{ 93{
94 struct gfs2_bufdata *bd, *s; 94 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 95 struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 118
119 get_bh(bh); 119 get_bh(bh);
120 gfs2_log_unlock(sdp); 120 spin_unlock(&sdp->sd_ail_lock);
121 lock_buffer(bh); 121 lock_buffer(bh);
122 if (test_clear_buffer_dirty(bh)) { 122 if (test_clear_buffer_dirty(bh)) {
123 bh->b_end_io = end_buffer_write_sync; 123 bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
126 unlock_buffer(bh); 126 unlock_buffer(bh);
127 brelse(bh); 127 brelse(bh);
128 } 128 }
129 gfs2_log_lock(sdp); 129 spin_lock(&sdp->sd_ail_lock);
130 130
131 retry = 1; 131 retry = 1;
132 break; 132 break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
175 struct gfs2_ail *ai; 175 struct gfs2_ail *ai;
176 int done = 0; 176 int done = 0;
177 177
178 gfs2_log_lock(sdp); 178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list; 179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) { 180 if (list_empty(head)) {
181 gfs2_log_unlock(sdp); 181 spin_unlock(&sdp->sd_ail_lock);
182 return; 182 return;
183 } 183 }
184 sync_gen = sdp->sd_ail_sync_gen++; 184 sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
189 if (ai->ai_sync_gen >= sync_gen) 189 if (ai->ai_sync_gen >= sync_gen)
190 continue; 190 continue;
191 ai->ai_sync_gen = sync_gen; 191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ 192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0; 193 done = 0;
194 break; 194 break;
195 } 195 }
196 } 196 }
197 197
198 gfs2_log_unlock(sdp); 198 spin_unlock(&sdp->sd_ail_lock);
199} 199}
200 200
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
203 struct gfs2_ail *ai, *s; 203 struct gfs2_ail *ai, *s;
204 int ret; 204 int ret;
205 205
206 gfs2_log_lock(sdp); 206 spin_lock(&sdp->sd_ail_lock);
207 207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 209 if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
214 214
215 ret = list_empty(&sdp->sd_ail1_list); 215 ret = list_empty(&sdp->sd_ail1_list);
216 216
217 gfs2_log_unlock(sdp); 217 spin_unlock(&sdp->sd_ail_lock);
218 218
219 return ret; 219 return ret;
220} 220}
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
247 int wrap = (new_tail < old_tail); 247 int wrap = (new_tail < old_tail);
248 int a, b, rm; 248 int a, b, rm;
249 249
250 gfs2_log_lock(sdp); 250 spin_lock(&sdp->sd_ail_lock);
251 251
252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { 252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
253 a = (old_tail <= ai->ai_first); 253 a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
263 kfree(ai); 263 kfree(ai);
264 } 264 }
265 265
266 gfs2_log_unlock(sdp); 266 spin_unlock(&sdp->sd_ail_lock);
267} 267}
268 268
269/** 269/**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
421 struct gfs2_ail *ai; 421 struct gfs2_ail *ai;
422 unsigned int tail; 422 unsigned int tail;
423 423
424 gfs2_log_lock(sdp); 424 spin_lock(&sdp->sd_ail_lock);
425 425
426 if (list_empty(&sdp->sd_ail1_list)) { 426 if (list_empty(&sdp->sd_ail1_list)) {
427 tail = sdp->sd_log_head; 427 tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
430 tail = ai->ai_first; 430 tail = ai->ai_first;
431 } 431 }
432 432
433 gfs2_log_unlock(sdp); 433 spin_unlock(&sdp->sd_ail_lock);
434 434
435 return tail; 435 return tail;
436} 436}
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
743 sdp->sd_log_commited_databuf = 0; 743 sdp->sd_log_commited_databuf = 0;
744 sdp->sd_log_commited_revoke = 0; 744 sdp->sd_log_commited_revoke = 0;
745 745
746 spin_lock(&sdp->sd_ail_lock);
746 if (!list_empty(&ai->ai_ail1_list)) { 747 if (!list_empty(&ai->ai_ail1_list)) {
747 list_add(&ai->ai_list, &sdp->sd_ail1_list); 748 list_add(&ai->ai_list, &sdp->sd_ail1_list);
748 ai = NULL; 749 ai = NULL;
749 } 750 }
751 spin_unlock(&sdp->sd_ail_lock);
750 gfs2_log_unlock(sdp); 752 gfs2_log_unlock(sdp);
751 trace_gfs2_log_flush(sdp, 0); 753 trace_gfs2_log_flush(sdp, 0);
752 up_write(&sdp->sd_log_flush_lock); 754 up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058d..e919abf25ecd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
51 /* If this buffer is in the AIL and it has already been written 51 /* If this buffer is in the AIL and it has already been written
52 * to in-place disk block, remove it from the AIL. 52 * to in-place disk block, remove it from the AIL.
53 */ 53 */
54 spin_lock(&sdp->sd_ail_lock);
54 if (bd->bd_ail) 55 if (bd->bd_ail)
55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 56 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
57 spin_unlock(&sdp->sd_ail_lock);
56 get_bh(bh); 58 get_bh(bh);
57 atomic_inc(&sdp->sd_log_pinned); 59 atomic_inc(&sdp->sd_log_pinned);
58 trace_gfs2_pin(bd, 1); 60 trace_gfs2_pin(bd, 1);
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
80 mark_buffer_dirty(bh); 82 mark_buffer_dirty(bh);
81 clear_buffer_pinned(bh); 83 clear_buffer_pinned(bh);
82 84
83 gfs2_log_lock(sdp); 85 spin_lock(&sdp->sd_ail_lock);
84 if (bd->bd_ail) { 86 if (bd->bd_ail) {
85 list_del(&bd->bd_ail_st_list); 87 list_del(&bd->bd_ail_st_list);
86 brelse(bh); 88 brelse(bh);
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
91 } 93 }
92 bd->bd_ail = ai; 94 bd->bd_ail = ai;
93 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
94 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 96 spin_unlock(&sdp->sd_ail_lock);
97
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
95 trace_gfs2_pin(bd, 0); 100 trace_gfs2_pin(bd, 0);
96 gfs2_log_unlock(sdp);
97 unlock_buffer(bh); 101 unlock_buffer(bh);
98 atomic_dec(&sdp->sd_log_pinned); 102 atomic_dec(&sdp->sd_log_pinned);
99} 103}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 85ba027d1c4d..888a5f5a1a58 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h>
17#include <asm/atomic.h> 19#include <asm/atomic.h>
18 20
19#include "gfs2.h" 21#include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
45{ 47{
46 struct gfs2_glock *gl = foo; 48 struct gfs2_glock *gl = foo;
47 49
48 INIT_HLIST_NODE(&gl->gl_list); 50 INIT_HLIST_BL_NODE(&gl->gl_list);
49 spin_lock_init(&gl->gl_spin); 51 spin_lock_init(&gl->gl_spin);
50 INIT_LIST_HEAD(&gl->gl_holders); 52 INIT_LIST_HEAD(&gl->gl_holders);
51 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
@@ -59,14 +61,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 61 struct address_space *mapping = (struct address_space *)(gl + 1);
60 62
61 gfs2_init_glock_once(gl); 63 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 64 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 65}
71 66
72/** 67/**
@@ -198,6 +193,8 @@ static void __exit exit_gfs2_fs(void)
198 unregister_filesystem(&gfs2meta_fs_type); 193 unregister_filesystem(&gfs2meta_fs_type);
199 destroy_workqueue(gfs_recovery_wq); 194 destroy_workqueue(gfs_recovery_wq);
200 195
196 rcu_barrier();
197
201 kmem_cache_destroy(gfs2_quotad_cachep); 198 kmem_cache_destroy(gfs2_quotad_cachep);
202 kmem_cache_destroy(gfs2_rgrpd_cachep); 199 kmem_cache_destroy(gfs2_rgrpd_cachep);
203 kmem_cache_destroy(gfs2_bufdata_cachep); 200 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f9..01d97f486553 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
326 brelse(bh); 326 brelse(bh);
327 } 327 }
328 if (bd) { 328 if (bd) {
329 spin_lock(&sdp->sd_ail_lock);
329 if (bd->bd_ail) { 330 if (bd->bd_ail) {
330 gfs2_remove_from_ail(bd); 331 gfs2_remove_from_ail(bd);
331 bh->b_private = NULL; 332 bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
333 bd->bd_blkno = bh->b_blocknr; 334 bd->bd_blkno = bh->b_blocknr;
334 gfs2_trans_add_revoke(sdp, bd); 335 gfs2_trans_add_revoke(sdp, bd);
335 } 336 }
337 spin_unlock(&sdp->sd_ail_lock);
336 } 338 }
337 clear_buffer_dirty(bh); 339 clear_buffer_dirty(bh);
338 clear_buffer_uptodate(bh); 340 clear_buffer_uptodate(bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f79..42ef24355afb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
99 99
100 init_waitqueue_head(&sdp->sd_log_waitq); 100 init_waitqueue_head(&sdp->sd_log_waitq);
101 init_waitqueue_head(&sdp->sd_logd_waitq); 101 init_waitqueue_head(&sdp->sd_logd_waitq);
102 spin_lock_init(&sdp->sd_ail_lock);
102 INIT_LIST_HEAD(&sdp->sd_ail1_list); 103 INIT_LIST_HEAD(&sdp->sd_ail1_list);
103 INIT_LIST_HEAD(&sdp->sd_ail2_list); 104 INIT_LIST_HEAD(&sdp->sd_ail2_list);
104 105
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = {
928 { Opt_err, NULL }, 929 { Opt_err, NULL },
929}; 930};
930 931
931static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
932{
933 struct gfs2_sbd *sdp = gl->gl_sbd;
934 kmem_cache_free(cachep, gl);
935 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
936 wake_up(&sdp->sd_glock_wait);
937}
938
939static const struct lm_lockops nolock_ops = { 932static const struct lm_lockops nolock_ops = {
940 .lm_proto_name = "lock_nolock", 933 .lm_proto_name = "lock_nolock",
941 .lm_put_lock = nolock_put_lock, 934 .lm_put_lock = gfs2_glock_free,
942 .lm_tokens = &nolock_tokens, 935 .lm_tokens = &nolock_tokens,
943}; 936};
944 937
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac2e20b..09e436a50723 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1026 1026
1027/** 1027/**
1028 * gfs2_permission - 1028 * gfs2_permission -
1029 * @inode: 1029 * @inode: The inode
1030 * @mask: 1030 * @mask: The mask to be tested
1031 * @nd: passed from Linux VFS, ignored by us 1031 * @flags: Indicates whether this is an RCU path walk or not
1032 * 1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the 1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only 1034 * inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1044 int error; 1044 int error;
1045 int unlock = 0; 1045 int unlock = 0;
1046 1046
1047 if (flags & IPERM_FLAG_RCU)
1048 return -ECHILD;
1049 1047
1050 ip = GFS2_I(inode); 1048 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1053 if (error)
1054 return error; 1054 return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901963de..e23d9864c418 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
834 goto out_end_trans; 834 goto out_end_trans;
835 835
836 do_qc(qd, -qd->qd_change_sync); 836 do_qc(qd, -qd->qd_change_sync);
837 set_bit(QDF_REFRESH, &qd->qd_flags);
837 } 838 }
838 839
839 error = 0; 840 error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
929{ 930{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 931 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_alloc *al = ip->i_alloc; 932 struct gfs2_alloc *al = ip->i_alloc;
933 struct gfs2_quota_data *qd;
932 unsigned int x; 934 unsigned int x;
933 int error = 0; 935 int error = 0;
934 936
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
942 sort_qd, NULL); 944 sort_qd, NULL);
943 945
944 for (x = 0; x < al->al_qd_num; x++) { 946 for (x = 0; x < al->al_qd_num; x++) {
945 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); 947 int force = NO_FORCE;
948 qd = al->al_qd[x];
949 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
950 force = FORCE;
951 error = do_glock(qd, force, &al->al_qd_ghs[x]);
946 if (error) 952 if (error)
947 break; 953 break;
948 } 954 }
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1587 1593
1588 offset = qd2offset(qd); 1594 offset = qd2offset(qd);
1589 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); 1595 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
1596 if (gfs2_is_stuffed(ip))
1597 alloc_required = 1;
1590 if (alloc_required) { 1598 if (alloc_required) {
1591 al = gfs2_alloc_get(ip); 1599 al = gfs2_alloc_get(ip);
1592 if (al == NULL) 1600 if (al == NULL)
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1600 blocks += gfs2_rg_blocks(al); 1608 blocks += gfs2_rg_blocks(al);
1601 } 1609 }
1602 1610
1603 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); 1611 /* Some quotas span block boundaries and can update two blocks,
 1612 so add an extra block to the transaction to handle such quotas */

1613 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
1604 if (error) 1614 if (error)
1605 goto out_release; 1615 goto out_release;
1606 1616
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea27020c..cf930cd9664a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
1602 * 1602 *
1603 */ 1603 */
1604 1604
1605void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) 1605void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1606{ 1606{
1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1608 struct gfs2_rgrpd *rgd; 1608 struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1618
1619 gfs2_trans_add_rg(rgd); 1619 gfs2_trans_add_rg(rgd);
1620}
1620 1621
1622/**
1623 * gfs2_free_data - free a contiguous run of data block(s)
1624 * @ip: the inode these blocks are being freed from
1625 * @bstart: first block of a run of contiguous blocks
1626 * @blen: the length of the block run
1627 *
1628 */
1629
1630void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1631{
1632 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1633
1634 __gfs2_free_data(ip, bstart, blen);
1621 gfs2_statfs_change(sdp, 0, +blen, 0); 1635 gfs2_statfs_change(sdp, 0, +blen, 0);
1622 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1636 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1623} 1637}
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1630 * 1644 *
1631 */ 1645 */
1632 1646
1633void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 1647void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1634{ 1648{
1635 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1636 struct gfs2_rgrpd *rgd; 1650 struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1645 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1659 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1646 1660
1647 gfs2_trans_add_rg(rgd); 1661 gfs2_trans_add_rg(rgd);
1662 gfs2_meta_wipe(ip, bstart, blen);
1663}
1648 1664
1665/**
 1666 * gfs2_free_meta - free a contiguous run of metadata block(s)
1667 * @ip: the inode these blocks are being freed from
1668 * @bstart: first block of a run of contiguous blocks
1669 * @blen: the length of the block run
1670 *
1671 */
1672
1673void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1674{
1675 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1676
1677 __gfs2_free_meta(ip, bstart, blen);
1649 gfs2_statfs_change(sdp, 0, +blen, 0); 1678 gfs2_statfs_change(sdp, 0, +blen, 0);
1650 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1679 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1651 gfs2_meta_wipe(ip, bstart, blen);
1652} 1680}
1653 1681
1654void gfs2_unlink_di(struct inode *inode) 1682void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb04369c..a80e3034ac47 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
54 54
55extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
55extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 56extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
56extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 58extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 59extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
58extern void gfs2_unlink_di(struct inode *inode); 60extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 63b6f5632318..0c39dc3ef7d7 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,7 @@
1config HPFS_FS 1config HPFS_FS
2 tristate "OS/2 HPFS file system support" 2 tristate "OS/2 HPFS file system support"
3 depends on BLOCK 3 depends on BLOCK
4 depends on BKL # nontrivial to fix 4 depends on BROKEN || !PREEMPT
5 help 5 help
6 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS 6 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
7 is the file system used for organizing files on OS/2 hard disk 7 is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index d32f63a569f7..b3d7c0ddb609 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -6,16 +6,15 @@
6 * directory VFS functions 6 * directory VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
11#include "hpfs_fn.h" 10#include "hpfs_fn.h"
12 11
13static int hpfs_dir_release(struct inode *inode, struct file *filp) 12static int hpfs_dir_release(struct inode *inode, struct file *filp)
14{ 13{
15 lock_kernel(); 14 hpfs_lock(inode->i_sb);
16 hpfs_del_pos(inode, &filp->f_pos); 15 hpfs_del_pos(inode, &filp->f_pos);
17 /*hpfs_write_if_changed(inode);*/ 16 /*hpfs_write_if_changed(inode);*/
18 unlock_kernel(); 17 hpfs_unlock(inode->i_sb);
19 return 0; 18 return 0;
20} 19}
21 20
@@ -30,7 +29,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
30 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
31 struct super_block *s = i->i_sb; 30 struct super_block *s = i->i_sb;
32 31
33 lock_kernel(); 32 hpfs_lock(s);
34 33
35 /*printk("dir lseek\n");*/ 34 /*printk("dir lseek\n");*/
36 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; 35 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
@@ -43,12 +42,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
43 } 42 }
44 mutex_unlock(&i->i_mutex); 43 mutex_unlock(&i->i_mutex);
45ok: 44ok:
46 unlock_kernel(); 45 hpfs_unlock(s);
47 return filp->f_pos = new_off; 46 return filp->f_pos = new_off;
48fail: 47fail:
49 mutex_unlock(&i->i_mutex); 48 mutex_unlock(&i->i_mutex);
50 /*printk("illegal lseek: %016llx\n", new_off);*/ 49 /*printk("illegal lseek: %016llx\n", new_off);*/
51 unlock_kernel(); 50 hpfs_unlock(s);
52 return -ESPIPE; 51 return -ESPIPE;
53} 52}
54 53
@@ -64,7 +63,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
64 int c1, c2 = 0; 63 int c1, c2 = 0;
65 int ret = 0; 64 int ret = 0;
66 65
67 lock_kernel(); 66 hpfs_lock(inode->i_sb);
68 67
69 if (hpfs_sb(inode->i_sb)->sb_chk) { 68 if (hpfs_sb(inode->i_sb)->sb_chk) {
70 if (hpfs_chk_sectors(inode->i_sb, inode->i_ino, 1, "dir_fnode")) { 69 if (hpfs_chk_sectors(inode->i_sb, inode->i_ino, 1, "dir_fnode")) {
@@ -167,7 +166,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
167 hpfs_brelse4(&qbh); 166 hpfs_brelse4(&qbh);
168 } 167 }
169out: 168out:
170 unlock_kernel(); 169 hpfs_unlock(inode->i_sb);
171 return ret; 170 return ret;
172} 171}
173 172
@@ -197,10 +196,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
197 struct inode *result = NULL; 196 struct inode *result = NULL;
198 struct hpfs_inode_info *hpfs_result; 197 struct hpfs_inode_info *hpfs_result;
199 198
200 lock_kernel(); 199 hpfs_lock(dir->i_sb);
201 if ((err = hpfs_chk_name(name, &len))) { 200 if ((err = hpfs_chk_name(name, &len))) {
202 if (err == -ENAMETOOLONG) { 201 if (err == -ENAMETOOLONG) {
203 unlock_kernel(); 202 hpfs_unlock(dir->i_sb);
204 return ERR_PTR(-ENAMETOOLONG); 203 return ERR_PTR(-ENAMETOOLONG);
205 } 204 }
206 goto end_add; 205 goto end_add;
@@ -298,7 +297,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
298 297
299 end: 298 end:
300 end_add: 299 end_add:
301 unlock_kernel(); 300 hpfs_unlock(dir->i_sb);
302 d_add(dentry, result); 301 d_add(dentry, result);
303 return NULL; 302 return NULL;
304 303
@@ -311,7 +310,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
311 310
312 /*bail:*/ 311 /*bail:*/
313 312
314 unlock_kernel(); 313 hpfs_unlock(dir->i_sb);
315 return ERR_PTR(-ENOENT); 314 return ERR_PTR(-ENOENT);
316} 315}
317 316
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index c0340887c7ea..2dbae20450f8 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -6,16 +6,15 @@
6 * file VFS functions 6 * file VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
10#include "hpfs_fn.h" 9#include "hpfs_fn.h"
11 10
12#define BLOCKS(size) (((size) + 511) >> 9) 11#define BLOCKS(size) (((size) + 511) >> 9)
13 12
14static int hpfs_file_release(struct inode *inode, struct file *file) 13static int hpfs_file_release(struct inode *inode, struct file *file)
15{ 14{
16 lock_kernel(); 15 hpfs_lock(inode->i_sb);
17 hpfs_write_if_changed(inode); 16 hpfs_write_if_changed(inode);
18 unlock_kernel(); 17 hpfs_unlock(inode->i_sb);
19 return 0; 18 return 0;
20} 19}
21 20
@@ -49,14 +48,14 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
49static void hpfs_truncate(struct inode *i) 48static void hpfs_truncate(struct inode *i)
50{ 49{
51 if (IS_IMMUTABLE(i)) return /*-EPERM*/; 50 if (IS_IMMUTABLE(i)) return /*-EPERM*/;
52 lock_kernel(); 51 hpfs_lock(i->i_sb);
53 hpfs_i(i)->i_n_secs = 0; 52 hpfs_i(i)->i_n_secs = 0;
54 i->i_blocks = 1 + ((i->i_size + 511) >> 9); 53 i->i_blocks = 1 + ((i->i_size + 511) >> 9);
55 hpfs_i(i)->mmu_private = i->i_size; 54 hpfs_i(i)->mmu_private = i->i_size;
56 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); 55 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
57 hpfs_write_inode(i); 56 hpfs_write_inode(i);
58 hpfs_i(i)->i_n_secs = 0; 57 hpfs_i(i)->i_n_secs = 0;
59 unlock_kernel(); 58 hpfs_unlock(i->i_sb);
60} 59}
61 60
62static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 61static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1c43dbea55e8..c15adbca07ff 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -342,3 +342,25 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
342 extern struct timezone sys_tz; 342 extern struct timezone sys_tz;
343 return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; 343 return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift;
344} 344}
345
346/*
347 * Locking:
348 *
349 * hpfs_lock() is a leftover from the big kernel lock.
350 * Right now, these functions are empty and only left
351 * for documentation purposes. The file system no longer
352 * works on SMP systems, so the lock is not needed
353 * any more.
354 *
355 * If someone is interested in making it work again, this
356 * would be the place to start by adding a per-superblock
357 * mutex and fixing all the bugs and performance issues
358 * caused by that.
359 */
360static inline void hpfs_lock(struct super_block *s)
361{
362}
363
364static inline void hpfs_unlock(struct super_block *s)
365{
366}
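The comment above points at a per-superblock mutex as the starting point for making HPFS concurrency-safe again. Purely as a hypothetical sketch — none of this is in the patch, and the sb_lock field would have to be added to struct hpfs_sb_info and initialised at mount time — the stubs could be backed like this:

#include <linux/mutex.h>

/* Hypothetical: sb_lock is not an existing hpfs_sb_info member. */
static inline void hpfs_lock(struct super_block *s)
{
	mutex_lock(&hpfs_sb(s)->sb_lock);
}

static inline void hpfs_unlock(struct super_block *s)
{
	mutex_unlock(&hpfs_sb(s)->sb_lock);
}

The call sites converted from lock_kernel()/unlock_kernel() in the dir.c, file.c, inode.c and namei.c hunks would then pick up real exclusion without further changes.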
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1ae35baa539e..87f1f787e767 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -6,7 +6,6 @@
6 * inode VFS functions 6 * inode VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
11#include "hpfs_fn.h" 10#include "hpfs_fn.h"
12 11
@@ -267,7 +266,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
267 struct inode *inode = dentry->d_inode; 266 struct inode *inode = dentry->d_inode;
268 int error = -EINVAL; 267 int error = -EINVAL;
269 268
270 lock_kernel(); 269 hpfs_lock(inode->i_sb);
271 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) 270 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
272 goto out_unlock; 271 goto out_unlock;
273 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) 272 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
@@ -290,7 +289,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
290 hpfs_write_inode(inode); 289 hpfs_write_inode(inode);
291 290
292 out_unlock: 291 out_unlock:
293 unlock_kernel(); 292 hpfs_unlock(inode->i_sb);
294 return error; 293 return error;
295} 294}
296 295
@@ -307,8 +306,8 @@ void hpfs_evict_inode(struct inode *inode)
307 truncate_inode_pages(&inode->i_data, 0); 306 truncate_inode_pages(&inode->i_data, 0);
308 end_writeback(inode); 307 end_writeback(inode);
309 if (!inode->i_nlink) { 308 if (!inode->i_nlink) {
310 lock_kernel(); 309 hpfs_lock(inode->i_sb);
311 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 310 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
312 unlock_kernel(); 311 hpfs_unlock(inode->i_sb);
313 } 312 }
314} 313}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index f4ad9e31ddc4..d5f8c8a19023 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -6,7 +6,6 @@
6 * adding & removing files & directories 6 * adding & removing files & directories
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/smp_lock.h>
10#include "hpfs_fn.h" 9#include "hpfs_fn.h"
11 10
12static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
@@ -25,7 +24,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
25 struct hpfs_dirent dee; 24 struct hpfs_dirent dee;
26 int err; 25 int err;
27 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; 26 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
28 lock_kernel(); 27 hpfs_lock(dir->i_sb);
29 err = -ENOSPC; 28 err = -ENOSPC;
30 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 29 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
31 if (!fnode) 30 if (!fnode)
@@ -103,7 +102,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
103 } 102 }
104 d_instantiate(dentry, result); 103 d_instantiate(dentry, result);
105 mutex_unlock(&hpfs_i(dir)->i_mutex); 104 mutex_unlock(&hpfs_i(dir)->i_mutex);
106 unlock_kernel(); 105 hpfs_unlock(dir->i_sb);
107 return 0; 106 return 0;
108bail3: 107bail3:
109 mutex_unlock(&hpfs_i(dir)->i_mutex); 108 mutex_unlock(&hpfs_i(dir)->i_mutex);
@@ -115,7 +114,7 @@ bail1:
115 brelse(bh); 114 brelse(bh);
116 hpfs_free_sectors(dir->i_sb, fno, 1); 115 hpfs_free_sectors(dir->i_sb, fno, 1);
117bail: 116bail:
118 unlock_kernel(); 117 hpfs_unlock(dir->i_sb);
119 return err; 118 return err;
120} 119}
121 120
@@ -132,7 +131,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
132 int err; 131 int err;
133 if ((err = hpfs_chk_name(name, &len))) 132 if ((err = hpfs_chk_name(name, &len)))
134 return err==-ENOENT ? -EINVAL : err; 133 return err==-ENOENT ? -EINVAL : err;
135 lock_kernel(); 134 hpfs_lock(dir->i_sb);
136 err = -ENOSPC; 135 err = -ENOSPC;
137 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 136 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
138 if (!fnode) 137 if (!fnode)
@@ -195,7 +194,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
195 } 194 }
196 d_instantiate(dentry, result); 195 d_instantiate(dentry, result);
197 mutex_unlock(&hpfs_i(dir)->i_mutex); 196 mutex_unlock(&hpfs_i(dir)->i_mutex);
198 unlock_kernel(); 197 hpfs_unlock(dir->i_sb);
199 return 0; 198 return 0;
200 199
201bail2: 200bail2:
@@ -205,7 +204,7 @@ bail1:
205 brelse(bh); 204 brelse(bh);
206 hpfs_free_sectors(dir->i_sb, fno, 1); 205 hpfs_free_sectors(dir->i_sb, fno, 1);
207bail: 206bail:
208 unlock_kernel(); 207 hpfs_unlock(dir->i_sb);
209 return err; 208 return err;
210} 209}
211 210
@@ -224,7 +223,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
224 if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM; 223 if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM;
225 if (!new_valid_dev(rdev)) 224 if (!new_valid_dev(rdev))
226 return -EINVAL; 225 return -EINVAL;
227 lock_kernel(); 226 hpfs_lock(dir->i_sb);
228 err = -ENOSPC; 227 err = -ENOSPC;
229 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 228 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
230 if (!fnode) 229 if (!fnode)
@@ -274,7 +273,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
274 d_instantiate(dentry, result); 273 d_instantiate(dentry, result);
275 mutex_unlock(&hpfs_i(dir)->i_mutex); 274 mutex_unlock(&hpfs_i(dir)->i_mutex);
276 brelse(bh); 275 brelse(bh);
277 unlock_kernel(); 276 hpfs_unlock(dir->i_sb);
278 return 0; 277 return 0;
279bail2: 278bail2:
280 mutex_unlock(&hpfs_i(dir)->i_mutex); 279 mutex_unlock(&hpfs_i(dir)->i_mutex);
@@ -283,7 +282,7 @@ bail1:
283 brelse(bh); 282 brelse(bh);
284 hpfs_free_sectors(dir->i_sb, fno, 1); 283 hpfs_free_sectors(dir->i_sb, fno, 1);
285bail: 284bail:
286 unlock_kernel(); 285 hpfs_unlock(dir->i_sb);
287 return err; 286 return err;
288} 287}
289 288
@@ -299,9 +298,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
299 struct inode *result; 298 struct inode *result;
300 int err; 299 int err;
301 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; 300 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
302 lock_kernel(); 301 hpfs_lock(dir->i_sb);
303 if (hpfs_sb(dir->i_sb)->sb_eas < 2) { 302 if (hpfs_sb(dir->i_sb)->sb_eas < 2) {
304 unlock_kernel(); 303 hpfs_unlock(dir->i_sb);
305 return -EPERM; 304 return -EPERM;
306 } 305 }
307 err = -ENOSPC; 306 err = -ENOSPC;
@@ -354,7 +353,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
354 hpfs_write_inode_nolock(result); 353 hpfs_write_inode_nolock(result);
355 d_instantiate(dentry, result); 354 d_instantiate(dentry, result);
356 mutex_unlock(&hpfs_i(dir)->i_mutex); 355 mutex_unlock(&hpfs_i(dir)->i_mutex);
357 unlock_kernel(); 356 hpfs_unlock(dir->i_sb);
358 return 0; 357 return 0;
359bail2: 358bail2:
360 mutex_unlock(&hpfs_i(dir)->i_mutex); 359 mutex_unlock(&hpfs_i(dir)->i_mutex);
@@ -363,7 +362,7 @@ bail1:
363 brelse(bh); 362 brelse(bh);
364 hpfs_free_sectors(dir->i_sb, fno, 1); 363 hpfs_free_sectors(dir->i_sb, fno, 1);
365bail: 364bail:
366 unlock_kernel(); 365 hpfs_unlock(dir->i_sb);
367 return err; 366 return err;
368} 367}
369 368
@@ -380,7 +379,7 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
380 int rep = 0; 379 int rep = 0;
381 int err; 380 int err;
382 381
383 lock_kernel(); 382 hpfs_lock(dir->i_sb);
384 hpfs_adjust_length(name, &len); 383 hpfs_adjust_length(name, &len);
385again: 384again:
386 mutex_lock(&hpfs_i(inode)->i_parent_mutex); 385 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
@@ -416,7 +415,7 @@ again:
416 dentry_unhash(dentry); 415 dentry_unhash(dentry);
417 if (!d_unhashed(dentry)) { 416 if (!d_unhashed(dentry)) {
418 dput(dentry); 417 dput(dentry);
419 unlock_kernel(); 418 hpfs_unlock(dir->i_sb);
420 return -ENOSPC; 419 return -ENOSPC;
421 } 420 }
422 if (generic_permission(inode, MAY_WRITE, 0, NULL) || 421 if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
@@ -435,7 +434,7 @@ again:
435 if (!err) 434 if (!err)
436 goto again; 435 goto again;
437 } 436 }
438 unlock_kernel(); 437 hpfs_unlock(dir->i_sb);
439 return -ENOSPC; 438 return -ENOSPC;
440 default: 439 default:
441 drop_nlink(inode); 440 drop_nlink(inode);
@@ -448,7 +447,7 @@ out1:
448out: 447out:
449 mutex_unlock(&hpfs_i(dir)->i_mutex); 448 mutex_unlock(&hpfs_i(dir)->i_mutex);
450 mutex_unlock(&hpfs_i(inode)->i_parent_mutex); 449 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
451 unlock_kernel(); 450 hpfs_unlock(dir->i_sb);
452 return err; 451 return err;
453} 452}
454 453
@@ -466,7 +465,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
466 int r; 465 int r;
467 466
468 hpfs_adjust_length(name, &len); 467 hpfs_adjust_length(name, &len);
469 lock_kernel(); 468 hpfs_lock(dir->i_sb);
470 mutex_lock(&hpfs_i(inode)->i_parent_mutex); 469 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
471 mutex_lock(&hpfs_i(dir)->i_mutex); 470 mutex_lock(&hpfs_i(dir)->i_mutex);
472 err = -ENOENT; 471 err = -ENOENT;
@@ -508,7 +507,7 @@ out1:
508out: 507out:
509 mutex_unlock(&hpfs_i(dir)->i_mutex); 508 mutex_unlock(&hpfs_i(dir)->i_mutex);
510 mutex_unlock(&hpfs_i(inode)->i_parent_mutex); 509 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
511 unlock_kernel(); 510 hpfs_unlock(dir->i_sb);
512 return err; 511 return err;
513} 512}
514 513
@@ -521,21 +520,21 @@ static int hpfs_symlink_readpage(struct file *file, struct page *page)
521 int err; 520 int err;
522 521
523 err = -EIO; 522 err = -EIO;
524 lock_kernel(); 523 hpfs_lock(i->i_sb);
525 if (!(fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) 524 if (!(fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh)))
526 goto fail; 525 goto fail;
527 err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE); 526 err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE);
528 brelse(bh); 527 brelse(bh);
529 if (err) 528 if (err)
530 goto fail; 529 goto fail;
531 unlock_kernel(); 530 hpfs_unlock(i->i_sb);
532 SetPageUptodate(page); 531 SetPageUptodate(page);
533 kunmap(page); 532 kunmap(page);
534 unlock_page(page); 533 unlock_page(page);
535 return 0; 534 return 0;
536 535
537fail: 536fail:
538 unlock_kernel(); 537 hpfs_unlock(i->i_sb);
539 SetPageError(page); 538 SetPageError(page);
540 kunmap(page); 539 kunmap(page);
541 unlock_page(page); 540 unlock_page(page);
@@ -567,7 +566,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
567 err = 0; 566 err = 0;
568 hpfs_adjust_length(old_name, &old_len); 567 hpfs_adjust_length(old_name, &old_len);
569 568
570 lock_kernel(); 569 hpfs_lock(i->i_sb);
571 /* order doesn't matter, due to VFS exclusion */ 570 /* order doesn't matter, due to VFS exclusion */
572 mutex_lock(&hpfs_i(i)->i_parent_mutex); 571 mutex_lock(&hpfs_i(i)->i_parent_mutex);
573 if (new_inode) 572 if (new_inode)
@@ -659,7 +658,7 @@ end1:
659 mutex_unlock(&hpfs_i(i)->i_parent_mutex); 658 mutex_unlock(&hpfs_i(i)->i_parent_mutex);
660 if (new_inode) 659 if (new_inode)
661 mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex); 660 mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
662 unlock_kernel(); 661 hpfs_unlock(i->i_sb);
663 return err; 662 return err;
664} 663}
665 664
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index b30426b1fc97..c89b40808587 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -13,7 +13,6 @@
13#include <linux/statfs.h> 13#include <linux/statfs.h>
14#include <linux/magic.h> 14#include <linux/magic.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/smp_lock.h>
17#include <linux/bitmap.h> 16#include <linux/bitmap.h>
18#include <linux/slab.h> 17#include <linux/slab.h>
19 18
@@ -103,15 +102,11 @@ static void hpfs_put_super(struct super_block *s)
103{ 102{
104 struct hpfs_sb_info *sbi = hpfs_sb(s); 103 struct hpfs_sb_info *sbi = hpfs_sb(s);
105 104
106 lock_kernel();
107
108 kfree(sbi->sb_cp_table); 105 kfree(sbi->sb_cp_table);
109 kfree(sbi->sb_bmp_dir); 106 kfree(sbi->sb_bmp_dir);
110 unmark_dirty(s); 107 unmark_dirty(s);
111 s->s_fs_info = NULL; 108 s->s_fs_info = NULL;
112 kfree(sbi); 109 kfree(sbi);
113
114 unlock_kernel();
115} 110}
116 111
117unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) 112unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -143,7 +138,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
143 struct super_block *s = dentry->d_sb; 138 struct super_block *s = dentry->d_sb;
144 struct hpfs_sb_info *sbi = hpfs_sb(s); 139 struct hpfs_sb_info *sbi = hpfs_sb(s);
145 u64 id = huge_encode_dev(s->s_bdev->bd_dev); 140 u64 id = huge_encode_dev(s->s_bdev->bd_dev);
146 lock_kernel(); 141 hpfs_lock(s);
147 142
148 /*if (sbi->sb_n_free == -1) {*/ 143 /*if (sbi->sb_n_free == -1) {*/
149 sbi->sb_n_free = count_bitmaps(s); 144 sbi->sb_n_free = count_bitmaps(s);
@@ -160,7 +155,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
160 buf->f_fsid.val[1] = (u32)(id >> 32); 155 buf->f_fsid.val[1] = (u32)(id >> 32);
161 buf->f_namelen = 254; 156 buf->f_namelen = 254;
162 157
163 unlock_kernel(); 158 hpfs_unlock(s);
164 159
165 return 0; 160 return 0;
166} 161}
@@ -406,7 +401,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
406 401
407 *flags |= MS_NOATIME; 402 *flags |= MS_NOATIME;
408 403
409 lock_kernel(); 404 hpfs_lock(s);
410 lock_super(s); 405 lock_super(s);
411 uid = sbi->sb_uid; gid = sbi->sb_gid; 406 uid = sbi->sb_uid; gid = sbi->sb_gid;
412 umask = 0777 & ~sbi->sb_mode; 407 umask = 0777 & ~sbi->sb_mode;
@@ -441,12 +436,12 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
441 replace_mount_options(s, new_opts); 436 replace_mount_options(s, new_opts);
442 437
443 unlock_super(s); 438 unlock_super(s);
444 unlock_kernel(); 439 hpfs_unlock(s);
445 return 0; 440 return 0;
446 441
447out_err: 442out_err:
448 unlock_super(s); 443 unlock_super(s);
449 unlock_kernel(); 444 hpfs_unlock(s);
450 kfree(new_opts); 445 kfree(new_opts);
451 return -EINVAL; 446 return -EINVAL;
452} 447}
@@ -484,13 +479,15 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
484 479
485 int o; 480 int o;
486 481
487 lock_kernel(); 482 if (num_possible_cpus() > 1) {
483 printk(KERN_ERR "HPFS is not SMP safe\n");
484 return -EINVAL;
485 }
488 486
489 save_mount_options(s, options); 487 save_mount_options(s, options);
490 488
491 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 489 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
492 if (!sbi) { 490 if (!sbi) {
493 unlock_kernel();
494 return -ENOMEM; 491 return -ENOMEM;
495 } 492 }
496 s->s_fs_info = sbi; 493 s->s_fs_info = sbi;
@@ -677,7 +674,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
677 root->i_blocks = 5; 674 root->i_blocks = 5;
678 hpfs_brelse4(&qbh); 675 hpfs_brelse4(&qbh);
679 } 676 }
680 unlock_kernel();
681 return 0; 677 return 0;
682 678
683bail4: brelse(bh2); 679bail4: brelse(bh2);
@@ -689,7 +685,6 @@ bail0:
689 kfree(sbi->sb_cp_table); 685 kfree(sbi->sb_cp_table);
690 s->s_fs_info = NULL; 686 s->s_fs_info = NULL;
691 kfree(sbi); 687 kfree(sbi);
692 unlock_kernel();
693 return -EINVAL; 688 return -EINVAL;
694} 689}
695 690
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..9910c039f026 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
84DEFINE_SPINLOCK(inode_lock); 84DEFINE_SPINLOCK(inode_lock);
85 85
86/* 86/*
87 * iprune_sem provides exclusion between the kswapd or try_to_free_pages 87 * iprune_sem provides exclusion between the icache shrinking and the
88 * icache shrinking path, and the umount path. Without this exclusion, 88 * umount path.
89 * by the time prune_icache calls iput for the inode whose pages it has
90 * been invalidating, or by the time it calls clear_inode & destroy_inode
91 * from its final dispose_list, the struct super_block they refer to
92 * (for inode->i_sb->s_op) may already have been freed and reused.
93 * 89 *
94 * We make this an rwsem because the fastpath is icache shrinking. In 90 * We don't actually need it to protect anything in the umount path,
95 * some cases a filesystem may be doing a significant amount of work in 91 * but only need to cycle through it to make sure any inode that
96 * its inode reclaim code, so this should improve parallelism. 92 * prune_icache took off the LRU list has been fully torn down by the
93 * time we are past evict_inodes.
97 */ 94 */
98static DECLARE_RWSEM(iprune_sem); 95static DECLARE_RWSEM(iprune_sem);
99 96
@@ -295,6 +292,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 292 call_rcu(&inode->i_rcu, i_callback);
296} 293}
297 294
295void address_space_init_once(struct address_space *mapping)
296{
297 memset(mapping, 0, sizeof(*mapping));
298 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
299 spin_lock_init(&mapping->tree_lock);
300 spin_lock_init(&mapping->i_mmap_lock);
301 INIT_LIST_HEAD(&mapping->private_list);
302 spin_lock_init(&mapping->private_lock);
303 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
304 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
305 mutex_init(&mapping->unmap_mutex);
306}
307EXPORT_SYMBOL(address_space_init_once);
308
298/* 309/*
299 * These are initializations that only need to be done 310 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 311 * once, because the fields are idempotent across use
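
Note: address_space_init_once() is exported, so code outside inode.c that embeds its own struct address_space can reuse the one-time setup instead of open-coding the radix-tree, lock and list initialisation. A hypothetical slab constructor using it (all names below are invented for illustration):

struct foo_inode_info {
	struct address_space extra_mapping;	/* hypothetical second mapping */
	struct inode vfs_inode;
};

static void foo_init_once(void *obj)
{
	struct foo_inode_info *fi = obj;

	address_space_init_once(&fi->extra_mapping);
	inode_init_once(&fi->vfs_inode);
}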
@@ -308,13 +319,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 319 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 320 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 321 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 322 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 323 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 324#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 325 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -508,17 +513,12 @@ void evict_inodes(struct super_block *sb)
508 struct inode *inode, *next; 513 struct inode *inode, *next;
509 LIST_HEAD(dispose); 514 LIST_HEAD(dispose);
510 515
511 down_write(&iprune_sem);
512
513 spin_lock(&inode_lock); 516 spin_lock(&inode_lock);
514 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 517 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
515 if (atomic_read(&inode->i_count)) 518 if (atomic_read(&inode->i_count))
516 continue; 519 continue;
517 520 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
518 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
519 WARN_ON(1);
520 continue; 521 continue;
521 }
522 522
523 inode->i_state |= I_FREEING; 523 inode->i_state |= I_FREEING;
524 524
@@ -534,28 +534,40 @@ void evict_inodes(struct super_block *sb)
534 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
535 535
536 dispose_list(&dispose); 536 dispose_list(&dispose);
537
538 /*
539 * Cycle through iprune_sem to make sure any inode that prune_icache
540 * moved off the list before we took the lock has been fully torn
541 * down.
542 */
543 down_write(&iprune_sem);
537 up_write(&iprune_sem); 544 up_write(&iprune_sem);
538} 545}
539 546
540/** 547/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 548 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 549 * @sb: superblock to operate on
550 * @kill_dirty: flag to guide handling of dirty inodes
543 * 551 *
544 * Attempts to free all inodes for a given superblock. If there were any 552 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 553 * busy inodes return a non-zero value, else zero.
554 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
555 * them as busy.
546 */ 556 */
547int invalidate_inodes(struct super_block *sb) 557int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 558{
549 int busy = 0; 559 int busy = 0;
550 struct inode *inode, *next; 560 struct inode *inode, *next;
551 LIST_HEAD(dispose); 561 LIST_HEAD(dispose);
552 562
553 down_write(&iprune_sem);
554
555 spin_lock(&inode_lock); 563 spin_lock(&inode_lock);
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 564 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 565 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 566 continue;
567 if (inode->i_state & I_DIRTY && !kill_dirty) {
568 busy = 1;
569 continue;
570 }
559 if (atomic_read(&inode->i_count)) { 571 if (atomic_read(&inode->i_count)) {
560 busy = 1; 572 busy = 1;
561 continue; 573 continue;
@@ -575,7 +587,6 @@ int invalidate_inodes(struct super_block *sb)
575 spin_unlock(&inode_lock); 587 spin_unlock(&inode_lock);
576 588
577 dispose_list(&dispose); 589 dispose_list(&dispose);
578 up_write(&iprune_sem);
579 590
580 return busy; 591 return busy;
581} 592}
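
Note: the down_write()/up_write() pair that evict_inodes() now does at the end is a pure barrier: acquiring the rwsem for write cannot complete until every prune_icache() reader that entered earlier has left, and nothing is held afterwards. A userspace analogue of that cycle-through idiom, as a sketch only (pthread API, not kernel code):

#include <pthread.h>

static pthread_rwlock_t iprune_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Pruning side: hold the lock shared while tearing unused inodes down. */
static void pruner_section(void)
{
	pthread_rwlock_rdlock(&iprune_sem);
	/* ... dispose of unused entries ... */
	pthread_rwlock_unlock(&iprune_sem);
}

/* Unmount side: returns only once all earlier pruners have finished. */
static void wait_for_pruners(void)
{
	pthread_rwlock_wrlock(&iprune_sem);
	pthread_rwlock_unlock(&iprune_sem);
}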
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,10 +106,23 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
112 */ 125 */
113extern int get_nr_dirty_inodes(void); 126extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 127extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 128extern int invalidate_inodes(struct super_block *, bool);
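
Note: struct open_flags gathers what an open(2)-style caller previously passed as loose arguments to do_filp_open(). Purely as an illustration (this is not a call site from the patch), a read-only open expressed through the new interface might look like:

struct open_flags op = {
	.open_flag = O_RDONLY,
	.mode      = 0,			/* no O_CREAT, so mode is unused */
	.acc_mode  = MAY_READ,
	.intent    = LOOKUP_OPEN,
};
struct file *filp = do_filp_open(AT_FDCWD, "/etc/fstab", &op, LOOKUP_FOLLOW);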
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
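
Note: writing the required size back into *max_len before returning 255 lets a caller probe with a short buffer and retry with the right one. The same convention reaches userspace through the file-handle syscalls added in this series (see the do_handle_open declaration in fs/internal.h above); a sketch of the probe-and-retry pattern against the glibc API:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>

static struct file_handle *get_handle(const char *path, int *mount_id)
{
	struct file_handle probe = { .handle_bytes = 0 };
	struct file_handle *fh;

	/* Expected to fail with EOVERFLOW while reporting the needed size. */
	name_to_handle_at(AT_FDCWD, path, &probe, mount_id, 0);

	fh = malloc(sizeof(*fh) + probe.handle_bytes);
	if (!fh)
		return NULL;
	fh->handle_bytes = probe.handle_bytes;
	if (name_to_handle_at(AT_FDCWD, path, fh, mount_id, 0) == 0)
		return fh;
	free(fh);
	return NULL;
}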
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index da1b5e4ffce1..eb11601f2e00 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -839,7 +839,7 @@ journal_t * journal_init_inode (struct inode *inode)
839 err = journal_bmap(journal, 0, &blocknr); 839 err = journal_bmap(journal, 0, &blocknr);
840 /* If that failed, give up */ 840 /* If that failed, give up */
841 if (err) { 841 if (err) {
842 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 842 printk(KERN_ERR "%s: Cannot locate journal superblock\n",
843 __func__); 843 __func__);
844 goto out_err; 844 goto out_err;
845 } 845 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 97e73469b2c4..90407b8fece7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -991,7 +991,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
991 err = jbd2_journal_bmap(journal, 0, &blocknr); 991 err = jbd2_journal_bmap(journal, 0, &blocknr);
992 /* If that failed, give up */ 992 /* If that failed, give up */
993 if (err) { 993 if (err) {
994 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 994 printk(KERN_ERR "%s: Cannot locate journal superblock\n",
995 __func__); 995 __func__);
996 goto out_err; 996 goto out_err;
997 } 997 }
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 92978658ed18..82faddd1f321 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -215,8 +215,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
215 no chance of AB-BA deadlock involving its f->sem). */ 215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem); 216 mutex_unlock(&f->sem);
217 217
218 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
219 dentry->d_name.name, dentry->d_name.len);
220 if (ret) 219 if (ret)
221 goto fail; 220 goto fail;
222 221
@@ -386,7 +385,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
386 385
387 jffs2_complete_reservation(c); 386 jffs2_complete_reservation(c);
388 387
389 ret = jffs2_init_security(inode, dir_i); 388 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
390 if (ret) 389 if (ret)
391 goto fail; 390 goto fail;
392 391
@@ -530,7 +529,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
530 529
531 jffs2_complete_reservation(c); 530 jffs2_complete_reservation(c);
532 531
533 ret = jffs2_init_security(inode, dir_i); 532 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
534 if (ret) 533 if (ret)
535 goto fail; 534 goto fail;
536 535
@@ -703,7 +702,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
703 702
704 jffs2_complete_reservation(c); 703 jffs2_complete_reservation(c);
705 704
706 ret = jffs2_init_security(inode, dir_i); 705 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
707 if (ret) 706 if (ret)
708 goto fail; 707 goto fail;
709 708
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 5a53d9bdb2b5..e4619b00f7c5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -401,7 +401,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
401 struct jffs2_raw_inode *ri, unsigned char *buf, 401 struct jffs2_raw_inode *ri, unsigned char *buf,
402 uint32_t offset, uint32_t writelen, uint32_t *retlen); 402 uint32_t offset, uint32_t writelen, uint32_t *retlen);
403int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, 403int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f,
404 struct jffs2_raw_inode *ri, const char *name, int namelen); 404 struct jffs2_raw_inode *ri, const struct qstr *qstr);
405int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name, 405int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name,
406 int namelen, struct jffs2_inode_info *dead_f, uint32_t time); 406 int namelen, struct jffs2_inode_info *dead_f, uint32_t time);
407int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino, 407int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino,
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 239f51216a68..cfeb7164b085 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -23,14 +23,15 @@
23#include "nodelist.h" 23#include "nodelist.h"
24 24
25/* ---- Initial Security Label Attachment -------------- */ 25/* ---- Initial Security Label Attachment -------------- */
26int jffs2_init_security(struct inode *inode, struct inode *dir) 26int jffs2_init_security(struct inode *inode, struct inode *dir,
27 const struct qstr *qstr)
27{ 28{
28 int rc; 29 int rc;
29 size_t len; 30 size_t len;
30 void *value; 31 void *value;
31 char *name; 32 char *name;
32 33
33 rc = security_inode_init_security(inode, dir, &name, &value, &len); 34 rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
34 if (rc) { 35 if (rc) {
35 if (rc == -EOPNOTSUPP) 36 if (rc == -EOPNOTSUPP)
36 return 0; 37 return 0;
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index c819eb0e982d..30d175b6d290 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -424,7 +424,9 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
424 return ret; 424 return ret;
425} 425}
426 426
427int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen) 427int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
428 struct jffs2_inode_info *f, struct jffs2_raw_inode *ri,
429 const struct qstr *qstr)
428{ 430{
429 struct jffs2_raw_dirent *rd; 431 struct jffs2_raw_dirent *rd;
430 struct jffs2_full_dnode *fn; 432 struct jffs2_full_dnode *fn;
@@ -466,15 +468,15 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
466 mutex_unlock(&f->sem); 468 mutex_unlock(&f->sem);
467 jffs2_complete_reservation(c); 469 jffs2_complete_reservation(c);
468 470
469 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode); 471 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode, qstr);
470 if (ret) 472 if (ret)
471 return ret; 473 return ret;
472 ret = jffs2_init_acl_post(&f->vfs_inode); 474 ret = jffs2_init_acl_post(&f->vfs_inode);
473 if (ret) 475 if (ret)
474 return ret; 476 return ret;
475 477
476 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, 478 ret = jffs2_reserve_space(c, sizeof(*rd)+qstr->len, &alloclen,
477 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 479 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(qstr->len));
478 480
479 if (ret) { 481 if (ret) {
480 /* Eep. */ 482 /* Eep. */
@@ -493,19 +495,19 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
493 495
494 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 496 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
495 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 497 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
496 rd->totlen = cpu_to_je32(sizeof(*rd) + namelen); 498 rd->totlen = cpu_to_je32(sizeof(*rd) + qstr->len);
497 rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)); 499 rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4));
498 500
499 rd->pino = cpu_to_je32(dir_f->inocache->ino); 501 rd->pino = cpu_to_je32(dir_f->inocache->ino);
500 rd->version = cpu_to_je32(++dir_f->highest_version); 502 rd->version = cpu_to_je32(++dir_f->highest_version);
501 rd->ino = ri->ino; 503 rd->ino = ri->ino;
502 rd->mctime = ri->ctime; 504 rd->mctime = ri->ctime;
503 rd->nsize = namelen; 505 rd->nsize = qstr->len;
504 rd->type = DT_REG; 506 rd->type = DT_REG;
505 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 507 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
506 rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); 508 rd->name_crc = cpu_to_je32(crc32(0, qstr->name, qstr->len));
507 509
508 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL); 510 fd = jffs2_write_dirent(c, dir_f, rd, qstr->name, qstr->len, ALLOC_NORMAL);
509 511
510 jffs2_free_raw_dirent(rd); 512 jffs2_free_raw_dirent(rd);
511 513
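
Note: for reference, the qstr that replaces the name/namelen pair here is declared in include/linux/dcache.h (outside this diff); at this point in the tree it carries the dcache hash alongside the two fields the old interface passed separately:

struct qstr {
	unsigned int hash;
	unsigned int len;
	const unsigned char *name;
};

Passing &dentry->d_name therefore hands the callee the length, the bytes and the precomputed hash in one argument.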
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index cf4f5759b42b..7be4beb306f3 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -121,10 +121,11 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
121#endif /* CONFIG_JFFS2_FS_XATTR */ 121#endif /* CONFIG_JFFS2_FS_XATTR */
122 122
123#ifdef CONFIG_JFFS2_FS_SECURITY 123#ifdef CONFIG_JFFS2_FS_SECURITY
124extern int jffs2_init_security(struct inode *inode, struct inode *dir); 124extern int jffs2_init_security(struct inode *inode, struct inode *dir,
125 const struct qstr *qstr);
125extern const struct xattr_handler jffs2_security_xattr_handler; 126extern const struct xattr_handler jffs2_security_xattr_handler;
126#else 127#else
127#define jffs2_init_security(inode,dir) (0) 128#define jffs2_init_security(inode,dir,qstr) (0)
128#endif /* CONFIG_JFFS2_FS_SECURITY */ 129#endif /* CONFIG_JFFS2_FS_SECURITY */
129 130
130#endif /* _JFFS2_FS_XATTR_H_ */ 131#endif /* _JFFS2_FS_XATTR_H_ */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 88b6cc535bf2..e9e100fd7c09 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -62,10 +62,11 @@ extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
62extern int jfs_removexattr(struct dentry *, const char *); 62extern int jfs_removexattr(struct dentry *, const char *);
63 63
64#ifdef CONFIG_JFS_SECURITY 64#ifdef CONFIG_JFS_SECURITY
65extern int jfs_init_security(tid_t, struct inode *, struct inode *); 65extern int jfs_init_security(tid_t, struct inode *, struct inode *,
66 const struct qstr *);
66#else 67#else
67static inline int jfs_init_security(tid_t tid, struct inode *inode, 68static inline int jfs_init_security(tid_t tid, struct inode *inode,
68 struct inode *dir) 69 struct inode *dir, const struct qstr *qstr)
69{ 70{
70 return 0; 71 return 0;
71} 72}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..eaaf2b511e89 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -115,7 +115,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
115 if (rc) 115 if (rc)
116 goto out3; 116 goto out3;
117 117
118 rc = jfs_init_security(tid, ip, dip); 118 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
119 if (rc) { 119 if (rc) {
120 txAbort(tid, 0); 120 txAbort(tid, 0);
121 goto out3; 121 goto out3;
@@ -253,7 +253,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
253 if (rc) 253 if (rc)
254 goto out3; 254 goto out3;
255 255
256 rc = jfs_init_security(tid, ip, dip); 256 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
257 if (rc) { 257 if (rc) {
258 txAbort(tid, 0); 258 txAbort(tid, 0);
259 goto out3; 259 goto out3;
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
@@ -932,7 +929,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
932 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); 929 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
933 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); 930 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
934 931
935 rc = jfs_init_security(tid, ip, dip); 932 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
936 if (rc) 933 if (rc)
937 goto out3; 934 goto out3;
938 935
@@ -1395,7 +1392,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1395 if (rc) 1392 if (rc)
1396 goto out3; 1393 goto out3;
1397 1394
1398 rc = jfs_init_security(tid, ip, dir); 1395 rc = jfs_init_security(tid, ip, dir, &dentry->d_name);
1399 if (rc) { 1396 if (rc) {
1400 txAbort(tid, 0); 1397 txAbort(tid, 0);
1401 goto out3; 1398 goto out3;
@@ -1600,7 +1597,7 @@ out:
1600 1597
1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1602{ 1599{
1603 if (nd->flags & LOOKUP_RCU) 1600 if (nd && nd->flags & LOOKUP_RCU)
1604 return -ECHILD; 1601 return -ECHILD;
1605 /* 1602 /*
1606 * This is not negative dentry. Always valid. 1603 * This is not negative dentry. Always valid.
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 2d7f165d0f1d..3fa4c32272df 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -1091,7 +1091,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
1091} 1091}
1092 1092
1093#ifdef CONFIG_JFS_SECURITY 1093#ifdef CONFIG_JFS_SECURITY
1094int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir) 1094int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir,
1095 const struct qstr *qstr)
1095{ 1096{
1096 int rc; 1097 int rc;
1097 size_t len; 1098 size_t len;
@@ -1099,7 +1100,8 @@ int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
1099 char *suffix; 1100 char *suffix;
1100 char *name; 1101 char *name;
1101 1102
1102 rc = security_inode_init_security(inode, dir, &suffix, &value, &len); 1103 rc = security_inode_init_security(inode, dir, qstr, &suffix, &value,
1104 &len);
1103 if (rc) { 1105 if (rc) {
1104 if (rc == -EOPNOTSUPP) 1106 if (rc == -EOPNOTSUPP)
1105 return 0; 1107 return 0;
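
Note: both the jffs2 and jfs conversions thread the dentry name down to security_inode_init_security() as a qstr. The declaration itself lives in include/linux/security.h and is not part of this diff, but the converted call sites imply a prototype along these lines:

int security_inode_init_security(struct inode *inode, struct inode *dir,
				 const struct qstr *qstr,
				 char **name, void **value, size_t *len);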
diff --git a/fs/locks.c b/fs/locks.c
index 0f3998291f78..822c3d1843af 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -145,7 +145,6 @@ static DEFINE_SPINLOCK(file_lock_lock);
145 145
146/* 146/*
147 * Protects the two list heads above, plus the inode->i_flock list 147 * Protects the two list heads above, plus the inode->i_flock list
148 * FIXME: should use a spinlock, once lockd and ceph are ready.
149 */ 148 */
150void lock_flocks(void) 149void lock_flocks(void)
151{ 150{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6b..b912b7abe747 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
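
Note: getname_flags() now keeps the (empty) result instead of failing with -ENOENT when LOOKUP_EMPTY is set. The callers that pass LOOKUP_EMPTY sit outside these hunks, so the following is only an assumed illustration of the user-visible behaviour this enables: the AT_EMPTY_PATH style of call, where an already-open file is named purely by descriptor.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/stat.h>

/* stat an already-open file by descriptor, passing "" as the pathname */
static int stat_by_fd(int fd, struct stat *st)
{
	return fstatat(fd, "", st, AT_EMPTY_PATH);
}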
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,19 +737,42 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (link->mnt == nd->path.mnt)
757 mntget(link->mnt);
758
759 if (unlikely(current->total_link_count >= 40)) {
760 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
761 path_put(&nd->path);
762 return -ELOOP;
763 }
764 cond_resched();
765 current->total_link_count++;
766
768 touch_atime(link->mnt, dentry); 767 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 768 nd_set_link(nd, NULL);
770 769
771 if (link->mnt == nd->path.mnt) 770 error = security_inode_follow_link(link->dentry, nd);
772 mntget(link->mnt); 771 if (error) {
772 *p = ERR_PTR(error); /* no ->put_link(), please */
773 path_put(&nd->path);
774 return error;
775 }
773 776
774 nd->last_type = LAST_BIND; 777 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 778 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,56 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 783 if (s)
781 error = __vfs_follow_link(nd, s); 784 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 785 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 786 nd->flags |= LOOKUP_JUMPED;
784 if (error) 787 nd->inode = nd->path.dentry->d_inode;
788 if (nd->inode->i_op->follow_link) {
789 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 790 path_put(&nd->path);
791 error = -ELOOP;
792 }
786 } 793 }
787 } 794 }
788 return error; 795 return error;
789} 796}
790 797
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 798static int follow_up_rcu(struct path *path)
834{ 799{
835 struct vfsmount *parent; 800 struct vfsmount *parent;
@@ -1068,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1033
1069 seq = read_seqcount_begin(&parent->d_seq); 1034 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1035 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1036 goto failed;
1072 inode = parent->d_inode; 1037 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1038 nd->path.dentry = parent;
1074 nd->seq = seq; 1039 nd->seq = seq;
@@ -1081,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1046 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1047 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1048 nd->inode = inode;
1084
1085 return 0; 1049 return 0;
1050
1051failed:
1052 nd->flags &= ~LOOKUP_RCU;
1053 if (!(nd->flags & LOOKUP_ROOT))
1054 nd->root.mnt = NULL;
1055 rcu_read_unlock();
1056 br_read_unlock(vfsmount_lock);
1057 return -ECHILD;
1086} 1058}
1087 1059
1088/* 1060/*
@@ -1216,68 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1188{
1217 struct vfsmount *mnt = nd->path.mnt; 1189 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1190 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1191 int need_reval = 1;
1192 int status = 1;
1220 int err; 1193 int err;
1221 1194
1222 /* 1195 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1196 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1197 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1198 * do the non-racy lookup, below.
1236 */ 1199 */
1237 if (nd->flags & LOOKUP_RCU) { 1200 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1201 unsigned seq;
1239
1240 *inode = nd->inode; 1202 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1203 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1204 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1205 goto unlazy;
1244 return -ECHILD; 1206
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1207 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1208 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1209 return -ECHILD;
1250
1251 nd->seq = seq; 1210 nd->seq = seq;
1211
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1212 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1213 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1214 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1215 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1216 need_reval = 0;
1257 goto fail; 1217 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1218 }
1259 goto done;
1260 } 1219 }
1261 path->mnt = mnt; 1220 path->mnt = mnt;
1262 path->dentry = dentry; 1221 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1222 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1223 return 0;
1265 if (nameidata_drop_rcu(nd)) 1224unlazy:
1266 return -ECHILD; 1225 if (dentry) {
1267 /* fallthru */ 1226 if (nameidata_dentry_drop_rcu(nd, dentry))
1227 return -ECHILD;
1228 } else {
1229 if (nameidata_drop_rcu(nd))
1230 return -ECHILD;
1231 }
1232 } else {
1233 dentry = __d_lookup(parent, name);
1268 } 1234 }
1269 dentry = __d_lookup(parent, name); 1235
1270 if (!dentry) 1236retry:
1271 goto need_lookup; 1237 if (unlikely(!dentry)) {
1272found: 1238 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1239 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1240
1275 if (!dentry) 1241 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1242 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1243 if (likely(!dentry)) {
1278 goto fail; 1244 dentry = d_alloc_and_lookup(parent, name, nd);
1245 if (IS_ERR(dentry)) {
1246 mutex_unlock(&dir->i_mutex);
1247 return PTR_ERR(dentry);
1248 }
1249 /* known good */
1250 need_reval = 0;
1251 status = 1;
1252 }
1253 mutex_unlock(&dir->i_mutex);
1279 } 1254 }
1280done: 1255 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1256 status = d_revalidate(dentry, nd);
1257 if (unlikely(status <= 0)) {
1258 if (status < 0) {
1259 dput(dentry);
1260 return status;
1261 }
1262 if (!d_invalidate(dentry)) {
1263 dput(dentry);
1264 dentry = NULL;
1265 need_reval = 1;
1266 goto retry;
1267 }
1268 }
1269
1281 path->mnt = mnt; 1270 path->mnt = mnt;
1282 path->dentry = dentry; 1271 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1272 err = follow_managed(path, nd->flags);
@@ -1287,39 +1276,113 @@ done:
1287 } 1276 }
1288 *inode = path->dentry->d_inode; 1277 *inode = path->dentry->d_inode;
1289 return 0; 1278 return 0;
1279}
1280
1281static inline int may_lookup(struct nameidata *nd)
1282{
1283 if (nd->flags & LOOKUP_RCU) {
1284 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1285 if (err != -ECHILD)
1286 return err;
1287 if (nameidata_drop_rcu(nd))
1288 return -ECHILD;
1289 }
1290 return exec_permission(nd->inode, 0);
1291}
1290 1292
1291need_lookup: 1293static inline int handle_dots(struct nameidata *nd, int type)
1292 dir = parent->d_inode; 1294{
1293 BUG_ON(nd->inode != dir); 1295 if (type == LAST_DOTDOT) {
1296 if (nd->flags & LOOKUP_RCU) {
1297 if (follow_dotdot_rcu(nd))
1298 return -ECHILD;
1299 } else
1300 follow_dotdot(nd);
1301 }
1302 return 0;
1303}
1294 1304
1295 mutex_lock(&dir->i_mutex); 1305static void terminate_walk(struct nameidata *nd)
1296 /* 1306{
1297 * First re-do the cached lookup just in case it was created 1307 if (!(nd->flags & LOOKUP_RCU)) {
1298 * while we waited for the directory semaphore, or the first 1308 path_put(&nd->path);
1299 * lookup failed due to an unrelated rename. 1309 } else {
1300 * 1310 nd->flags &= ~LOOKUP_RCU;
1301 * This could use version numbering or similar to avoid unnecessary 1311 if (!(nd->flags & LOOKUP_ROOT))
1302 * cache lookups, but then we'd have to do the first lookup in the 1312 nd->root.mnt = NULL;
1303 * non-racy way. However in the common case here, everything should 1313 rcu_read_unlock();
1304 * be hot in cache, so would it be a big win? 1314 br_read_unlock(vfsmount_lock);
1305 */
1306 dentry = d_lookup(parent, name);
1307 if (likely(!dentry)) {
1308 dentry = d_alloc_and_lookup(parent, name, nd);
1309 mutex_unlock(&dir->i_mutex);
1310 if (IS_ERR(dentry))
1311 goto fail;
1312 goto done;
1313 } 1315 }
1316}
1317
1318static inline int walk_component(struct nameidata *nd, struct path *path,
1319 struct qstr *name, int type, int follow)
1320{
1321 struct inode *inode;
1322 int err;
1314 /* 1323 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1324 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1325 * to be able to know about the current root directory and
1326 * parent relationships.
1317 */ 1327 */
1318 mutex_unlock(&dir->i_mutex); 1328 if (unlikely(type != LAST_NORM))
1319 goto found; 1329 return handle_dots(nd, type);
1330 err = do_lookup(nd, name, path, &inode);
1331 if (unlikely(err)) {
1332 terminate_walk(nd);
1333 return err;
1334 }
1335 if (!inode) {
1336 path_to_nameidata(path, nd);
1337 terminate_walk(nd);
1338 return -ENOENT;
1339 }
1340 if (unlikely(inode->i_op->follow_link) && follow) {
1341 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1342 return -ECHILD;
1343 BUG_ON(inode != path->dentry->d_inode);
1344 return 1;
1345 }
1346 path_to_nameidata(path, nd);
1347 nd->inode = inode;
1348 return 0;
1349}
1320 1350
1321fail: 1351/*
1322 return PTR_ERR(dentry); 1352 * This limits recursive symlink follows to 8, while
1353 * limiting consecutive symlinks to 40.
1354 *
1355 * Without that kind of total limit, nasty chains of consecutive
1356 * symlinks can cause almost arbitrarily long lookups.
1357 */
1358static inline int nested_symlink(struct path *path, struct nameidata *nd)
1359{
1360 int res;
1361
1362 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1363 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1364 path_put_conditional(path, nd);
1365 path_put(&nd->path);
1366 return -ELOOP;
1367 }
1368
1369 nd->depth++;
1370 current->link_count++;
1371
1372 do {
1373 struct path link = *path;
1374 void *cookie;
1375
1376 res = follow_link(&link, nd, &cookie);
1377 if (!res)
1378 res = walk_component(nd, path, &nd->last,
1379 nd->last_type, LOOKUP_FOLLOW);
1380 put_link(nd, &link, cookie);
1381 } while (res > 0);
1382
1383 current->link_count--;
1384 nd->depth--;
1385 return res;
1323} 1386}
1324 1387
1325/* 1388/*
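A note on the limits enforced by nested_symlink() above: nd->depth and current->link_count cap the nesting at MAX_NESTED_LINKS (8 at this point in the tree), while the total number of symlinks followed in a single lookup is capped separately, at roughly 40, inside follow_link() via current->total_link_count, exactly as the in-code comment says. The userspace sketch below only makes that total limit visible by chaining symlinks well past it; the file names and the exact cut-off are assumptions for illustration, not something this hunk defines.

/* Sketch: build a long symlink chain and watch open() fail with ELOOP
 * once the walker has followed more links than it is willing to. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char prev[64] = "target", cur[64];
	int i, fd;

	fd = open("target", O_CREAT | O_WRONLY, 0644);
	if (fd >= 0)
		close(fd);

	for (i = 0; i < 50; i++) {		/* well past the ~40 total limit */
		snprintf(cur, sizeof(cur), "link%d", i);
		unlink(cur);			/* ignore failure: may not exist */
		if (symlink(prev, cur) < 0) {
			perror("symlink");
			return 1;
		}
		strcpy(prev, cur);
	}

	fd = open(prev, O_RDONLY);		/* resolves the whole chain */
	if (fd < 0)
		perror("open");			/* expected: ELOOP */
	else
		close(fd);
	return 0;
}

A much shorter chain (a handful of links) opens normally; only the total count of links followed in one walk trips the limit.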
@@ -1339,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1402 while (*name=='/')
1340 name++; 1403 name++;
1341 if (!*name) 1404 if (!*name)
1342 goto return_reval; 1405 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1406
1347 /* At this point we know we have a real path component. */ 1407 /* At this point we know we have a real path component. */
1348 for(;;) { 1408 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1409 unsigned long hash;
1351 struct qstr this; 1410 struct qstr this;
1352 unsigned int c; 1411 unsigned int c;
1412 int type;
1353 1413
1354 nd->flags |= LOOKUP_CONTINUE; 1414 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1415
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1416 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1417 if (err)
1367 break; 1418 break;
1368 1419
@@ -1378,52 +1429,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1429 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1430 this.hash = end_name_hash(hash);
1380 1431
1432 type = LAST_NORM;
1433 if (this.name[0] == '.') switch (this.len) {
1434 case 2:
1435 if (this.name[1] == '.') {
1436 type = LAST_DOTDOT;
1437 nd->flags |= LOOKUP_JUMPED;
1438 }
1439 break;
1440 case 1:
1441 type = LAST_DOT;
1442 }
1443 if (likely(type == LAST_NORM)) {
1444 struct dentry *parent = nd->path.dentry;
1445 nd->flags &= ~LOOKUP_JUMPED;
1446 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1447 err = parent->d_op->d_hash(parent, nd->inode,
1448 &this);
1449 if (err < 0)
1450 break;
1451 }
1452 }
1453
1381 /* remove trailing slashes? */ 1454 /* remove trailing slashes? */
1382 if (!c) 1455 if (!c)
1383 goto last_component; 1456 goto last_component;
1384 while (*++name == '/'); 1457 while (*++name == '/');
1385 if (!*name) 1458 if (!*name)
1386 goto last_with_slashes; 1459 goto last_component;
1387 1460
1388 /* 1461 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1462 if (err < 0)
1390 * to be able to know about the current root directory and 1463 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1464
1416 if (inode->i_op->follow_link) { 1465 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1466 err = nested_symlink(&next, nd);
1418 if (err) 1467 if (err)
1419 goto return_err; 1468 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1469 }
1428 err = -ENOTDIR; 1470 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1471 if (!nd->inode->i_op->lookup)
@@ -1431,209 +1473,109 @@ exec_again:
1431 continue; 1473 continue;
1432 /* here ends the main loop */ 1474 /* here ends the main loop */
1433 1475
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1476last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1477 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1478 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1479 nd->last = this;
1480 nd->last_type = LAST_NORM; 1480 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1481 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1482 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1483 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1484 return err;
1518} 1485}
1519 1486
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1487static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1488 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 path_get(&nd->path);
1550 nd->flags |= LOOKUP_REVAL;
1551 result = link_path_walk(name, nd);
1552 }
1553
1554 path_put(&save);
1555
1556 return result;
1557}
1558
1559static void path_finish_rcu(struct nameidata *nd)
1560{
1561 if (nd->flags & LOOKUP_RCU) {
1562 /* RCU dangling. Cancel it. */
1563 nd->flags &= ~LOOKUP_RCU;
1564 nd->root.mnt = NULL;
1565 rcu_read_unlock();
1566 br_read_unlock(vfsmount_lock);
1567 }
1568 if (nd->file)
1569 fput(nd->file);
1570}
1571
1572static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1573{ 1489{
1574 int retval = 0; 1490 int retval = 0;
1575 int fput_needed; 1491 int fput_needed;
1576 struct file *file; 1492 struct file *file;
1577 1493
1578 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1494 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1579 nd->flags = flags | LOOKUP_RCU; 1495 nd->flags = flags | LOOKUP_JUMPED;
1580 nd->depth = 0; 1496 nd->depth = 0;
1497 if (flags & LOOKUP_ROOT) {
1498 struct inode *inode = nd->root.dentry->d_inode;
1499 if (*name) {
1500 if (!inode->i_op->lookup)
1501 return -ENOTDIR;
1502 retval = inode_permission(inode, MAY_EXEC);
1503 if (retval)
1504 return retval;
1505 }
1506 nd->path = nd->root;
1507 nd->inode = inode;
1508 if (flags & LOOKUP_RCU) {
1509 br_read_lock(vfsmount_lock);
1510 rcu_read_lock();
1511 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1512 } else {
1513 path_get(&nd->path);
1514 }
1515 return 0;
1516 }
1517
1581 nd->root.mnt = NULL; 1518 nd->root.mnt = NULL;
1582 nd->file = NULL;
1583 1519
1584 if (*name=='/') { 1520 if (*name=='/') {
1585 struct fs_struct *fs = current->fs; 1521 if (flags & LOOKUP_RCU) {
1586 unsigned seq; 1522 br_read_lock(vfsmount_lock);
1587 1523 rcu_read_lock();
1588 br_read_lock(vfsmount_lock); 1524 set_root_rcu(nd);
1589 rcu_read_lock(); 1525 } else {
1590 1526 set_root(nd);
1591 do { 1527 path_get(&nd->root);
1592 seq = read_seqcount_begin(&fs->seq); 1528 }
1593 nd->root = fs->root; 1529 nd->path = nd->root;
1594 nd->path = nd->root;
1595 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1596 } while (read_seqcount_retry(&fs->seq, seq));
1597
1598 } else if (dfd == AT_FDCWD) { 1530 } else if (dfd == AT_FDCWD) {
1599 struct fs_struct *fs = current->fs; 1531 if (flags & LOOKUP_RCU) {
1600 unsigned seq; 1532 struct fs_struct *fs = current->fs;
1601 1533 unsigned seq;
1602 br_read_lock(vfsmount_lock);
1603 rcu_read_lock();
1604 1534
1605 do { 1535 br_read_lock(vfsmount_lock);
1606 seq = read_seqcount_begin(&fs->seq); 1536 rcu_read_lock();
1607 nd->path = fs->pwd;
1608 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1609 } while (read_seqcount_retry(&fs->seq, seq));
1610 1537
1538 do {
1539 seq = read_seqcount_begin(&fs->seq);
1540 nd->path = fs->pwd;
1541 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1542 } while (read_seqcount_retry(&fs->seq, seq));
1543 } else {
1544 get_fs_pwd(current->fs, &nd->path);
1545 }
1611 } else { 1546 } else {
1612 struct dentry *dentry; 1547 struct dentry *dentry;
1613 1548
1614 file = fget_light(dfd, &fput_needed); 1549 file = fget_raw_light(dfd, &fput_needed);
1615 retval = -EBADF; 1550 retval = -EBADF;
1616 if (!file) 1551 if (!file)
1617 goto out_fail; 1552 goto out_fail;
1618 1553
1619 dentry = file->f_path.dentry; 1554 dentry = file->f_path.dentry;
1620 1555
1621 retval = -ENOTDIR; 1556 if (*name) {
1622 if (!S_ISDIR(dentry->d_inode->i_mode)) 1557 retval = -ENOTDIR;
1623 goto fput_fail; 1558 if (!S_ISDIR(dentry->d_inode->i_mode))
1559 goto fput_fail;
1624 1560
1625 retval = file_permission(file, MAY_EXEC); 1561 retval = file_permission(file, MAY_EXEC);
1626 if (retval) 1562 if (retval)
1627 goto fput_fail; 1563 goto fput_fail;
1564 }
1628 1565
1629 nd->path = file->f_path; 1566 nd->path = file->f_path;
1630 if (fput_needed) 1567 if (flags & LOOKUP_RCU) {
1631 nd->file = file; 1568 if (fput_needed)
1632 1569 *fp = file;
1633 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1570 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1634 br_read_lock(vfsmount_lock); 1571 br_read_lock(vfsmount_lock);
1635 rcu_read_lock(); 1572 rcu_read_lock();
1573 } else {
1574 path_get(&file->f_path);
1575 fput_light(file, fput_needed);
1576 }
1636 } 1577 }
1578
1637 nd->inode = nd->path.dentry->d_inode; 1579 nd->inode = nd->path.dentry->d_inode;
1638 return 0; 1580 return 0;
1639 1581
@@ -1643,60 +1585,23 @@ out_fail:
1643 return retval; 1585 return retval;
1644} 1586}
1645 1587
1646static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1588static inline int lookup_last(struct nameidata *nd, struct path *path)
1647{ 1589{
1648 int retval = 0; 1590 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1649 int fput_needed; 1591 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1650 struct file *file;
1651
1652 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1653 nd->flags = flags;
1654 nd->depth = 0;
1655 nd->root.mnt = NULL;
1656
1657 if (*name=='/') {
1658 set_root(nd);
1659 nd->path = nd->root;
1660 path_get(&nd->root);
1661 } else if (dfd == AT_FDCWD) {
1662 get_fs_pwd(current->fs, &nd->path);
1663 } else {
1664 struct dentry *dentry;
1665
1666 file = fget_light(dfd, &fput_needed);
1667 retval = -EBADF;
1668 if (!file)
1669 goto out_fail;
1670
1671 dentry = file->f_path.dentry;
1672
1673 retval = -ENOTDIR;
1674 if (!S_ISDIR(dentry->d_inode->i_mode))
1675 goto fput_fail;
1676 1592
1677 retval = file_permission(file, MAY_EXEC); 1593 nd->flags &= ~LOOKUP_PARENT;
1678 if (retval) 1594 return walk_component(nd, path, &nd->last, nd->last_type,
1679 goto fput_fail; 1595 nd->flags & LOOKUP_FOLLOW);
1680
1681 nd->path = file->f_path;
1682 path_get(&file->f_path);
1683
1684 fput_light(file, fput_needed);
1685 }
1686 nd->inode = nd->path.dentry->d_inode;
1687 return 0;
1688
1689fput_fail:
1690 fput_light(file, fput_needed);
1691out_fail:
1692 return retval;
1693} 1596}
1694 1597
1695/* Returns 0 and nd will be valid on success; Returns an error otherwise. */ 1598static int path_lookupat(int dfd, const char *name,
1696static int do_path_lookup(int dfd, const char *name, 1599static int path_lookupat(int dfd, const char *name,
1697 unsigned int flags, struct nameidata *nd) 1600 unsigned int flags, struct nameidata *nd)
1698{ 1601{
1699 int retval; 1602 struct file *base = NULL;
1603 struct path path;
1604 int err;
1700 1605
1701 /* 1606 /*
1702 * Path walking is largely split up into 2 different synchronisation 1607 * Path walking is largely split up into 2 different synchronisation
@@ -1712,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name,
1712 * be handled by restarting a traditional ref-walk (which will always 1617 * be handled by restarting a traditional ref-walk (which will always
1713 * be able to complete). 1618 * be able to complete).
1714 */ 1619 */
1715 retval = path_init_rcu(dfd, name, flags, nd); 1620 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1716 if (unlikely(retval)) 1621
1717 return retval; 1622 if (unlikely(err))
1718 retval = path_walk_rcu(name, nd); 1623 return err;
1719 path_finish_rcu(nd); 1624
1720 if (nd->root.mnt) { 1625 current->total_link_count = 0;
1721 path_put(&nd->root); 1626 err = link_path_walk(name, nd);
1722 nd->root.mnt = NULL; 1627
1628 if (!err && !(flags & LOOKUP_PARENT)) {
1629 err = lookup_last(nd, &path);
1630 while (err > 0) {
1631 void *cookie;
1632 struct path link = path;
1633 nd->flags |= LOOKUP_PARENT;
1634 err = follow_link(&link, nd, &cookie);
1635 if (!err)
1636 err = lookup_last(nd, &path);
1637 put_link(nd, &link, cookie);
1638 }
1723 } 1639 }
1724 1640
1725 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1641 if (nd->flags & LOOKUP_RCU) {
1726 /* slower, locked walk */ 1642 /* went all way through without dropping RCU */
1727 if (retval == -ESTALE) 1643 BUG_ON(err);
1728 flags |= LOOKUP_REVAL; 1644 if (nameidata_drop_rcu_last(nd))
1729 retval = path_init(dfd, name, flags, nd); 1645 err = -ECHILD;
1730 if (unlikely(retval)) 1646 }
1731 return retval; 1647
1732 retval = path_walk(name, nd); 1648 if (!err)
1733 if (nd->root.mnt) { 1649 err = handle_reval_path(nd);
1734 path_put(&nd->root); 1650
1735 nd->root.mnt = NULL; 1651 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1652 if (!nd->inode->i_op->lookup) {
1653 path_put(&nd->path);
1654 return -ENOTDIR;
1736 } 1655 }
1737 } 1656 }
1738 1657
1658 if (base)
1659 fput(base);
1660
1661 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1662 path_put(&nd->root);
1663 nd->root.mnt = NULL;
1664 }
1665 return err;
1666}
1667
1668static int do_path_lookup(int dfd, const char *name,
1669 unsigned int flags, struct nameidata *nd)
1670{
1671 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1672 if (unlikely(retval == -ECHILD))
1673 retval = path_lookupat(dfd, name, flags, nd);
1674 if (unlikely(retval == -ESTALE))
1675 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1676
1739 if (likely(!retval)) { 1677 if (likely(!retval)) {
1740 if (unlikely(!audit_dummy_context())) { 1678 if (unlikely(!audit_dummy_context())) {
1741 if (nd->path.dentry && nd->inode) 1679 if (nd->path.dentry && nd->inode)
1742 audit_inode(name, nd->path.dentry); 1680 audit_inode(name, nd->path.dentry);
1743 } 1681 }
1744 } 1682 }
1745
1746 return retval; 1683 return retval;
1747} 1684}
1748 1685
1749int path_lookup(const char *name, unsigned int flags, 1686int kern_path_parent(const char *name, struct nameidata *nd)
1750 struct nameidata *nd)
1751{ 1687{
1752 return do_path_lookup(AT_FDCWD, name, flags, nd); 1688 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1753} 1689}
1754 1690
1755int kern_path(const char *name, unsigned int flags, struct path *path) 1691int kern_path(const char *name, unsigned int flags, struct path *path)
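With the reworked do_path_lookup() above, every lookup is attempted up to three times: an RCU-walk first, a plain ref-walk if that bails out with -ECHILD, and a final ref-walk with LOOKUP_REVAL if the second pass comes back -ESTALE. The same cadence reappears later in do_filp_open(). The fragment below is only a standalone model of that retry shape with the walkers stubbed out; the stub names, constants and behaviour are illustrative, not kernel API.

#include <stdio.h>

#define DEMO_ECHILD 10		/* stand-ins for the kernel's -ECHILD / -ESTALE */
#define DEMO_ESTALE 116

/* Stubbed "walker": pretend the rcu pass bails out and the reval pass wins. */
static int walk(const char *name, int rcu, int reval)
{
	(void)name;
	if (rcu)
		return -DEMO_ECHILD;	/* e.g. a d_revalidate that can't run under RCU */
	if (!reval)
		return -DEMO_ESTALE;	/* e.g. a dentry that went stale mid-walk */
	return 0;
}

static int lookup(const char *name)
{
	int err = walk(name, 1, 0);		/* pass 1: rcu-walk */
	if (err == -DEMO_ECHILD)
		err = walk(name, 0, 0);		/* pass 2: ref-walk */
	if (err == -DEMO_ESTALE)
		err = walk(name, 0, 1);		/* pass 3: ref-walk + revalidation */
	return err;
}

int main(void)
{
	printf("lookup -> %d\n", lookup("/some/path"));	/* prints 0 */
	return 0;
}

The ordering matters: the cheap lockless pass covers the common fully-cached case, and the heavier passes only run when it cannot complete.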
@@ -1773,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1773 const char *name, unsigned int flags, 1709 const char *name, unsigned int flags,
1774 struct nameidata *nd) 1710 struct nameidata *nd)
1775{ 1711{
1776 int retval; 1712 nd->root.dentry = dentry;
1777 1713 nd->root.mnt = mnt;
1778 /* same as do_path_lookup */ 1714 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1779 nd->last_type = LAST_ROOT; 1715 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1780 nd->flags = flags;
1781 nd->depth = 0;
1782
1783 nd->path.dentry = dentry;
1784 nd->path.mnt = mnt;
1785 path_get(&nd->path);
1786 nd->root = nd->path;
1787 path_get(&nd->root);
1788 nd->inode = nd->path.dentry->d_inode;
1789
1790 retval = path_walk(name, nd);
1791 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1792 nd->inode))
1793 audit_inode(name, nd->path.dentry);
1794
1795 path_put(&nd->root);
1796 nd->root.mnt = NULL;
1797
1798 return retval;
1799} 1716}
1800 1717
1801static struct dentry *__lookup_hash(struct qstr *name, 1718static struct dentry *__lookup_hash(struct qstr *name,
@@ -1810,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1810 return ERR_PTR(err); 1727 return ERR_PTR(err);
1811 1728
1812 /* 1729 /*
1813 * See if the low-level filesystem might want
1814 * to use its own hash..
1815 */
1816 if (base->d_flags & DCACHE_OP_HASH) {
1817 err = base->d_op->d_hash(base, inode, name);
1818 dentry = ERR_PTR(err);
1819 if (err < 0)
1820 goto out;
1821 }
1822
1823 /*
1824 * Don't bother with __d_lookup: callers are for creat as 1730 * Don't bother with __d_lookup: callers are for creat as
1825 * well as unlink, so a lot of the time it would cost 1731 * well as unlink, so a lot of the time it would cost
1826 * a double lookup. 1732 * a double lookup.
@@ -1832,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1832 1738
1833 if (!dentry) 1739 if (!dentry)
1834 dentry = d_alloc_and_lookup(base, name, nd); 1740 dentry = d_alloc_and_lookup(base, name, nd);
1835out: 1741
1836 return dentry; 1742 return dentry;
1837} 1743}
1838 1744
@@ -1846,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1846 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1752 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1847} 1753}
1848 1754
1849static int __lookup_one_len(const char *name, struct qstr *this,
1850 struct dentry *base, int len)
1851{
1852 unsigned long hash;
1853 unsigned int c;
1854
1855 this->name = name;
1856 this->len = len;
1857 if (!len)
1858 return -EACCES;
1859
1860 hash = init_name_hash();
1861 while (len--) {
1862 c = *(const unsigned char *)name++;
1863 if (c == '/' || c == '\0')
1864 return -EACCES;
1865 hash = partial_name_hash(c, hash);
1866 }
1867 this->hash = end_name_hash(hash);
1868 return 0;
1869}
1870
1871/** 1755/**
1872 * lookup_one_len - filesystem helper to lookup single pathname component 1756 * lookup_one_len - filesystem helper to lookup single pathname component
1873 * @name: pathname component to lookup 1757 * @name: pathname component to lookup
@@ -1881,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1881 */ 1765 */
1882struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1766struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1883{ 1767{
1884 int err;
1885 struct qstr this; 1768 struct qstr this;
1769 unsigned long hash;
1770 unsigned int c;
1886 1771
1887 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1772 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1888 1773
1889 err = __lookup_one_len(name, &this, base, len); 1774 this.name = name;
1890 if (err) 1775 this.len = len;
1891 return ERR_PTR(err); 1776 if (!len)
1777 return ERR_PTR(-EACCES);
1778
1779 hash = init_name_hash();
1780 while (len--) {
1781 c = *(const unsigned char *)name++;
1782 if (c == '/' || c == '\0')
1783 return ERR_PTR(-EACCES);
1784 hash = partial_name_hash(c, hash);
1785 }
1786 this.hash = end_name_hash(hash);
1787 /*
1788 * See if the low-level filesystem might want
1789 * to use its own hash..
1790 */
1791 if (base->d_flags & DCACHE_OP_HASH) {
1792 int err = base->d_op->d_hash(base, base->d_inode, &this);
1793 if (err < 0)
1794 return ERR_PTR(err);
1795 }
1892 1796
1893 return __lookup_hash(&this, base, NULL); 1797 return __lookup_hash(&this, base, NULL);
1894} 1798}
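lookup_one_len() above now does the name hashing and the optional per-filesystem d_hash() call itself instead of going through the removed __lookup_one_len(). For in-kernel callers the contract is unchanged: hold the parent directory's i_mutex, pass a single component containing no '/' or NUL, and dput() whatever comes back. A rough sketch of a typical caller follows; the helper name is made up and this is not code from the patch.

/* Sketch only: an illustrative in-kernel caller, not part of this patch. */
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/string.h>

static struct dentry *example_find_child(struct dentry *parent, const char *name)
{
	struct dentry *child;

	/* lookup_one_len() warns unless the parent directory is locked. */
	mutex_lock(&parent->d_inode->i_mutex);
	child = lookup_one_len(name, parent, strlen(name));
	mutex_unlock(&parent->d_inode->i_mutex);

	/* ERR_PTR(-EACCES) for "", or for names containing '/' or '\0';
	 * a negative dentry (d_inode == NULL) means "no such entry yet".
	 * The caller owns the reference and must dput() it when done. */
	return child;
}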
@@ -1897,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1897 struct path *path) 1801 struct path *path)
1898{ 1802{
1899 struct nameidata nd; 1803 struct nameidata nd;
1900 char *tmp = getname(name); 1804 char *tmp = getname_flags(name, flags);
1901 int err = PTR_ERR(tmp); 1805 int err = PTR_ERR(tmp);
1902 if (!IS_ERR(tmp)) { 1806 if (!IS_ERR(tmp)) {
1903 1807
@@ -2077,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2077 return error; 1981 return error;
2078} 1982}
2079 1983
2080int may_open(struct path *path, int acc_mode, int flag) 1984static int may_open(struct path *path, int acc_mode, int flag)
2081{ 1985{
2082 struct dentry *dentry = path->dentry; 1986 struct dentry *dentry = path->dentry;
2083 struct inode *inode = dentry->d_inode; 1987 struct inode *inode = dentry->d_inode;
2084 int error; 1988 int error;
2085 1989
1990 /* O_PATH? */
1991 if (!acc_mode)
1992 return 0;
1993
2086 if (!inode) 1994 if (!inode)
2087 return -ENOENT; 1995 return -ENOENT;
2088 1996
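The early return added to may_open() above is what lets an O_PATH open succeed with an empty acc_mode: there is nothing to permission-check or truncate, and the caller only gets a location handle. A minimal userspace sketch of the intended use follows; O_PATH is defined by hand in case the libc headers predate it, and the numeric value is the commonly used generic one, so treat it as an assumption.

/* Sketch: open a directory with O_PATH and use it purely as an anchor
 * for openat(); reads and writes through the O_PATH fd are refused. */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

#ifndef O_PATH
#define O_PATH 010000000
#endif

int main(void)
{
	int dirfd, fd;

	dirfd = open("/etc", O_PATH | O_DIRECTORY);
	if (dirfd < 0) {
		perror("open(O_PATH)");
		return 1;
	}

	fd = openat(dirfd, "hostname", O_RDONLY);	/* dirfd works as an anchor */
	if (fd < 0)
		perror("openat");
	else
		close(fd);

	close(dirfd);
	return 0;
}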
@@ -2151,34 +2059,6 @@ static int handle_truncate(struct file *filp)
2151} 2059}
2152 2060
2153/* 2061/*
2154 * Be careful about ever adding any more callers of this
2155 * function. Its flags must be in the namei format, not
2156 * what get passed to sys_open().
2157 */
2158static int __open_namei_create(struct nameidata *nd, struct path *path,
2159 int open_flag, int mode)
2160{
2161 int error;
2162 struct dentry *dir = nd->path.dentry;
2163
2164 if (!IS_POSIXACL(dir->d_inode))
2165 mode &= ~current_umask();
2166 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2167 if (error)
2168 goto out_unlock;
2169 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2170out_unlock:
2171 mutex_unlock(&dir->d_inode->i_mutex);
2172 dput(nd->path.dentry);
2173 nd->path.dentry = path->dentry;
2174
2175 if (error)
2176 return error;
2177 /* Don't check for write permission, don't truncate */
2178 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2179}
2180
2181/*
2182 * Note that while the flag value (low two bits) for sys_open means: 2062 * Note that while the flag value (low two bits) for sys_open means:
2183 * 00 - read-only 2063 * 00 - read-only
2184 * 01 - write-only 2064 * 01 - write-only
@@ -2202,126 +2082,115 @@ static inline int open_to_namei_flags(int flag)
2202 return flag; 2082 return flag;
2203} 2083}
2204 2084
2205static int open_will_truncate(int flag, struct inode *inode)
2206{
2207 /*
2208 * We'll never write to the fs underlying
2209 * a device file.
2210 */
2211 if (special_file(inode->i_mode))
2212 return 0;
2213 return (flag & O_TRUNC);
2214}
2215
2216static struct file *finish_open(struct nameidata *nd,
2217 int open_flag, int acc_mode)
2218{
2219 struct file *filp;
2220 int will_truncate;
2221 int error;
2222
2223 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2224 if (will_truncate) {
2225 error = mnt_want_write(nd->path.mnt);
2226 if (error)
2227 goto exit;
2228 }
2229 error = may_open(&nd->path, acc_mode, open_flag);
2230 if (error) {
2231 if (will_truncate)
2232 mnt_drop_write(nd->path.mnt);
2233 goto exit;
2234 }
2235 filp = nameidata_to_filp(nd);
2236 if (!IS_ERR(filp)) {
2237 error = ima_file_check(filp, acc_mode);
2238 if (error) {
2239 fput(filp);
2240 filp = ERR_PTR(error);
2241 }
2242 }
2243 if (!IS_ERR(filp)) {
2244 if (will_truncate) {
2245 error = handle_truncate(filp);
2246 if (error) {
2247 fput(filp);
2248 filp = ERR_PTR(error);
2249 }
2250 }
2251 }
2252 /*
2253 * It is now safe to drop the mnt write
2254 * because the filp has had a write taken
2255 * on its behalf.
2256 */
2257 if (will_truncate)
2258 mnt_drop_write(nd->path.mnt);
2259 path_put(&nd->path);
2260 return filp;
2261
2262exit:
2263 path_put(&nd->path);
2264 return ERR_PTR(error);
2265}
2266
2267/* 2085/*
2268 * Handle O_CREAT case for do_filp_open 2086 * Handle the last step of open()
2269 */ 2087 */
2270static struct file *do_last(struct nameidata *nd, struct path *path, 2088static struct file *do_last(struct nameidata *nd, struct path *path,
2271 int open_flag, int acc_mode, 2089 const struct open_flags *op, const char *pathname)
2272 int mode, const char *pathname)
2273{ 2090{
2274 struct dentry *dir = nd->path.dentry; 2091 struct dentry *dir = nd->path.dentry;
2092 struct dentry *dentry;
2093 int open_flag = op->open_flag;
2094 int will_truncate = open_flag & O_TRUNC;
2095 int want_write = 0;
2096 int acc_mode = op->acc_mode;
2275 struct file *filp; 2097 struct file *filp;
2276 int error = -EISDIR; 2098 int error;
2099
2100 nd->flags &= ~LOOKUP_PARENT;
2101 nd->flags |= op->intent;
2277 2102
2278 switch (nd->last_type) { 2103 switch (nd->last_type) {
2279 case LAST_DOTDOT: 2104 case LAST_DOTDOT:
2280 follow_dotdot(nd);
2281 dir = nd->path.dentry;
2282 case LAST_DOT: 2105 case LAST_DOT:
2283 if (need_reval_dot(dir)) { 2106 error = handle_dots(nd, nd->last_type);
2284 int status = d_revalidate(nd->path.dentry, nd); 2107 if (error)
2285 if (!status) 2108 return ERR_PTR(error);
2286 status = -ESTALE;
2287 if (status < 0) {
2288 error = status;
2289 goto exit;
2290 }
2291 }
2292 /* fallthrough */ 2109 /* fallthrough */
2293 case LAST_ROOT: 2110 case LAST_ROOT:
2294 goto exit; 2111 if (nd->flags & LOOKUP_RCU) {
2112 if (nameidata_drop_rcu_last(nd))
2113 return ERR_PTR(-ECHILD);
2114 }
2115 error = handle_reval_path(nd);
2116 if (error)
2117 goto exit;
2118 audit_inode(pathname, nd->path.dentry);
2119 if (open_flag & O_CREAT) {
2120 error = -EISDIR;
2121 goto exit;
2122 }
2123 goto ok;
2295 case LAST_BIND: 2124 case LAST_BIND:
2125 /* can't be RCU mode here */
2126 error = handle_reval_path(nd);
2127 if (error)
2128 goto exit;
2296 audit_inode(pathname, dir); 2129 audit_inode(pathname, dir);
2297 goto ok; 2130 goto ok;
2298 } 2131 }
2299 2132
2133 if (!(open_flag & O_CREAT)) {
2134 int symlink_ok = 0;
2135 if (nd->last.name[nd->last.len])
2136 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2137 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2138 symlink_ok = 1;
2139 /* we _can_ be in RCU mode here */
2140 error = walk_component(nd, path, &nd->last, LAST_NORM,
2141 !symlink_ok);
2142 if (error < 0)
2143 return ERR_PTR(error);
2144 if (error) /* symlink */
2145 return NULL;
2146 /* sayonara */
2147 if (nd->flags & LOOKUP_RCU) {
2148 if (nameidata_drop_rcu_last(nd))
2149 return ERR_PTR(-ECHILD);
2150 }
2151
2152 error = -ENOTDIR;
2153 if (nd->flags & LOOKUP_DIRECTORY) {
2154 if (!nd->inode->i_op->lookup)
2155 goto exit;
2156 }
2157 audit_inode(pathname, nd->path.dentry);
2158 goto ok;
2159 }
2160
2161 /* create side of things */
2162
2163 if (nd->flags & LOOKUP_RCU) {
2164 if (nameidata_drop_rcu_last(nd))
2165 return ERR_PTR(-ECHILD);
2166 }
2167
2168 audit_inode(pathname, dir);
2169 error = -EISDIR;
2300 /* trailing slashes? */ 2170 /* trailing slashes? */
2301 if (nd->last.name[nd->last.len]) 2171 if (nd->last.name[nd->last.len])
2302 goto exit; 2172 goto exit;
2303 2173
2304 mutex_lock(&dir->d_inode->i_mutex); 2174 mutex_lock(&dir->d_inode->i_mutex);
2305 2175
2306 path->dentry = lookup_hash(nd); 2176 dentry = lookup_hash(nd);
2307 path->mnt = nd->path.mnt; 2177 error = PTR_ERR(dentry);
2308 2178 if (IS_ERR(dentry)) {
2309 error = PTR_ERR(path->dentry);
2310 if (IS_ERR(path->dentry)) {
2311 mutex_unlock(&dir->d_inode->i_mutex); 2179 mutex_unlock(&dir->d_inode->i_mutex);
2312 goto exit; 2180 goto exit;
2313 } 2181 }
2314 2182
2315 if (IS_ERR(nd->intent.open.file)) { 2183 path->dentry = dentry;
2316 error = PTR_ERR(nd->intent.open.file); 2184 path->mnt = nd->path.mnt;
2317 goto exit_mutex_unlock;
2318 }
2319 2185
2320 /* Negative dentry, just create the file */ 2186 /* Negative dentry, just create the file */
2321 if (!path->dentry->d_inode) { 2187 if (!dentry->d_inode) {
2188 int mode = op->mode;
2189 if (!IS_POSIXACL(dir->d_inode))
2190 mode &= ~current_umask();
2322 /* 2191 /*
2323 * This write is needed to ensure that a 2192 * This write is needed to ensure that a
2324 * ro->rw transition does not occur between 2193 * rw->ro transition does not occur between
2325 * the time when the file is created and when 2194 * the time when the file is created and when
2326 * a permanent write count is taken through 2195 * a permanent write count is taken through
2327 * the 'struct file' in nameidata_to_filp(). 2196 * the 'struct file' in nameidata_to_filp().
@@ -2329,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2329 error = mnt_want_write(nd->path.mnt); 2198 error = mnt_want_write(nd->path.mnt);
2330 if (error) 2199 if (error)
2331 goto exit_mutex_unlock; 2200 goto exit_mutex_unlock;
2332 error = __open_namei_create(nd, path, open_flag, mode); 2201 want_write = 1;
2333 if (error) { 2202 /* Don't check for write permission, don't truncate */
2334 mnt_drop_write(nd->path.mnt); 2203 open_flag &= ~O_TRUNC;
2335 goto exit; 2204 will_truncate = 0;
2336 } 2205 acc_mode = MAY_OPEN;
2337 filp = nameidata_to_filp(nd); 2206 error = security_path_mknod(&nd->path, dentry, mode, 0);
2338 mnt_drop_write(nd->path.mnt); 2207 if (error)
2339 path_put(&nd->path); 2208 goto exit_mutex_unlock;
2340 if (!IS_ERR(filp)) { 2209 error = vfs_create(dir->d_inode, dentry, mode, nd);
2341 error = ima_file_check(filp, acc_mode); 2210 if (error)
2342 if (error) { 2211 goto exit_mutex_unlock;
2343 fput(filp); 2212 mutex_unlock(&dir->d_inode->i_mutex);
2344 filp = ERR_PTR(error); 2213 dput(nd->path.dentry);
2345 } 2214 nd->path.dentry = dentry;
2346 } 2215 goto common;
2347 return filp;
2348 } 2216 }
2349 2217
2350 /* 2218 /*
@@ -2374,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2374 if (S_ISDIR(nd->inode->i_mode)) 2242 if (S_ISDIR(nd->inode->i_mode))
2375 goto exit; 2243 goto exit;
2376ok: 2244ok:
2377 filp = finish_open(nd, open_flag, acc_mode); 2245 if (!S_ISREG(nd->inode->i_mode))
2246 will_truncate = 0;
2247
2248 if (will_truncate) {
2249 error = mnt_want_write(nd->path.mnt);
2250 if (error)
2251 goto exit;
2252 want_write = 1;
2253 }
2254common:
2255 error = may_open(&nd->path, acc_mode, open_flag);
2256 if (error)
2257 goto exit;
2258 filp = nameidata_to_filp(nd);
2259 if (!IS_ERR(filp)) {
2260 error = ima_file_check(filp, op->acc_mode);
2261 if (error) {
2262 fput(filp);
2263 filp = ERR_PTR(error);
2264 }
2265 }
2266 if (!IS_ERR(filp)) {
2267 if (will_truncate) {
2268 error = handle_truncate(filp);
2269 if (error) {
2270 fput(filp);
2271 filp = ERR_PTR(error);
2272 }
2273 }
2274 }
2275out:
2276 if (want_write)
2277 mnt_drop_write(nd->path.mnt);
2278 path_put(&nd->path);
2378 return filp; 2279 return filp;
2379 2280
2380exit_mutex_unlock: 2281exit_mutex_unlock:
@@ -2382,197 +2283,103 @@ exit_mutex_unlock:
2382exit_dput: 2283exit_dput:
2383 path_put_conditional(path, nd); 2284 path_put_conditional(path, nd);
2384exit: 2285exit:
2385 path_put(&nd->path); 2286 filp = ERR_PTR(error);
2386 return ERR_PTR(error); 2287 goto out;
2387} 2288}
2388 2289
2389/* 2290static struct file *path_openat(int dfd, const char *pathname,
2390 * Note that the low bits of the passed in "open_flag" 2291 struct nameidata *nd, const struct open_flags *op, int flags)
2391 * are not the same as in the local variable "flag". See
2392 * open_to_namei_flags() for more details.
2393 */
2394struct file *do_filp_open(int dfd, const char *pathname,
2395 int open_flag, int mode, int acc_mode)
2396{ 2292{
2293 struct file *base = NULL;
2397 struct file *filp; 2294 struct file *filp;
2398 struct nameidata nd;
2399 int error;
2400 struct path path; 2295 struct path path;
2401 int count = 0; 2296 int error;
2402 int flag = open_to_namei_flags(open_flag);
2403 int flags;
2404
2405 if (!(open_flag & O_CREAT))
2406 mode = 0;
2407
2408 /* Must never be set by userspace */
2409 open_flag &= ~FMODE_NONOTIFY;
2410
2411 /*
2412 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2413 * check for O_DSYNC if they need any syncing at all we enforce it's
2414 * always set instead of having to deal with possibly weird behaviour
2415 * for malicious applications setting only __O_SYNC.
2416 */
2417 if (open_flag & __O_SYNC)
2418 open_flag |= O_DSYNC;
2419
2420 if (!acc_mode)
2421 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2422
2423 /* O_TRUNC implies we need access checks for write permissions */
2424 if (open_flag & O_TRUNC)
2425 acc_mode |= MAY_WRITE;
2426
2427 /* Allow the LSM permission hook to distinguish append
2428 access from general write access. */
2429 if (open_flag & O_APPEND)
2430 acc_mode |= MAY_APPEND;
2431
2432 flags = LOOKUP_OPEN;
2433 if (open_flag & O_CREAT) {
2434 flags |= LOOKUP_CREATE;
2435 if (open_flag & O_EXCL)
2436 flags |= LOOKUP_EXCL;
2437 }
2438 if (open_flag & O_DIRECTORY)
2439 flags |= LOOKUP_DIRECTORY;
2440 if (!(open_flag & O_NOFOLLOW))
2441 flags |= LOOKUP_FOLLOW;
2442 2297
2443 filp = get_empty_filp(); 2298 filp = get_empty_filp();
2444 if (!filp) 2299 if (!filp)
2445 return ERR_PTR(-ENFILE); 2300 return ERR_PTR(-ENFILE);
2446 2301
2447 filp->f_flags = open_flag; 2302 filp->f_flags = op->open_flag;
2448 nd.intent.open.file = filp; 2303 nd->intent.open.file = filp;
2449 nd.intent.open.flags = flag; 2304 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2450 nd.intent.open.create_mode = mode; 2305 nd->intent.open.create_mode = op->mode;
2451
2452 if (open_flag & O_CREAT)
2453 goto creat;
2454 2306
2455 /* !O_CREAT, simple open */ 2307 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2456 error = do_path_lookup(dfd, pathname, flags, &nd);
2457 if (unlikely(error)) 2308 if (unlikely(error))
2458 goto out_filp; 2309 goto out_filp;
2459 error = -ELOOP;
2460 if (!(nd.flags & LOOKUP_FOLLOW)) {
2461 if (nd.inode->i_op->follow_link)
2462 goto out_path;
2463 }
2464 error = -ENOTDIR;
2465 if (nd.flags & LOOKUP_DIRECTORY) {
2466 if (!nd.inode->i_op->lookup)
2467 goto out_path;
2468 }
2469 audit_inode(pathname, nd.path.dentry);
2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2472 return filp;
2473 2310
2474creat: 2311 current->total_link_count = 0;
2475 /* OK, have to create the file. Find the parent. */ 2312 error = link_path_walk(pathname, nd);
2476 error = path_init_rcu(dfd, pathname,
2477 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2478 if (error)
2479 goto out_filp;
2480 error = path_walk_rcu(pathname, &nd);
2481 path_finish_rcu(&nd);
2482 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2483 /* slower, locked walk */
2484 if (error == -ESTALE) {
2485reval:
2486 flags |= LOOKUP_REVAL;
2487 }
2488 error = path_init(dfd, pathname,
2489 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2490 if (error)
2491 goto out_filp;
2492
2493 error = path_walk_simple(pathname, &nd);
2494 }
2495 if (unlikely(error)) 2313 if (unlikely(error))
2496 goto out_filp; 2314 goto out_filp;
2497 if (unlikely(!audit_dummy_context()))
2498 audit_inode(pathname, nd.path.dentry);
2499 2315
2500 /* 2316 filp = do_last(nd, &path, op, pathname);
2501 * We have the parent and last component.
2502 */
2503 nd.flags = flags;
2504 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2505 while (unlikely(!filp)) { /* trailing symlink */ 2317 while (unlikely(!filp)) { /* trailing symlink */
2506 struct path link = path; 2318 struct path link = path;
2507 struct inode *linki = link.dentry->d_inode;
2508 void *cookie; 2319 void *cookie;
2509 error = -ELOOP; 2320 if (!(nd->flags & LOOKUP_FOLLOW)) {
2510 if (!(nd.flags & LOOKUP_FOLLOW)) 2321 path_put_conditional(&path, nd);
2511 goto exit_dput; 2322 path_put(&nd->path);
2512 if (count++ == 32) 2323 filp = ERR_PTR(-ELOOP);
2513 goto exit_dput; 2324 break;
2514 /*
2515 * This is subtle. Instead of calling do_follow_link() we do
2516 * the thing by hand. The reason is that this way we have zero
2517 * link_count and path_walk() (called from ->follow_link)
2518 * honoring LOOKUP_PARENT. After that we have the parent and
2519 * last component, i.e. we are in the same situation as after
2520 * the first path_walk(). Well, almost - if the last component
2521 * is normal we get its copy stored in nd->last.name and we will
2522 * have to putname() it when we are done. Procfs-like symlinks
2523 * just set LAST_BIND.
2524 */
2525 nd.flags |= LOOKUP_PARENT;
2526 error = security_inode_follow_link(link.dentry, &nd);
2527 if (error)
2528 goto exit_dput;
2529 error = __do_follow_link(&link, &nd, &cookie);
2530 if (unlikely(error)) {
2531 if (!IS_ERR(cookie) && linki->i_op->put_link)
2532 linki->i_op->put_link(link.dentry, &nd, cookie);
2533 /* nd.path had been dropped */
2534 nd.path = link;
2535 goto out_path;
2536 } 2325 }
2537 nd.flags &= ~LOOKUP_PARENT; 2326 nd->flags |= LOOKUP_PARENT;
2538 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2327 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2539 if (linki->i_op->put_link) 2328 error = follow_link(&link, nd, &cookie);
2540 linki->i_op->put_link(link.dentry, &nd, cookie); 2329 if (unlikely(error))
2541 path_put(&link); 2330 filp = ERR_PTR(error);
2331 else
2332 filp = do_last(nd, &path, op, pathname);
2333 put_link(nd, &link, cookie);
2542 } 2334 }
2543out: 2335out:
2544 if (nd.root.mnt) 2336 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2545 path_put(&nd.root); 2337 path_put(&nd->root);
2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2338 if (base)
2547 goto reval; 2339 fput(base);
2548 release_open_intent(&nd); 2340 release_open_intent(nd);
2549 return filp; 2341 return filp;
2550 2342
2551exit_dput:
2552 path_put_conditional(&path, &nd);
2553out_path:
2554 path_put(&nd.path);
2555out_filp: 2343out_filp:
2556 filp = ERR_PTR(error); 2344 filp = ERR_PTR(error);
2557 goto out; 2345 goto out;
2558} 2346}
2559 2347
2560/** 2348struct file *do_filp_open(int dfd, const char *pathname,
2561 * filp_open - open file and return file pointer 2349 const struct open_flags *op, int flags)
2562 * 2350{
2563 * @filename: path to open 2351 struct nameidata nd;
2564 * @flags: open flags as per the open(2) second argument 2352 struct file *filp;
2565 * @mode: mode for the new file if O_CREAT is set, else ignored 2353
2566 * 2354 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2567 * This is the helper to open a file from kernelspace if you really 2355 if (unlikely(filp == ERR_PTR(-ECHILD)))
2568 * have to. But in general you should not do this, so please move 2356 filp = path_openat(dfd, pathname, &nd, op, flags);
2569 * along, nothing to see here.. 2357 if (unlikely(filp == ERR_PTR(-ESTALE)))
2570 */ 2358 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2571struct file *filp_open(const char *filename, int flags, int mode) 2359 return filp;
2360}
2361
2362struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2363 const char *name, const struct open_flags *op, int flags)
2572{ 2364{
2573 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2365 struct nameidata nd;
2366 struct file *file;
2367
2368 nd.root.mnt = mnt;
2369 nd.root.dentry = dentry;
2370
2371 flags |= LOOKUP_ROOT;
2372
2373 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2374 return ERR_PTR(-ELOOP);
2375
2376 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2377 if (unlikely(file == ERR_PTR(-ECHILD)))
2378 file = path_openat(-1, name, &nd, op, flags);
2379 if (unlikely(file == ERR_PTR(-ESTALE)))
2380 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2381 return file;
2574} 2382}
2575EXPORT_SYMBOL(filp_open);
2576 2383
2577/** 2384/**
2578 * lookup_create - lookup a dentry, creating it if it doesn't exist 2385 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3111,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3111 return error; 2918 return error;
3112 2919
3113 mutex_lock(&inode->i_mutex); 2920 mutex_lock(&inode->i_mutex);
3114 error = dir->i_op->link(old_dentry, dir, new_dentry); 2921 /* Make sure we don't allow creating hardlink to an unlinked file */
2922 if (inode->i_nlink == 0)
2923 error = -ENOENT;
2924 else
2925 error = dir->i_op->link(old_dentry, dir, new_dentry);
3115 mutex_unlock(&inode->i_mutex); 2926 mutex_unlock(&inode->i_mutex);
3116 if (!error) 2927 if (!error)
3117 fsnotify_link(dir, inode, new_dentry); 2928 fsnotify_link(dir, inode, new_dentry);
@@ -3133,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3133 struct dentry *new_dentry; 2944 struct dentry *new_dentry;
3134 struct nameidata nd; 2945 struct nameidata nd;
3135 struct path old_path; 2946 struct path old_path;
2947 int how = 0;
3136 int error; 2948 int error;
3137 char *to; 2949 char *to;
3138 2950
3139 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2951 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3140 return -EINVAL; 2952 return -EINVAL;
2953 /*
2954 * To use null names we require CAP_DAC_READ_SEARCH
2955 * This ensures that not everyone will be able to create a
2956 * hardlink using the passed file descriptor.
2957 */
2958 if (flags & AT_EMPTY_PATH) {
2959 if (!capable(CAP_DAC_READ_SEARCH))
2960 return -ENOENT;
2961 how = LOOKUP_EMPTY;
2962 }
2963
2964 if (flags & AT_SYMLINK_FOLLOW)
2965 how |= LOOKUP_FOLLOW;
3141 2966
3142 error = user_path_at(olddfd, oldname, 2967 error = user_path_at(olddfd, oldname, how, &old_path);
3143 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3144 &old_path);
3145 if (error) 2968 if (error)
3146 return error; 2969 return error;
3147 2970
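The AT_EMPTY_PATH branch above turns an empty oldname into a LOOKUP_EMPTY lookup, gated on CAP_DAC_READ_SEARCH, so a file can be given a name directly from an open descriptor. A hedged userspace sketch follows; AT_EMPTY_PATH is defined by hand for older headers and the paths are arbitrary. Note that the vfs_link() hunk earlier in this diff also makes the call fail with ENOENT when the source has already been unlinked, i.e. its i_nlink is zero.

/* Sketch: give an already-open file a (new) name via its descriptor. */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000
#endif

int main(void)
{
	int fd = open("/tmp/linkat-src", O_CREAT | O_WRONLY, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Fails with ENOENT without CAP_DAC_READ_SEARCH, per the hunk above. */
	if (linkat(fd, "", AT_FDCWD, "/tmp/linkat-dst", AT_EMPTY_PATH) < 0)
		perror("linkat");

	close(fd);
	return 0;
}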
@@ -3578,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink);
3578EXPORT_SYMBOL(__page_symlink); 3401EXPORT_SYMBOL(__page_symlink);
3579EXPORT_SYMBOL(page_symlink); 3402EXPORT_SYMBOL(page_symlink);
3580EXPORT_SYMBOL(page_symlink_inode_operations); 3403EXPORT_SYMBOL(page_symlink_inode_operations);
3581EXPORT_SYMBOL(path_lookup); 3404EXPORT_SYMBOL(kern_path_parent);
3582EXPORT_SYMBOL(kern_path); 3405EXPORT_SYMBOL(kern_path);
3583EXPORT_SYMBOL(vfs_path_lookup); 3406EXPORT_SYMBOL(vfs_path_lookup);
3584EXPORT_SYMBOL(inode_permission); 3407EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d7513485c1f3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -978,7 +978,13 @@ static int show_vfsmnt(struct seq_file *m, void *v)
978 int err = 0; 978 int err = 0;
979 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 979 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
980 980
981 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 981 if (mnt->mnt_sb->s_op->show_devname) {
982 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
983 if (err)
984 goto out;
985 } else {
986 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
987 }
982 seq_putc(m, ' '); 988 seq_putc(m, ' ');
983 seq_path(m, &mnt_path, " \t\n\\"); 989 seq_path(m, &mnt_path, " \t\n\\");
984 seq_putc(m, ' '); 990 seq_putc(m, ' ');
@@ -1002,6 +1008,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1008 .show = show_vfsmnt
1003}; 1009};
1004 1010
1011static int uuid_is_nil(u8 *uuid)
1012{
1013 int i;
1014 u8 *cp = (u8 *)uuid;
1015
1016 for (i = 0; i < 16; i++) {
1017 if (*cp++)
1018 return 0;
1019 }
1020 return 1;
1021}
1022
1005static int show_mountinfo(struct seq_file *m, void *v) 1023static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1024{
1007 struct proc_mounts *p = m->private; 1025 struct proc_mounts *p = m->private;
@@ -1013,7 +1031,12 @@ static int show_mountinfo(struct seq_file *m, void *v)
1013 1031
1014 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id, 1032 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
1015 MAJOR(sb->s_dev), MINOR(sb->s_dev)); 1033 MAJOR(sb->s_dev), MINOR(sb->s_dev));
1016 seq_dentry(m, mnt->mnt_root, " \t\n\\"); 1034 if (sb->s_op->show_path)
1035 err = sb->s_op->show_path(m, mnt);
1036 else
1037 seq_dentry(m, mnt->mnt_root, " \t\n\\");
1038 if (err)
1039 goto out;
1017 seq_putc(m, ' '); 1040 seq_putc(m, ' ');
1018 seq_path_root(m, &mnt_path, &root, " \t\n\\"); 1041 seq_path_root(m, &mnt_path, &root, " \t\n\\");
1019 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { 1042 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
@@ -1040,11 +1063,20 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1063 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1064 seq_puts(m, " unbindable");
1042 1065
1066 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1067 /* print the uuid */
1068 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1069
1043 /* Filesystem specific data */ 1070 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1071 seq_puts(m, " - ");
1045 show_type(m, sb); 1072 show_type(m, sb);
1046 seq_putc(m, ' '); 1073 seq_putc(m, ' ');
1047 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 1074 if (sb->s_op->show_devname)
1075 err = sb->s_op->show_devname(m, mnt);
1076 else
1077 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1078 if (err)
1079 goto out;
1048 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); 1080 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
1049 err = show_sb_opts(m, sb); 1081 err = show_sb_opts(m, sb);
1050 if (err) 1082 if (err)
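The optional " uuid:%pU" tag added above is emitted only when the superblock populates s_uuid, so most records in /proc/<pid>/mountinfo will not carry it. The small reader below just surfaces the lines that do; it is a sketch that relies only on the textual tag introduced by this hunk and prints nothing on kernels or filesystems without it.

/* Sketch: print mountinfo records that carry the optional uuid: tag. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/self/mountinfo", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, " uuid:"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}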
@@ -1070,11 +1102,15 @@ static int show_vfsstat(struct seq_file *m, void *v)
1070 int err = 0; 1102 int err = 0;
1071 1103
1072 /* device */ 1104 /* device */
1073 if (mnt->mnt_devname) { 1105 if (mnt->mnt_sb->s_op->show_devname) {
1074 seq_puts(m, "device "); 1106 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1075 mangle(m, mnt->mnt_devname); 1107 } else {
1076 } else 1108 if (mnt->mnt_devname) {
1077 seq_puts(m, "no device"); 1109 seq_puts(m, "device ");
1110 mangle(m, mnt->mnt_devname);
1111 } else
1112 seq_puts(m, "no device");
1113 }
1078 1114
1079 /* mount point */ 1115 /* mount point */
1080 seq_puts(m, " mounted on "); 1116 seq_puts(m, " mounted on ");
@@ -1088,7 +1124,8 @@ static int show_vfsstat(struct seq_file *m, void *v)
1088 /* optional statistics */ 1124 /* optional statistics */
1089 if (mnt->mnt_sb->s_op->show_stats) { 1125 if (mnt->mnt_sb->s_op->show_stats) {
1090 seq_putc(m, ' '); 1126 seq_putc(m, ' ');
1091 err = mnt->mnt_sb->s_op->show_stats(m, mnt); 1127 if (!err)
1128 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
1092 } 1129 }
1093 1130
1094 seq_putc(m, '\n'); 1131 seq_putc(m, '\n');
@@ -1244,7 +1281,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1281 */
1245 br_write_lock(vfsmount_lock); 1282 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1283 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1284 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1285 return -EBUSY;
1249 } 1286 }
1250 br_write_unlock(vfsmount_lock); 1287 br_write_unlock(vfsmount_lock);
@@ -1767,6 +1804,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1767 if (path->dentry != path->mnt->mnt_root) 1804 if (path->dentry != path->mnt->mnt_root)
1768 return -EINVAL; 1805 return -EINVAL;
1769 1806
1807 err = security_sb_remount(sb, data);
1808 if (err)
1809 return err;
1810
1770 down_write(&sb->s_umount); 1811 down_write(&sb->s_umount);
1771 if (flags & MS_BIND) 1812 if (flags & MS_BIND)
1772 err = change_mount_flags(path->mnt, flags); 1813 err = change_mount_flags(path->mnt, flags);
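The new security_sb_remount() call above gives an LSM the chance to reject the option string of a remount before s_umount is taken and the flags are applied. Nothing changes for an allowed request; a plain remount-read-only from userspace still looks like the sketch below (the mount point is arbitrary and the call needs CAP_SYS_ADMIN).

/* Sketch: remount /mnt read-only; an LSM hooked via security_sb_remount()
 * can now veto the new options before they take effect. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL) < 0) {
		perror("mount(MS_REMOUNT)");
		return 1;
	}
	return 0;
}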
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 89587573fe50..2f41dccea18e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
188 rv = NFS4ERR_DELAY; 188 rv = NFS4ERR_DELAY;
189 list_del_init(&lo->plh_bulk_recall); 189 list_del_init(&lo->plh_bulk_recall);
190 spin_unlock(&ino->i_lock); 190 spin_unlock(&ino->i_lock);
191 pnfs_free_lseg_list(&free_me_list);
191 put_layout_hdr(lo); 192 put_layout_hdr(lo);
192 iput(ino); 193 iput(ino);
193 } 194 }
194 pnfs_free_lseg_list(&free_me_list);
195 return rv; 195 return rv;
196} 196}
197 197
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bd3ca32879e7..139be9647d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -82,6 +82,11 @@ retry:
82#endif /* CONFIG_NFS_V4 */ 82#endif /* CONFIG_NFS_V4 */
83 83
84/* 84/*
85 * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
86 */
87static int nfs4_disable_idmapping = 0;
88
89/*
85 * RPC cruft for NFS 90 * RPC cruft for NFS
86 */ 91 */
87static struct rpc_version *nfs_version[5] = { 92static struct rpc_version *nfs_version[5] = {
@@ -481,7 +486,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
481 * Look up a client by IP address and protocol version 486 * Look up a client by IP address and protocol version
482 * - creates a new record if one doesn't yet exist 487 * - creates a new record if one doesn't yet exist
483 */ 488 */
484static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) 489static struct nfs_client *
490nfs_get_client(const struct nfs_client_initdata *cl_init,
491 const struct rpc_timeout *timeparms,
492 const char *ip_addr,
493 rpc_authflavor_t authflavour,
494 int noresvport)
485{ 495{
486 struct nfs_client *clp, *new = NULL; 496 struct nfs_client *clp, *new = NULL;
487 int error; 497 int error;
@@ -512,6 +522,13 @@ install_client:
512 clp = new; 522 clp = new;
513 list_add(&clp->cl_share_link, &nfs_client_list); 523 list_add(&clp->cl_share_link, &nfs_client_list);
514 spin_unlock(&nfs_client_lock); 524 spin_unlock(&nfs_client_lock);
525
526 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
527 authflavour, noresvport);
528 if (error < 0) {
529 nfs_put_client(clp);
530 return ERR_PTR(error);
531 }
515 dprintk("--> nfs_get_client() = %p [new]\n", clp); 532 dprintk("--> nfs_get_client() = %p [new]\n", clp);
516 return clp; 533 return clp;
517 534
@@ -767,9 +784,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
767/* 784/*
768 * Initialise an NFS2 or NFS3 client 785 * Initialise an NFS2 or NFS3 client
769 */ 786 */
770static int nfs_init_client(struct nfs_client *clp, 787int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
771 const struct rpc_timeout *timeparms, 788 const char *ip_addr, rpc_authflavor_t authflavour,
772 const struct nfs_parsed_mount_data *data) 789 int noresvport)
773{ 790{
774 int error; 791 int error;
775 792
@@ -784,7 +801,7 @@ static int nfs_init_client(struct nfs_client *clp,
784 * - RFC 2623, sec 2.3.2 801 * - RFC 2623, sec 2.3.2
785 */ 802 */
786 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 803 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
787 0, data->flags & NFS_MOUNT_NORESVPORT); 804 0, noresvport);
788 if (error < 0) 805 if (error < 0)
789 goto error; 806 goto error;
790 nfs_mark_client_ready(clp, NFS_CS_READY); 807 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -820,19 +837,17 @@ static int nfs_init_server(struct nfs_server *server,
820 cl_init.rpc_ops = &nfs_v3_clientops; 837 cl_init.rpc_ops = &nfs_v3_clientops;
821#endif 838#endif
822 839
840 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
841 data->timeo, data->retrans);
842
823 /* Allocate or find a client reference we can use */ 843 /* Allocate or find a client reference we can use */
824 clp = nfs_get_client(&cl_init); 844 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
845 data->flags & NFS_MOUNT_NORESVPORT);
825 if (IS_ERR(clp)) { 846 if (IS_ERR(clp)) {
826 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 847 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
827 return PTR_ERR(clp); 848 return PTR_ERR(clp);
828 } 849 }
829 850
830 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
831 data->timeo, data->retrans);
832 error = nfs_init_client(clp, &timeparms, data);
833 if (error < 0)
834 goto error;
835
836 server->nfs_client = clp; 851 server->nfs_client = clp;
837 852
838 /* Initialise the client representation from the mount data */ 853 /* Initialise the client representation from the mount data */
@@ -1009,14 +1024,19 @@ static void nfs_server_insert_lists(struct nfs_server *server)
1009 spin_lock(&nfs_client_lock); 1024 spin_lock(&nfs_client_lock);
1010 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1025 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1011 list_add_tail(&server->master_link, &nfs_volume_list); 1026 list_add_tail(&server->master_link, &nfs_volume_list);
1027 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1012 spin_unlock(&nfs_client_lock); 1028 spin_unlock(&nfs_client_lock);
1013 1029
1014} 1030}
1015 1031
1016static void nfs_server_remove_lists(struct nfs_server *server) 1032static void nfs_server_remove_lists(struct nfs_server *server)
1017{ 1033{
1034 struct nfs_client *clp = server->nfs_client;
1035
1018 spin_lock(&nfs_client_lock); 1036 spin_lock(&nfs_client_lock);
1019 list_del_rcu(&server->client_link); 1037 list_del_rcu(&server->client_link);
1038 if (clp && list_empty(&clp->cl_superblocks))
1039 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1020 list_del(&server->master_link); 1040 list_del(&server->master_link);
1021 spin_unlock(&nfs_client_lock); 1041 spin_unlock(&nfs_client_lock);
1022 1042
@@ -1307,11 +1327,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1307/* 1327/*
1308 * Initialise an NFS4 client record 1328 * Initialise an NFS4 client record
1309 */ 1329 */
1310static int nfs4_init_client(struct nfs_client *clp, 1330int nfs4_init_client(struct nfs_client *clp,
1311 const struct rpc_timeout *timeparms, 1331 const struct rpc_timeout *timeparms,
1312 const char *ip_addr, 1332 const char *ip_addr,
1313 rpc_authflavor_t authflavour, 1333 rpc_authflavor_t authflavour,
1314 int flags) 1334 int noresvport)
1315{ 1335{
1316 int error; 1336 int error;
1317 1337
@@ -1325,7 +1345,7 @@ static int nfs4_init_client(struct nfs_client *clp,
1325 clp->rpc_ops = &nfs_v4_clientops; 1345 clp->rpc_ops = &nfs_v4_clientops;
1326 1346
1327 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1347 error = nfs_create_rpc_client(clp, timeparms, authflavour,
1328 1, flags & NFS_MOUNT_NORESVPORT); 1348 1, noresvport);
1329 if (error < 0) 1349 if (error < 0)
1330 goto error; 1350 goto error;
1331 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1351 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1378,27 +1398,71 @@ static int nfs4_set_client(struct nfs_server *server,
1378 dprintk("--> nfs4_set_client()\n"); 1398 dprintk("--> nfs4_set_client()\n");
1379 1399
1380 /* Allocate or find a client reference we can use */ 1400 /* Allocate or find a client reference we can use */
1381 clp = nfs_get_client(&cl_init); 1401 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
1402 server->flags & NFS_MOUNT_NORESVPORT);
1382 if (IS_ERR(clp)) { 1403 if (IS_ERR(clp)) {
1383 error = PTR_ERR(clp); 1404 error = PTR_ERR(clp);
1384 goto error; 1405 goto error;
1385 } 1406 }
1386 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, 1407
1387 server->flags); 1408 /*
1388 if (error < 0) 1409 * Query for the lease time on clientid setup or renewal
1389 goto error_put; 1410 *
1411 * Note that this will be set on nfs_clients that were created
1412 * only for the DS role and did not set this bit, but now will
1413 * serve a dual role.
1414 */
1415 set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
1390 1416
1391 server->nfs_client = clp; 1417 server->nfs_client = clp;
1392 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); 1418 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
1393 return 0; 1419 return 0;
1394
1395error_put:
1396 nfs_put_client(clp);
1397error: 1420error:
1398 dprintk("<-- nfs4_set_client() = xerror %d\n", error); 1421 dprintk("<-- nfs4_set_client() = xerror %d\n", error);
1399 return error; 1422 return error;
1400} 1423}
1401 1424
1425/*
1426 * Set up a pNFS Data Server client.
1427 *
1428 * Return any existing nfs_client that matches server address,port,version
1429 * and minorversion.
1430 *
1431 * For a new nfs_client, use a soft mount (default), a low retrans and a
1432 * low timeout interval so that if a connection is lost, we retry through
1433 * the MDS.
1434 */
1435struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1436 const struct sockaddr *ds_addr,
1437 int ds_addrlen, int ds_proto)
1438{
1439 struct nfs_client_initdata cl_init = {
1440 .addr = ds_addr,
1441 .addrlen = ds_addrlen,
1442 .rpc_ops = &nfs_v4_clientops,
1443 .proto = ds_proto,
1444 .minorversion = mds_clp->cl_minorversion,
1445 };
1446 struct rpc_timeout ds_timeout = {
1447 .to_initval = 15 * HZ,
1448 .to_maxval = 15 * HZ,
1449 .to_retries = 1,
1450 .to_exponential = 1,
1451 };
1452 struct nfs_client *clp;
1453
1454 /*
1455 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
1456 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1457 * (section 13.1 RFC 5661).
1458 */
1459 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1460 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
1461
1462 dprintk("<-- %s %p\n", __func__, clp);
1463 return clp;
1464}
1465EXPORT_SYMBOL(nfs4_set_ds_client);
1402 1466
1403/* 1467/*
1404 * Session has been established, and the client marked ready. 1468 * Session has been established, and the client marked ready.
@@ -1435,6 +1499,10 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1435 BUG_ON(!server->nfs_client->rpc_ops); 1499 BUG_ON(!server->nfs_client->rpc_ops);
1436 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1500 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1437 1501
1502 /* data servers support only a subset of NFSv4.1 */
1503 if (is_ds_only_client(server->nfs_client))
1504 return -EPROTONOSUPPORT;
1505
1438 fattr = nfs_alloc_fattr(); 1506 fattr = nfs_alloc_fattr();
1439 if (fattr == NULL) 1507 if (fattr == NULL)
1440 return -ENOMEM; 1508 return -ENOMEM;
@@ -1504,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server,
1504 if (error < 0) 1572 if (error < 0)
1505 goto error; 1573 goto error;
1506 1574
1575 /*
1576 * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
1577 * authentication.
1578 */
1579 if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
1580 server->caps |= NFS_CAP_UIDGID_NOMAP;
1581
1507 if (data->rsize) 1582 if (data->rsize)
1508 server->rsize = nfs_block_size(data->rsize, NULL); 1583 server->rsize = nfs_block_size(data->rsize, NULL);
1509 if (data->wsize) 1584 if (data->wsize)
@@ -1921,3 +1996,7 @@ void nfs_fs_proc_exit(void)
1921} 1996}
1922 1997
1923#endif /* CONFIG_PROC_FS */ 1998#endif /* CONFIG_PROC_FS */
1999
2000module_param(nfs4_disable_idmapping, bool, 0644);
2001MODULE_PARM_DESC(nfs4_disable_idmapping,
2002 "Turn off NFSv4 idmapping when using 'sec=sys'");
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c3eb33b904d..abdf38d5971d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1169,11 +1169,23 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1169 iput(inode); 1169 iput(inode);
1170} 1170}
1171 1171
1172static void nfs_d_release(struct dentry *dentry)
1173{
1174 /* free cached devname value, if it survived that far */
1175 if (unlikely(dentry->d_fsdata)) {
1176 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1177 WARN_ON(1);
1178 else
1179 kfree(dentry->d_fsdata);
1180 }
1181}
1182
1172const struct dentry_operations nfs_dentry_operations = { 1183const struct dentry_operations nfs_dentry_operations = {
1173 .d_revalidate = nfs_lookup_revalidate, 1184 .d_revalidate = nfs_lookup_revalidate,
1174 .d_delete = nfs_dentry_delete, 1185 .d_delete = nfs_dentry_delete,
1175 .d_iput = nfs_dentry_iput, 1186 .d_iput = nfs_dentry_iput,
1176 .d_automount = nfs_d_automount, 1187 .d_automount = nfs_d_automount,
1188 .d_release = nfs_d_release,
1177}; 1189};
1178 1190
1179static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1191static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1248,6 +1260,7 @@ const struct dentry_operations nfs4_dentry_operations = {
1248 .d_delete = nfs_dentry_delete, 1260 .d_delete = nfs_dentry_delete,
1249 .d_iput = nfs_dentry_iput, 1261 .d_iput = nfs_dentry_iput,
1250 .d_automount = nfs_d_automount, 1262 .d_automount = nfs_d_automount,
1263 .d_release = nfs_d_release,
1251}; 1264};
1252 1265
1253/* 1266/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9943a75bb6d1..8eea25366717 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -45,6 +45,7 @@
45#include <linux/pagemap.h> 45#include <linux/pagemap.h>
46#include <linux/kref.h> 46#include <linux/kref.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/task_io_accounting_ops.h>
48 49
49#include <linux/nfs_fs.h> 50#include <linux/nfs_fs.h>
50#include <linux/nfs_page.h> 51#include <linux/nfs_page.h>
@@ -649,8 +650,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
649{ 650{
650 struct nfs_write_data *data = calldata; 651 struct nfs_write_data *data = calldata;
651 652
652 if (nfs_writeback_done(task, data) != 0) 653 nfs_writeback_done(task, data);
653 return;
654} 654}
655 655
656/* 656/*
@@ -938,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
938 if (retval) 938 if (retval)
939 goto out; 939 goto out;
940 940
941 task_io_account_read(count);
942
941 retval = nfs_direct_read(iocb, iov, nr_segs, pos); 943 retval = nfs_direct_read(iocb, iov, nr_segs, pos);
942 if (retval > 0) 944 if (retval > 0)
943 iocb->ki_pos = pos + retval; 945 iocb->ki_pos = pos + retval;
@@ -999,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
999 if (retval) 1001 if (retval)
1000 goto out; 1002 goto out;
1001 1003
1004 task_io_account_write(count);
1005
1002 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 1006 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
1003 1007
1004 if (retval > 0) 1008 if (retval > 0)
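The task_io_account_read()/task_io_account_write() calls added above make NFS O_DIRECT transfers show up in the per-task I/O accounting that buffered I/O already feeds. A small sketch that simply dumps those counters, assuming CONFIG_TASK_IO_ACCOUNTING and the usual /proc/self/io field names (read_bytes, write_bytes):

#include <stdio.h>

/* Dump the per-task I/O counters that the calls above now update for
 * NFS direct reads and writes. */
int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/self/io", "r");

	if (!f) {
		perror("fopen /proc/self/io");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}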
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7bf029ef4084..d85a534b15cd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
387 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
388 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
389 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
394start: 390start:
395 /* 391 /*
396 * Prevent starvation issues if someone is doing a consistency 392 * Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b5ffe8fa291f..1084792bc0fe 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -75,18 +75,25 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
75/* 75/*
76 * get an NFS2/NFS3 root dentry from the root filehandle 76 * get an NFS2/NFS3 root dentry from the root filehandle
77 */ 77 */
78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) 78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
79 const char *devname)
79{ 80{
80 struct nfs_server *server = NFS_SB(sb); 81 struct nfs_server *server = NFS_SB(sb);
81 struct nfs_fsinfo fsinfo; 82 struct nfs_fsinfo fsinfo;
82 struct dentry *ret; 83 struct dentry *ret;
83 struct inode *inode; 84 struct inode *inode;
85 void *name = kstrdup(devname, GFP_KERNEL);
84 int error; 86 int error;
85 87
88 if (!name)
89 return ERR_PTR(-ENOMEM);
90
86 /* get the actual root for this mount */ 91 /* get the actual root for this mount */
87 fsinfo.fattr = nfs_alloc_fattr(); 92 fsinfo.fattr = nfs_alloc_fattr();
88 if (fsinfo.fattr == NULL) 93 if (fsinfo.fattr == NULL) {
94 kfree(name);
89 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 }
90 97
91 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 98 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
92 if (error < 0) { 99 if (error < 0) {
@@ -119,7 +126,15 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 } 126 }
120 127
121 security_d_instantiate(ret, inode); 128 security_d_instantiate(ret, inode);
129 spin_lock(&ret->d_lock);
130 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
131 ret->d_fsdata = name;
132 name = NULL;
133 }
134 spin_unlock(&ret->d_lock);
122out: 135out:
136 if (name)
137 kfree(name);
123 nfs_free_fattr(fsinfo.fattr); 138 nfs_free_fattr(fsinfo.fattr);
124 return ret; 139 return ret;
125} 140}
@@ -169,27 +184,35 @@ out:
169/* 184/*
170 * get an NFS4 root dentry from the root filehandle 185 * get an NFS4 root dentry from the root filehandle
171 */ 186 */
172struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) 187struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
188 const char *devname)
173{ 189{
174 struct nfs_server *server = NFS_SB(sb); 190 struct nfs_server *server = NFS_SB(sb);
175 struct nfs_fattr *fattr = NULL; 191 struct nfs_fattr *fattr = NULL;
176 struct dentry *ret; 192 struct dentry *ret;
177 struct inode *inode; 193 struct inode *inode;
194 void *name = kstrdup(devname, GFP_KERNEL);
178 int error; 195 int error;
179 196
180 dprintk("--> nfs4_get_root()\n"); 197 dprintk("--> nfs4_get_root()\n");
181 198
199 if (!name)
200 return ERR_PTR(-ENOMEM);
201
182 /* get the info about the server and filesystem */ 202 /* get the info about the server and filesystem */
183 error = nfs4_server_capabilities(server, mntfh); 203 error = nfs4_server_capabilities(server, mntfh);
184 if (error < 0) { 204 if (error < 0) {
185 dprintk("nfs_get_root: getcaps error = %d\n", 205 dprintk("nfs_get_root: getcaps error = %d\n",
186 -error); 206 -error);
207 kfree(name);
187 return ERR_PTR(error); 208 return ERR_PTR(error);
188 } 209 }
189 210
190 fattr = nfs_alloc_fattr(); 211 fattr = nfs_alloc_fattr();
191 if (fattr == NULL) 212 if (fattr == NULL) {
192 return ERR_PTR(-ENOMEM);; 213 kfree(name);
214 return ERR_PTR(-ENOMEM);
215 }
193 216
194 /* get the actual root for this mount */ 217 /* get the actual root for this mount */
195 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); 218 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
@@ -223,8 +246,15 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
223 } 246 }
224 247
225 security_d_instantiate(ret, inode); 248 security_d_instantiate(ret, inode);
226 249 spin_lock(&ret->d_lock);
250 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
251 ret->d_fsdata = name;
252 name = NULL;
253 }
254 spin_unlock(&ret->d_lock);
227out: 255out:
256 if (name)
257 kfree(name);
228 nfs_free_fattr(fattr); 258 nfs_free_fattr(fattr);
229 dprintk("<-- nfs4_get_root()\n"); 259 dprintk("<-- nfs4_get_root()\n");
230 return ret; 260 return ret;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 18696882f1c6..79664a1025af 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -33,16 +33,41 @@
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36#include <linux/types.h>
37#include <linux/string.h>
38#include <linux/kernel.h>
39
40static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
41{
42 unsigned long val;
43 char buf[16];
44
45 if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
46 return 0;
47 memcpy(buf, name, namelen);
48 buf[namelen] = '\0';
49 if (strict_strtoul(buf, 0, &val) != 0)
50 return 0;
51 *res = val;
52 return 1;
53}
54
55static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
56{
57 return snprintf(buf, buflen, "%u", id);
58}
36 59
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER 60#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38 61
39#include <linux/slab.h> 62#include <linux/slab.h>
40#include <linux/cred.h> 63#include <linux/cred.h>
64#include <linux/sunrpc/sched.h>
65#include <linux/nfs4.h>
66#include <linux/nfs_fs_sb.h>
41#include <linux/nfs_idmap.h> 67#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h> 68#include <linux/keyctl.h>
43#include <linux/key-type.h> 69#include <linux/key-type.h>
44#include <linux/rcupdate.h> 70#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h> 71#include <linux/err.h>
47 72
48#include <keys/user-type.h> 73#include <keys/user-type.h>
@@ -219,23 +244,39 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
219 return ret; 244 return ret;
220} 245}
221 246
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 247int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
223{ 248{
249 if (nfs_map_string_to_numeric(name, namelen, uid))
250 return 0;
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid); 251 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225} 252}
226 253
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) 254int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
228{ 255{
256 if (nfs_map_string_to_numeric(name, namelen, gid))
257 return 0;
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid); 258 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230} 259}
231 260
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 261int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
233{ 262{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen); 263 int ret = -EINVAL;
264
265 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
266 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
267 if (ret < 0)
268 ret = nfs_map_numeric_to_string(uid, buf, buflen);
269 return ret;
235} 270}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) 271int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
237{ 272{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen); 273 int ret = -EINVAL;
274
275 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
276 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
277 if (ret < 0)
278 ret = nfs_map_numeric_to_string(gid, buf, buflen);
279 return ret;
239} 280}
240 281
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ 282#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
@@ -243,7 +284,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
243#include <linux/module.h> 284#include <linux/module.h>
244#include <linux/mutex.h> 285#include <linux/mutex.h>
245#include <linux/init.h> 286#include <linux/init.h>
246#include <linux/types.h>
247#include <linux/slab.h> 287#include <linux/slab.h>
248#include <linux/socket.h> 288#include <linux/socket.h>
249#include <linux/in.h> 289#include <linux/in.h>
@@ -695,31 +735,45 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
695 return hash; 735 return hash;
696} 736}
697 737
698int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 738int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
699{ 739{
700 struct idmap *idmap = clp->cl_idmap; 740 struct idmap *idmap = server->nfs_client->cl_idmap;
701 741
742 if (nfs_map_string_to_numeric(name, namelen, uid))
743 return 0;
702 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 744 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
703} 745}
704 746
705int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 747int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
706{ 748{
707 struct idmap *idmap = clp->cl_idmap; 749 struct idmap *idmap = server->nfs_client->cl_idmap;
708 750
751 if (nfs_map_string_to_numeric(name, namelen, uid))
752 return 0;
709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 753 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
710} 754}
711 755
712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 756int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
713{ 757{
714 struct idmap *idmap = clp->cl_idmap; 758 struct idmap *idmap = server->nfs_client->cl_idmap;
759 int ret = -EINVAL;
715 760
716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 761 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
762 ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
763 if (ret < 0)
764 ret = nfs_map_numeric_to_string(uid, buf, buflen);
765 return ret;
717} 766}
718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 767int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
719{ 768{
720 struct idmap *idmap = clp->cl_idmap; 769 struct idmap *idmap = server->nfs_client->cl_idmap;
770 int ret = -EINVAL;
721 771
722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 772 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
773 ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
774 if (ret < 0)
775 ret = nfs_map_numeric_to_string(uid, buf, buflen);
776 return ret;
723} 777}
724 778
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ 779#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
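The idmap changes above add a numeric fast path in both directions: an owner string with no '@' that parses as a plain number is used as the uid/gid directly, and when mapping is disabled (NFS_CAP_UIDGID_NOMAP) or the lookup fails, the id is sent back as its decimal string. A minimal userspace model of nfs_map_string_to_numeric(), with strtoul standing in for the kernel's strict_strtoul:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Userspace model of nfs_map_string_to_numeric() above. */
static int map_string_to_numeric(const char *name, size_t namelen, unsigned int *res)
{
	char buf[16], *end;
	unsigned long val;

	/* "user@domain" style names, or anything too long, is not numeric */
	if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
		return 0;
	memcpy(buf, name, namelen);
	buf[namelen] = '\0';
	val = strtoul(buf, &end, 0);
	if (*end != '\0')
		return 0;
	*res = (unsigned int)val;
	return 1;
}

int main(void)
{
	unsigned int id = 0;

	printf("\"65534\"             -> %d (id=%u)\n",
	       map_string_to_numeric("65534", 5, &id), id);
	printf("\"alice@example.org\" -> %d\n",
	       map_string_to_numeric("alice@example.org", 17, &id));
	return 0;
}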
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..01768e5e2c9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
1513{ 1518{
1514 struct workqueue_struct *wq; 1519 struct workqueue_struct *wq;
1515 dprintk("RPC: creating workqueue nfsiod\n"); 1520 dprintk("RPC: creating workqueue nfsiod\n");
1516 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); 1521 wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
1517 if (wq == NULL) 1522 if (wq == NULL)
1518 return -ENOMEM; 1523 return -ENOMEM;
1519 nfsiod_workqueue = wq; 1524 nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cf9fdbdabc67..72e0bddf7a2f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
148 struct nfs_fattr *); 148 struct nfs_fattr *);
149extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 149extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
150extern int nfs4_check_client_ready(struct nfs_client *clp); 150extern int nfs4_check_client_ready(struct nfs_client *clp);
151extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
152 const struct sockaddr *ds_addr,
153 int ds_addrlen, int ds_proto);
151#ifdef CONFIG_PROC_FS 154#ifdef CONFIG_PROC_FS
152extern int __init nfs_fs_proc_init(void); 155extern int __init nfs_fs_proc_init(void);
153extern void nfs_fs_proc_exit(void); 156extern void nfs_fs_proc_exit(void);
@@ -163,10 +166,10 @@ static inline void nfs_fs_proc_exit(void)
163 166
164/* nfs4namespace.c */ 167/* nfs4namespace.c */
165#ifdef CONFIG_NFS_V4 168#ifdef CONFIG_NFS_V4
166extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); 169extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
167#else 170#else
168static inline 171static inline
169struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 172struct vfsmount *nfs_do_refmount(struct dentry *dentry)
170{ 173{
171 return ERR_PTR(-ENOENT); 174 return ERR_PTR(-ENOENT);
172} 175}
@@ -213,8 +216,14 @@ extern const u32 nfs41_maxwrite_overhead;
213extern struct rpc_procinfo nfs4_procedures[]; 216extern struct rpc_procinfo nfs4_procedures[];
214#endif 217#endif
215 218
219extern int nfs4_init_ds_session(struct nfs_client *clp);
220
216/* proc.c */ 221/* proc.c */
217void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 222void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
223extern int nfs_init_client(struct nfs_client *clp,
224 const struct rpc_timeout *timeparms,
225 const char *ip_addr, rpc_authflavor_t authflavour,
226 int noresvport);
218 227
219/* dir.c */ 228/* dir.c */
220extern int nfs_access_cache_shrinker(struct shrinker *shrink, 229extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -247,24 +256,30 @@ extern void nfs_sb_active(struct super_block *sb);
247extern void nfs_sb_deactive(struct super_block *sb); 256extern void nfs_sb_deactive(struct super_block *sb);
248 257
249/* namespace.c */ 258/* namespace.c */
250extern char *nfs_path(const char *base, 259extern char *nfs_path(char **p, struct dentry *dentry,
251 const struct dentry *droot,
252 const struct dentry *dentry,
253 char *buffer, ssize_t buflen); 260 char *buffer, ssize_t buflen);
254extern struct vfsmount *nfs_d_automount(struct path *path); 261extern struct vfsmount *nfs_d_automount(struct path *path);
255 262
256/* getroot.c */ 263/* getroot.c */
257extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); 264extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
265 const char *);
258#ifdef CONFIG_NFS_V4 266#ifdef CONFIG_NFS_V4
259extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); 267extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
268 const char *);
260 269
261extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 270extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
262#endif 271#endif
263 272
264/* read.c */ 273/* read.c */
274extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
275 const struct rpc_call_ops *call_ops);
265extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 276extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
266 277
267/* write.c */ 278/* write.c */
279extern int nfs_initiate_write(struct nfs_write_data *data,
280 struct rpc_clnt *clnt,
281 const struct rpc_call_ops *call_ops,
282 int how);
268extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
269#ifdef CONFIG_MIGRATION 284#ifdef CONFIG_MIGRATION
270extern int nfs_migrate_page(struct address_space *, 285extern int nfs_migrate_page(struct address_space *,
@@ -274,6 +289,13 @@ extern int nfs_migrate_page(struct address_space *,
274#endif 289#endif
275 290
276/* nfs4proc.c */ 291/* nfs4proc.c */
292extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
293extern int nfs4_init_client(struct nfs_client *clp,
294 const struct rpc_timeout *timeparms,
295 const char *ip_addr,
296 rpc_authflavor_t authflavour,
297 int noresvport);
298extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
277extern int _nfs4_call_sync(struct nfs_server *server, 299extern int _nfs4_call_sync(struct nfs_server *server,
278 struct rpc_message *msg, 300 struct rpc_message *msg,
279 struct nfs4_sequence_args *args, 301 struct nfs4_sequence_args *args,
@@ -288,12 +310,11 @@ extern int _nfs4_call_sync_session(struct nfs_server *server,
288/* 310/*
289 * Determine the device name as a string 311 * Determine the device name as a string
290 */ 312 */
291static inline char *nfs_devname(const struct vfsmount *mnt_parent, 313static inline char *nfs_devname(struct dentry *dentry,
292 const struct dentry *dentry,
293 char *buffer, ssize_t buflen) 314 char *buffer, ssize_t buflen)
294{ 315{
295 return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, 316 char *dummy;
296 dentry, buffer, buflen); 317 return nfs_path(&dummy, dentry, buffer, buflen);
297} 318}
298 319
299/* 320/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f32b8603dca8..c0b8344db0c6 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -25,33 +25,30 @@ static LIST_HEAD(nfs_automount_list);
25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
26int nfs_mountpoint_expiry_timeout = 500 * HZ; 26int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 27
28static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 28static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 const struct dentry *dentry,
30 struct nfs_fh *fh, 29 struct nfs_fh *fh,
31 struct nfs_fattr *fattr); 30 struct nfs_fattr *fattr);
32 31
33/* 32/*
34 * nfs_path - reconstruct the path given an arbitrary dentry 33 * nfs_path - reconstruct the path given an arbitrary dentry
35 * @base - arbitrary string to prepend to the path 34 * @base - used to return pointer to the end of devname part of path
36 * @droot - pointer to root dentry for mountpoint
37 * @dentry - pointer to dentry 35 * @dentry - pointer to dentry
38 * @buffer - result buffer 36 * @buffer - result buffer
39 * @buflen - length of buffer 37 * @buflen - length of buffer
40 * 38 *
41 * Helper function for constructing the path from the 39 * Helper function for constructing the server pathname
42 * root dentry to an arbitrary hashed dentry. 40 * by arbitrary hashed dentry.
43 * 41 *
44 * This is mainly for use in figuring out the path on the 42 * This is mainly for use in figuring out the path on the
45 * server side when automounting on top of an existing partition. 43 * server side when automounting on top of an existing partition
44 * and in generating /proc/mounts and friends.
46 */ 45 */
47char *nfs_path(const char *base, 46char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
48 const struct dentry *droot,
49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen)
51{ 47{
52 char *end; 48 char *end;
53 int namelen; 49 int namelen;
54 unsigned seq; 50 unsigned seq;
51 const char *base;
55 52
56rename_retry: 53rename_retry:
57 end = buffer+buflen; 54 end = buffer+buflen;
@@ -60,7 +57,10 @@ rename_retry:
60 57
61 seq = read_seqbegin(&rename_lock); 58 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock(); 59 rcu_read_lock();
63 while (!IS_ROOT(dentry) && dentry != droot) { 60 while (1) {
61 spin_lock(&dentry->d_lock);
62 if (IS_ROOT(dentry))
63 break;
64 namelen = dentry->d_name.len; 64 namelen = dentry->d_name.len;
65 buflen -= namelen + 1; 65 buflen -= namelen + 1;
66 if (buflen < 0) 66 if (buflen < 0)
@@ -68,27 +68,47 @@ rename_retry:
68 end -= namelen; 68 end -= namelen;
69 memcpy(end, dentry->d_name.name, namelen); 69 memcpy(end, dentry->d_name.name, namelen);
70 *--end = '/'; 70 *--end = '/';
71 spin_unlock(&dentry->d_lock);
71 dentry = dentry->d_parent; 72 dentry = dentry->d_parent;
72 } 73 }
73 rcu_read_unlock(); 74 if (read_seqretry(&rename_lock, seq)) {
74 if (read_seqretry(&rename_lock, seq)) 75 spin_unlock(&dentry->d_lock);
76 rcu_read_unlock();
75 goto rename_retry; 77 goto rename_retry;
78 }
76 if (*end != '/') { 79 if (*end != '/') {
77 if (--buflen < 0) 80 if (--buflen < 0) {
81 spin_unlock(&dentry->d_lock);
82 rcu_read_unlock();
78 goto Elong; 83 goto Elong;
84 }
79 *--end = '/'; 85 *--end = '/';
80 } 86 }
87 *p = end;
88 base = dentry->d_fsdata;
89 if (!base) {
90 spin_unlock(&dentry->d_lock);
91 rcu_read_unlock();
92 WARN_ON(1);
93 return end;
94 }
81 namelen = strlen(base); 95 namelen = strlen(base);
82 /* Strip off excess slashes in base string */ 96 /* Strip off excess slashes in base string */
83 while (namelen > 0 && base[namelen - 1] == '/') 97 while (namelen > 0 && base[namelen - 1] == '/')
84 namelen--; 98 namelen--;
85 buflen -= namelen; 99 buflen -= namelen;
86 if (buflen < 0) 100 if (buflen < 0) {
101 spin_lock(&dentry->d_lock);
102 rcu_read_unlock();
87 goto Elong; 103 goto Elong;
104 }
88 end -= namelen; 105 end -= namelen;
89 memcpy(end, base, namelen); 106 memcpy(end, base, namelen);
107 spin_unlock(&dentry->d_lock);
108 rcu_read_unlock();
90 return end; 109 return end;
91Elong_unlock: 110Elong_unlock:
 111 spin_unlock(&dentry->d_lock);
92 rcu_read_unlock(); 112 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq)) 113 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry; 114 goto rename_retry;
@@ -143,9 +163,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
143 } 163 }
144 164
145 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 165 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
146 mnt = nfs_do_refmount(path->mnt, path->dentry); 166 mnt = nfs_do_refmount(path->dentry);
147 else 167 else
148 mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); 168 mnt = nfs_do_submount(path->dentry, fh, fattr);
149 if (IS_ERR(mnt)) 169 if (IS_ERR(mnt))
150 goto out; 170 goto out;
151 171
@@ -209,19 +229,17 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
209 229
210/** 230/**
211 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary 231 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
212 * @mnt_parent - mountpoint of parent directory
213 * @dentry - parent directory 232 * @dentry - parent directory
214 * @fh - filehandle for new root dentry 233 * @fh - filehandle for new root dentry
215 * @fattr - attributes for new root inode 234 * @fattr - attributes for new root inode
216 * 235 *
217 */ 236 */
218static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 237static struct vfsmount *nfs_do_submount(struct dentry *dentry,
219 const struct dentry *dentry,
220 struct nfs_fh *fh, 238 struct nfs_fh *fh,
221 struct nfs_fattr *fattr) 239 struct nfs_fattr *fattr)
222{ 240{
223 struct nfs_clone_mount mountdata = { 241 struct nfs_clone_mount mountdata = {
224 .sb = mnt_parent->mnt_sb, 242 .sb = dentry->d_sb,
225 .dentry = dentry, 243 .dentry = dentry,
226 .fh = fh, 244 .fh = fh,
227 .fattr = fattr, 245 .fattr = fattr,
@@ -237,11 +255,11 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
237 dentry->d_name.name); 255 dentry->d_name.name);
238 if (page == NULL) 256 if (page == NULL)
239 goto out; 257 goto out;
240 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); 258 devname = nfs_devname(dentry, page, PAGE_SIZE);
241 mnt = (struct vfsmount *)devname; 259 mnt = (struct vfsmount *)devname;
242 if (IS_ERR(devname)) 260 if (IS_ERR(devname))
243 goto free_page; 261 goto free_page;
244 mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); 262 mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
245free_page: 263free_page:
246 free_page((unsigned long)page); 264 free_page((unsigned long)page);
247out: 265out:
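nfs_path() now walks from the given dentry all the way up to the filesystem root, filling the result buffer right to left, and then prepends the devname string that nfs_get_root()/nfs4_get_root() cached in the root dentry's d_fsdata; *p is set to where the export path begins inside that string. A toy userspace model of the right-to-left assembly (the component list and devname are made-up inputs; locking and overflow checks are omitted):

#include <stdio.h>
#include <string.h>

/* Toy model of nfs_path(): copy path components from leaf to root into the
 * tail of the buffer, then prepend the cached devname, stripping any excess
 * trailing slashes from it. */
static char *build_path(const char *base, const char *const comps[], int n,
			char *buffer, int buflen)
{
	char *end = buffer + buflen;
	int i, namelen;

	*--end = '\0';
	for (i = n - 1; i >= 0; i--) {
		namelen = strlen(comps[i]);
		end -= namelen;
		memcpy(end, comps[i], namelen);
		*--end = '/';
	}
	namelen = strlen(base);
	while (namelen > 0 && base[namelen - 1] == '/')
		namelen--;
	end -= namelen;
	memcpy(end, base, namelen);
	return end;
}

int main(void)
{
	const char *comps[] = { "dir", "subdir", "file" };
	char buf[128];

	/* prints: server:/export/dir/subdir/file */
	puts(build_path("server:/export/", comps, 3, buf, sizeof(buf)));
	return 0;
}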
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ce939c062a52..d0c80d8b3f96 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
885 .lock = nfs3_proc_lock, 885 .lock = nfs3_proc_lock,
886 .clear_acl_cache = nfs3_forget_cached_acls, 886 .clear_acl_cache = nfs3_forget_cached_acls,
887 .close_context = nfs_close_context, 887 .close_context = nfs_close_context,
888 .init_client = nfs_init_client,
888}; 889};
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..c64be1cff080 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
252extern int nfs4_setup_sequence(const struct nfs_server *server, 252extern int nfs4_setup_sequence(const struct nfs_server *server,
253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
254 int cache_reply, struct rpc_task *task); 254 int cache_reply, struct rpc_task *task);
255extern int nfs41_setup_sequence(struct nfs4_session *session,
256 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
257 int cache_reply, struct rpc_task *task);
255extern void nfs4_destroy_session(struct nfs4_session *session); 258extern void nfs4_destroy_session(struct nfs4_session *session);
256extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 259extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
257extern int nfs4_proc_create_session(struct nfs_client *); 260extern int nfs4_proc_create_session(struct nfs_client *);
@@ -259,6 +262,19 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
259extern int nfs4_init_session(struct nfs_server *server); 262extern int nfs4_init_session(struct nfs_server *server);
260extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 263extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
261 struct nfs_fsinfo *fsinfo); 264 struct nfs_fsinfo *fsinfo);
265
266static inline bool
267is_ds_only_client(struct nfs_client *clp)
268{
269 return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
270 EXCHGID4_FLAG_USE_PNFS_DS;
271}
272
273static inline bool
274is_ds_client(struct nfs_client *clp)
275{
276 return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
277}
262#else /* CONFIG_NFS_v4_1 */ 278#else /* CONFIG_NFS_v4_1 */
263static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 279static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
264{ 280{
@@ -276,6 +292,18 @@ static inline int nfs4_init_session(struct nfs_server *server)
276{ 292{
277 return 0; 293 return 0;
278} 294}
295
296static inline bool
297is_ds_only_client(struct nfs_client *clp)
298{
299 return false;
300}
301
302static inline bool
303is_ds_client(struct nfs_client *clp)
304{
305 return false;
306}
279#endif /* CONFIG_NFS_V4_1 */ 307#endif /* CONFIG_NFS_V4_1 */
280 308
281extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; 309extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
@@ -298,6 +326,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 326#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 327struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 328struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
329extern void nfs4_schedule_session_recovery(struct nfs4_session *);
330#else
331static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
332{
333}
301#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
302 335
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 336extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +340,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 340extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 341extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 342extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 343extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 344extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 345extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 346extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 347extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 348extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
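is_ds_client() and is_ds_only_client() above look only at the pNFS role bits the server returned in EXCHANGE_ID: a client is usable as a data server whenever EXCHGID4_FLAG_USE_PNFS_DS is set, and is DS-only when that is the sole pNFS role bit. A small model of the two tests; the flag values are the RFC 5661 EXCHGID4 role bits as defined in include/linux/nfs4.h, restated here from memory, so treat them as assumptions:

#include <stdio.h>

#define USE_NON_PNFS	0x00010000	/* EXCHGID4_FLAG_USE_NON_PNFS (assumed) */
#define USE_PNFS_MDS	0x00020000	/* EXCHGID4_FLAG_USE_PNFS_MDS (assumed) */
#define USE_PNFS_DS	0x00040000	/* EXCHGID4_FLAG_USE_PNFS_DS (assumed) */
#define MASK_PNFS	0x00070000	/* EXCHGID4_FLAG_MASK_PNFS (assumed) */

static int is_ds_only(unsigned int exchange_flags)
{
	return (exchange_flags & MASK_PNFS) == USE_PNFS_DS;
}

static int is_ds(unsigned int exchange_flags)
{
	return (exchange_flags & USE_PNFS_DS) != 0;
}

int main(void)
{
	unsigned int ds_only = USE_PNFS_DS;
	unsigned int dual_role = USE_PNFS_MDS | USE_PNFS_DS;

	printf("DS-only client: is_ds=%d is_ds_only=%d\n",
	       is_ds(ds_only), is_ds_only(ds_only));
	printf("MDS+DS client:  is_ds=%d is_ds_only=%d\n",
	       is_ds(dual_role), is_ds_only(dual_role));
	return 0;
}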
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 23f930caf1e2..428558464817 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -40,32 +40,309 @@ MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); 40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver"); 41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42 42
43static int 43#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
44filelayout_set_layoutdriver(struct nfs_server *nfss) 44
45static loff_t
46filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
47 loff_t offset)
45{ 48{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, 49 u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
47 nfs4_fl_free_deviceid_callback); 50 u64 tmp;
48 if (status) { 51
49 printk(KERN_WARNING "%s: deviceid cache could not be " 52 offset -= flseg->pattern_offset;
50 "initialized\n", __func__); 53 tmp = offset;
51 return status; 54 do_div(tmp, stripe_width);
55
56 return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
57}
58
59/* This function is used by the layout driver to calculate the
60 * offset of the file on the dserver based on whether the
61 * layout type is STRIPE_DENSE or STRIPE_SPARSE
62 */
63static loff_t
64filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
65{
66 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
67
68 switch (flseg->stripe_type) {
69 case STRIPE_SPARSE:
70 return offset;
71
72 case STRIPE_DENSE:
73 return filelayout_get_dense_offset(flseg, offset);
52 } 74 }
53 dprintk("%s: deviceid cache has been initialized successfully\n", 75
54 __func__); 76 BUG();
77}
78
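filelayout_get_dense_offset() above compresses a file offset onto a single data server's file: with stripe unit S and stripe count N the stripe width is S*N, and the DS-local offset is (offset/width)*S plus the remainder within the stripe unit, after pattern_offset has been subtracted. A worked userspace example with made-up layout parameters (4 data servers, 64 KiB stripe units):

#include <stdio.h>
#include <stdint.h>

/* Userspace model of filelayout_get_dense_offset(): in a STRIPE_DENSE layout
 * each data server stores only its own stripe units back to back, so the file
 * offset must be mapped onto the DS-local file. */
static uint64_t dense_offset(uint64_t offset, uint32_t stripe_unit,
			     uint32_t stripe_count, uint64_t pattern_offset)
{
	uint64_t width = (uint64_t)stripe_unit * stripe_count;

	offset -= pattern_offset;
	return (offset / width) * stripe_unit + offset % stripe_unit;
}

int main(void)
{
	/* File offset 0x50000 is stripe unit #5; with 4 servers it is the
	 * second unit stored on its DS, i.e. DS-local offset 0x10000. */
	printf("0x%llx\n",
	       (unsigned long long)dense_offset(0x50000, 0x10000, 4, 0));
	return 0;
}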
79/* For data server errors we don't recover from */
80static void
81filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
82{
83 if (lseg->pls_range.iomode == IOMODE_RW) {
84 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
85 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
86 } else {
87 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
88 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
89 }
90}
91
92static int filelayout_async_handle_error(struct rpc_task *task,
93 struct nfs4_state *state,
94 struct nfs_client *clp,
95 int *reset)
96{
97 if (task->tk_status >= 0)
98 return 0;
99
100 *reset = 0;
101
102 switch (task->tk_status) {
103 case -NFS4ERR_BADSESSION:
104 case -NFS4ERR_BADSLOT:
105 case -NFS4ERR_BAD_HIGH_SLOT:
106 case -NFS4ERR_DEADSESSION:
107 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
108 case -NFS4ERR_SEQ_FALSE_RETRY:
109 case -NFS4ERR_SEQ_MISORDERED:
110 dprintk("%s ERROR %d, Reset session. Exchangeid "
111 "flags 0x%x\n", __func__, task->tk_status,
112 clp->cl_exchange_flags);
113 nfs4_schedule_session_recovery(clp->cl_session);
114 break;
115 case -NFS4ERR_DELAY:
116 case -NFS4ERR_GRACE:
117 case -EKEYEXPIRED:
118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
119 break;
120 default:
121 dprintk("%s DS error. Retry through MDS %d\n", __func__,
122 task->tk_status);
123 *reset = 1;
124 break;
125 }
126 task->tk_status = 0;
127 return -EAGAIN;
128}
129
130/* NFS_PROTO call done callback routines */
131
132static int filelayout_read_done_cb(struct rpc_task *task,
133 struct nfs_read_data *data)
134{
135 struct nfs_client *clp = data->ds_clp;
136 int reset = 0;
137
138 dprintk("%s DS read\n", __func__);
139
140 if (filelayout_async_handle_error(task, data->args.context->state,
141 data->ds_clp, &reset) == -EAGAIN) {
142 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
143 __func__, data->ds_clp, data->ds_clp->cl_session);
144 if (reset) {
145 filelayout_set_lo_fail(data->lseg);
146 nfs4_reset_read(task, data);
147 clp = NFS_SERVER(data->inode)->nfs_client;
148 }
149 nfs_restart_rpc(task, clp);
150 return -EAGAIN;
151 }
152
55 return 0; 153 return 0;
56} 154}
57 155
58/* Clear out the layout by destroying its device list */ 156/*
59static int 157 * Call ops for the async read/write cases
60filelayout_clear_layoutdriver(struct nfs_server *nfss) 158 * In the case of dense layouts, the offset needs to be reset to its
159 * original value.
160 */
161static void filelayout_read_prepare(struct rpc_task *task, void *data)
61{ 162{
62 dprintk("--> %s\n", __func__); 163 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
164
165 rdata->read_done_cb = filelayout_read_done_cb;
166
167 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
168 &rdata->args.seq_args, &rdata->res.seq_res,
169 0, task))
170 return;
171
172 rpc_call_start(task);
173}
174
175static void filelayout_read_call_done(struct rpc_task *task, void *data)
176{
177 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
178
179 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
180
181 /* Note this may cause RPC to be resent */
182 rdata->mds_ops->rpc_call_done(task, data);
183}
184
185static void filelayout_read_release(void *data)
186{
187 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
188
189 rdata->mds_ops->rpc_release(data);
190}
191
192static int filelayout_write_done_cb(struct rpc_task *task,
193 struct nfs_write_data *data)
194{
195 int reset = 0;
196
197 if (filelayout_async_handle_error(task, data->args.context->state,
198 data->ds_clp, &reset) == -EAGAIN) {
199 struct nfs_client *clp;
200
201 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
202 __func__, data->ds_clp, data->ds_clp->cl_session);
203 if (reset) {
204 filelayout_set_lo_fail(data->lseg);
205 nfs4_reset_write(task, data);
206 clp = NFS_SERVER(data->inode)->nfs_client;
207 } else
208 clp = data->ds_clp;
209 nfs_restart_rpc(task, clp);
210 return -EAGAIN;
211 }
63 212
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0; 213 return 0;
67} 214}
68 215
216static void filelayout_write_prepare(struct rpc_task *task, void *data)
217{
218 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
219
220 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
221 &wdata->args.seq_args, &wdata->res.seq_res,
222 0, task))
223 return;
224
225 rpc_call_start(task);
226}
227
228static void filelayout_write_call_done(struct rpc_task *task, void *data)
229{
230 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
231
232 /* Note this may cause RPC to be resent */
233 wdata->mds_ops->rpc_call_done(task, data);
234}
235
236static void filelayout_write_release(void *data)
237{
238 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
239
240 wdata->mds_ops->rpc_release(data);
241}
242
243struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done,
246 .rpc_release = filelayout_read_release,
247};
248
249struct rpc_call_ops filelayout_write_call_ops = {
250 .rpc_call_prepare = filelayout_write_prepare,
251 .rpc_call_done = filelayout_write_call_done,
252 .rpc_release = filelayout_write_release,
253};
254
255static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data)
257{
258 struct pnfs_layout_segment *lseg = data->lseg;
259 struct nfs4_pnfs_ds *ds;
260 loff_t offset = data->args.offset;
261 u32 j, idx;
262 struct nfs_fh *fh;
263 int status;
264
265 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
266 __func__, data->inode->i_ino,
267 data->args.pgbase, (size_t)data->args.count, offset);
268
269 /* Retrieve the correct rpc_client for the byte range */
270 j = nfs4_fl_calc_j_index(lseg, offset);
271 idx = nfs4_fl_calc_ds_index(lseg, j);
272 ds = nfs4_fl_prepare_ds(lseg, idx);
273 if (!ds) {
274 /* Either layout fh index faulty, or ds connect failed */
275 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
276 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
277 return PNFS_NOT_ATTEMPTED;
278 }
279 dprintk("%s USE DS:ip %x %hu\n", __func__,
280 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
281
282 /* No multipath support. Use first DS */
283 data->ds_clp = ds->ds_clp;
284 fh = nfs4_fl_select_ds_fh(lseg, j);
285 if (fh)
286 data->args.fh = fh;
287
288 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
289 data->mds_offset = offset;
290
291 /* Perform an asynchronous read to ds */
292 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
293 &filelayout_read_call_ops);
294 BUG_ON(status != 0);
295 return PNFS_ATTEMPTED;
296}
297
298/* Perform async writes. */
299static enum pnfs_try_status
300filelayout_write_pagelist(struct nfs_write_data *data, int sync)
301{
302 struct pnfs_layout_segment *lseg = data->lseg;
303 struct nfs4_pnfs_ds *ds;
304 loff_t offset = data->args.offset;
305 u32 j, idx;
306 struct nfs_fh *fh;
307 int status;
308
309 /* Retrieve the correct rpc_client for the byte range */
310 j = nfs4_fl_calc_j_index(lseg, offset);
311 idx = nfs4_fl_calc_ds_index(lseg, j);
312 ds = nfs4_fl_prepare_ds(lseg, idx);
313 if (!ds) {
314 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
315 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
316 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
317 return PNFS_NOT_ATTEMPTED;
318 }
319 dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
320 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j);
330 if (fh)
331 data->args.fh = fh;
332 /*
333 * Get the file offset on the dserver. Set the write offset to
334 * this offset and save the original offset.
335 */
336 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
337 data->mds_offset = offset;
338
339 /* Perform an asynchronous write */
340 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
341 &filelayout_write_call_ops, sync);
342 BUG_ON(status != 0);
343 return PNFS_ATTEMPTED;
344}
345
69/* 346/*
70 * filelayout_check_layout() 347 * filelayout_check_layout()
71 * 348 *
@@ -92,14 +369,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
92 goto out; 369 goto out;
93 } 370 }
94 371
95 if (fl->stripe_unit % PAGE_SIZE) { 372 if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n", 373 dprintk("%s Invalid stripe unit (%u)\n",
97 __func__, fl->stripe_unit); 374 __func__, fl->stripe_unit);
98 goto out; 375 goto out;
99 } 376 }
100 377
101 /* find and reference the deviceid */ 378 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); 379 dsaddr = nfs4_fl_find_get_deviceid(id);
103 if (dsaddr == NULL) { 380 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->plh_inode, id); 381 dsaddr = get_device_info(lo->plh_inode, id);
105 if (dsaddr == NULL) 382 if (dsaddr == NULL)
@@ -134,7 +411,7 @@ out:
134 dprintk("--> %s returns %d\n", __func__, status); 411 dprintk("--> %s returns %d\n", __func__, status);
135 return status; 412 return status;
136out_put: 413out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); 414 nfs4_fl_put_deviceid(dsaddr);
138 goto out; 415 goto out;
139} 416}
140 417
@@ -243,23 +520,47 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
243static void 520static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg) 521filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{ 522{
246 struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248 524
249 dprintk("--> %s\n", __func__); 525 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, 526 nfs4_fl_put_deviceid(fl->dsaddr);
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl); 527 _filelayout_free_lseg(fl);
253} 528}
254 529
530/*
531 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
532 *
533 * return 1 : coalesce page
534 * return 0 : don't coalesce page
535 */
536int
537filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
538 struct nfs_page *req)
539{
540 u64 p_stripe, r_stripe;
541 u32 stripe_unit;
542
543 if (!pgio->pg_lseg)
544 return 1;
545 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
546 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
547 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
548
549 do_div(p_stripe, stripe_unit);
550 do_div(r_stripe, stripe_unit);
551
552 return (p_stripe == r_stripe);
553}
554
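filelayout_pg_test() above lets the generic page I/O code coalesce two pages into one RPC only when both fall inside the same stripe unit, which guarantees a single READ or WRITE never straddles two data servers. A small model with an assumed 4 KiB page size and a 64 KiB stripe unit:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */

/* Model of filelayout_pg_test(): pages coalesce only within a stripe unit. */
static int can_coalesce(uint64_t prev_index, uint64_t req_index, uint32_t stripe_unit)
{
	uint64_t p = (prev_index << PAGE_SHIFT) / stripe_unit;
	uint64_t r = (req_index << PAGE_SHIFT) / stripe_unit;

	return p == r;
}

int main(void)
{
	uint32_t stripe_unit = 64 * 1024;	/* 16 pages per stripe unit */

	printf("pages 14,15 -> %d\n", can_coalesce(14, 15, stripe_unit));	/* same unit */
	printf("pages 15,16 -> %d\n", can_coalesce(15, 16, stripe_unit));	/* crosses units */
	return 0;
}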
255static struct pnfs_layoutdriver_type filelayout_type = { 555static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES, 556 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES", 557 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE, 558 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver, 559 .alloc_lseg = filelayout_alloc_lseg,
260 .clear_layoutdriver = filelayout_clear_layoutdriver, 560 .free_lseg = filelayout_free_lseg,
261 .alloc_lseg = filelayout_alloc_lseg, 561 .pg_test = filelayout_pg_test,
262 .free_lseg = filelayout_free_lseg, 562 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist,
263}; 564};
264 565
265static int __init nfs4filelayout_init(void) 566static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index bbf60dd2ab9d..ee0c907742b5 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -55,8 +55,14 @@ struct nfs4_pnfs_ds {
55 atomic_t ds_count; 55 atomic_t ds_count;
56}; 56};
57 57
58/* nfs4_file_layout_dsaddr flags */
59#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
60
58struct nfs4_file_layout_dsaddr { 61struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid; 62 struct hlist_node node;
63 struct nfs4_deviceid deviceid;
64 atomic_t ref;
65 unsigned long flags;
60 u32 stripe_count; 66 u32 stripe_count;
61 u8 *stripe_indices; 67 u8 *stripe_indices;
62 u32 ds_num; 68 u32 ds_num;
@@ -83,11 +89,18 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
83 generic_hdr); 89 generic_hdr);
84} 90}
85 91
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); 92extern struct nfs_fh *
93nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
94
87extern void print_ds(struct nfs4_pnfs_ds *ds); 95extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id); 96extern void print_deviceid(struct nfs4_deviceid *dev_id);
97u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
98u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
99struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
100 u32 ds_idx);
89extern struct nfs4_file_layout_dsaddr * 101extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); 102nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
103extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
91struct nfs4_file_layout_dsaddr * 104struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); 105get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93 106
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..68143c162e3b 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -37,6 +37,30 @@
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 38
39/* 39/*
40 * Device ID RCU cache. A device ID is unique per client ID and layout type.
41 */
42#define NFS4_FL_DEVICE_ID_HASH_BITS 5
43#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
44#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
45
46static inline u32
47nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
48{
49 unsigned char *cptr = (unsigned char *)id->data;
50 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
51 u32 x = 0;
52
53 while (nbytes--) {
54 x *= 37;
55 x += *cptr++;
56 }
57 return x & NFS4_FL_DEVICE_ID_HASH_MASK;
58}
59
60static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
61static DEFINE_SPINLOCK(filelayout_deviceid_lock);
62
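The new device ID cache above hashes the 16 opaque device ID bytes with a multiply-by-37 rolling hash and folds the result into one of 32 hlist buckets (5 hash bits). A userspace model of the hash, assuming NFS4_DEVICEID4_SIZE is 16 bytes:

#include <stdio.h>
#include <stdint.h>

#define HASH_BITS	5
#define HASH_MASK	((1u << HASH_BITS) - 1)
#define DEVICEID_SIZE	16	/* NFS4_DEVICEID4_SIZE (assumed) */

/* Userspace model of nfs4_fl_deviceid_hash() above. */
static uint32_t deviceid_hash(const unsigned char *id)
{
	uint32_t x = 0;
	int i;

	for (i = 0; i < DEVICEID_SIZE; i++)
		x = x * 37 + id[i];
	return x & HASH_MASK;
}

int main(void)
{
	unsigned char id[DEVICEID_SIZE] = { 0xde, 0xad, 0xbe, 0xef };	/* rest zero */

	printf("bucket %u of %u\n", deviceid_hash(id), HASH_MASK + 1);
	return 0;
}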
63/*
40 * Data server cache 64 * Data server cache
41 * 65 *
42 * Data servers can be mapped to different device ids. 66 * Data servers can be mapped to different device ids.
@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
104 return NULL; 128 return NULL;
105} 129}
106 130
131/*
132 * Create an rpc connection to the nfs4_pnfs_ds data server
133 * Currently only support IPv4
134 */
135static int
136nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
137{
138 struct nfs_client *clp;
139 struct sockaddr_in sin;
140 int status = 0;
141
142 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
143 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
144 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
145
146 sin.sin_family = AF_INET;
147 sin.sin_addr.s_addr = ds->ds_ip_addr;
148 sin.sin_port = ds->ds_port;
149
150 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
151 sizeof(sin), IPPROTO_TCP);
152 if (IS_ERR(clp)) {
153 status = PTR_ERR(clp);
154 goto out;
155 }
156
157 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
158 if (!is_ds_client(clp)) {
159 status = -ENODEV;
160 goto out_put;
161 }
162 ds->ds_clp = clp;
163 dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
164 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
165 goto out;
166 }
167
168 /*
169 * Do not set NFS_CS_CHECK_LEASE_TIME; instead set the DS lease to
170 * be equal to the MDS lease. Renewal is scheduled in create_session.
171 */
172 spin_lock(&mds_srv->nfs_client->cl_lock);
173 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
174 spin_unlock(&mds_srv->nfs_client->cl_lock);
175 clp->cl_last_renewal = jiffies;
176
177 /* New nfs_client */
178 status = nfs4_init_ds_session(clp);
179 if (status)
180 goto out_put;
181
182 ds->ds_clp = clp;
183 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
184 ntohs(ds->ds_port));
185out:
186 return status;
187out_put:
188 nfs_put_client(clp);
189 goto out;
190}
191
107static void 192static void
108destroy_ds(struct nfs4_pnfs_ds *ds) 193destroy_ds(struct nfs4_pnfs_ds *ds)
109{ 194{
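nfs4_ds_connect() above builds a sockaddr_in directly from ds_ip_addr and ds_port, which are already stored in network byte order, then either reuses an existing pNFS-aware nfs_client or creates a new one whose lease time simply mirrors the MDS lease. The user-space sketch below only illustrates the address setup and a plain TCP connect; the real code goes through nfs4_set_ds_client() and the sunrpc transport, and the loopback target here is made up.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Build a sockaddr_in from an address and port that are already in network
 * byte order, as nfs4_ds_connect() does, and open a plain TCP connection. */
static int ds_tcp_connect(uint32_t ip_netorder, uint16_t port_netorder)
{
	struct sockaddr_in sin;
	int fd;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = ip_netorder;
	sin.sin_port = port_netorder;

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd < 0)
		return -1;
	if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	/* 127.0.0.1:2049, both values converted to network order. */
	int fd = ds_tcp_connect(htonl(INADDR_LOOPBACK), htons(2049));

	if (fd < 0) {
		perror("connect");
		return 1;
	}
	printf("connected\n");
	close(fd);
	return 0;
}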
@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
122 struct nfs4_pnfs_ds *ds; 207 struct nfs4_pnfs_ds *ds;
123 int i; 208 int i;
124 209
125 print_deviceid(&dsaddr->deviceid.de_id); 210 print_deviceid(&dsaddr->deviceid);
126 211
127 for (i = 0; i < dsaddr->ds_num; i++) { 212 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i]; 213 ds = dsaddr->ds_list[i];
@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
139 kfree(dsaddr); 224 kfree(dsaddr);
140} 225}
141 226
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds * 227static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) 228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{ 229{
@@ -219,6 +295,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 295 goto out_err;
220 } 296 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 297 buf = kmalloc(rlen + 1, GFP_KERNEL);
298 if (!buf) {
299 dprintk("%s: Not enough memory\n", __func__);
300 goto out_err;
301 }
222 buf[rlen] = '\0'; 302 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 303 memcpy(buf, r_addr, rlen);
224 304
@@ -296,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
296 dsaddr->stripe_count = cnt; 376 dsaddr->stripe_count = cnt;
297 dsaddr->ds_num = num; 377 dsaddr->ds_num = num;
298 378
299 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); 379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
300 380
301 /* Go back and read stripe indices */ 381 /* Go back and read stripe indices */
302 p = indicesp; 382 p = indicesp;
@@ -346,28 +426,37 @@ out_err:
346} 426}
347 427
348/* 428/*
349 * Decode the opaque device specified in 'dev' 429 * Decode the opaque device specified in 'dev' and add it to the cache of
350 * and add it to the list of available devices. 430 * available devices.
351 * If the deviceid is already cached, nfs4_add_deviceid will return
352 * a pointer to the cached struct and throw away the new.
353 */ 431 */
354static struct nfs4_file_layout_dsaddr* 432static struct nfs4_file_layout_dsaddr *
355decode_and_add_device(struct inode *inode, struct pnfs_device *dev) 433decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
356{ 434{
357 struct nfs4_file_layout_dsaddr *dsaddr; 435 struct nfs4_file_layout_dsaddr *d, *new;
358 struct pnfs_deviceid_node *d; 436 long hash;
359 437
360 dsaddr = decode_device(inode, dev); 438 new = decode_device(inode, dev);
361 if (!dsaddr) { 439 if (!new) {
362 printk(KERN_WARNING "%s: Could not decode or add device\n", 440 printk(KERN_WARNING "%s: Could not decode or add device\n",
363 __func__); 441 __func__);
364 return NULL; 442 return NULL;
365 } 443 }
366 444
367 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, 445 spin_lock(&filelayout_deviceid_lock);
368 &dsaddr->deviceid); 446 d = nfs4_fl_find_get_deviceid(&new->deviceid);
447 if (d) {
448 spin_unlock(&filelayout_deviceid_lock);
449 nfs4_fl_free_deviceid(new);
450 return d;
451 }
452
453 INIT_HLIST_NODE(&new->node);
454 atomic_set(&new->ref, 1);
455 hash = nfs4_fl_deviceid_hash(&new->deviceid);
456 hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
457 spin_unlock(&filelayout_deviceid_lock);
369 458
370 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 459 return new;
371} 460}
372 461
373/* 462/*
@@ -442,12 +531,123 @@ out_free:
442 return dsaddr; 531 return dsaddr;
443} 532}
444 533
534void
535nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
536{
537 if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
538 hlist_del_rcu(&dsaddr->node);
539 spin_unlock(&filelayout_deviceid_lock);
540
541 synchronize_rcu();
542 nfs4_fl_free_deviceid(dsaddr);
543 }
544}
545
445struct nfs4_file_layout_dsaddr * 546struct nfs4_file_layout_dsaddr *
446nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) 547nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
548{
549 struct nfs4_file_layout_dsaddr *d;
550 struct hlist_node *n;
551 long hash = nfs4_fl_deviceid_hash(id);
552
553
554 rcu_read_lock();
555 hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
556 if (!memcmp(&d->deviceid, id, sizeof(*id))) {
557 if (!atomic_inc_not_zero(&d->ref))
558 goto fail;
559 rcu_read_unlock();
560 return d;
561 }
562 }
563fail:
564 rcu_read_unlock();
565 return NULL;
566}
567
568/*
569 * Want res = (offset - layout->pattern_offset) / layout->stripe_unit
570 * Then: ((res + fsi) % dsaddr->stripe_count)
571 */
572u32
573nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
574{
575 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
576 u64 tmp;
577
578 tmp = offset - flseg->pattern_offset;
579 do_div(tmp, flseg->stripe_unit);
580 tmp += flseg->first_stripe_index;
581 return do_div(tmp, flseg->dsaddr->stripe_count);
582}
583
584u32
585nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
447{ 586{
448 struct pnfs_deviceid_node *d; 587 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
588}
449 589
450 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); 590struct nfs_fh *
451 return (d == NULL) ? NULL : 591nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
452 container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 592{
593 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
594 u32 i;
595
596 if (flseg->stripe_type == STRIPE_SPARSE) {
597 if (flseg->num_fh == 1)
598 i = 0;
599 else if (flseg->num_fh == 0)
600 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
601 return NULL;
602 else
603 i = nfs4_fl_calc_ds_index(lseg, j);
604 } else
605 i = j;
606 return flseg->fh_array[i];
607}
608
609static void
610filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
611 int err, u32 ds_addr)
612{
613 u32 *p = (u32 *)&dsaddr->deviceid;
614
615 printk(KERN_ERR "NFS: data server %x connection error %d."
616 " Deviceid [%x%x%x%x] marked out of use.\n",
617 ds_addr, err, p[0], p[1], p[2], p[3]);
618
619 spin_lock(&filelayout_deviceid_lock);
620 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
621 spin_unlock(&filelayout_deviceid_lock);
622}
623
624struct nfs4_pnfs_ds *
625nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
626{
627 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
628 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
629
630 if (ds == NULL) {
631 printk(KERN_ERR "%s: No data server for offset index %d\n",
632 __func__, ds_idx);
633 return NULL;
634 }
635
636 if (!ds->ds_clp) {
637 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
638 int err;
639
640 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
641 /* Already tried to connect, don't try again */
642 dprintk("%s Deviceid marked out of use\n", __func__);
643 return NULL;
644 }
645 err = nfs4_ds_connect(s, ds);
646 if (err) {
647 filelayout_mark_devid_negative(dsaddr, err,
648 ntohl(ds->ds_ip_addr));
649 return NULL;
650 }
651 }
652 return ds;
453} 653}
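The index helpers added above turn a file offset into a stripe index j and then into a data-server slot: j = ((offset - pattern_offset) / stripe_unit + first_stripe_index) % stripe_count, and the device's stripe_indices[] array maps j to an entry in ds_list[]. Here is a small worked example with made-up layout values; the kernel uses do_div() because the dividend is 64-bit.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented layout parameters, for illustration only. */
	uint64_t offset = 3 * 1024 * 1024;	/* byte offset into the file */
	uint64_t pattern_offset = 0;
	uint64_t stripe_unit = 65536;		/* 64 KiB stripe unit */
	uint32_t first_stripe_index = 1;
	uint32_t stripe_count = 4;
	uint32_t stripe_indices[4] = { 2, 0, 3, 1 };	/* j -> ds_list index */

	uint64_t su = (offset - pattern_offset) / stripe_unit;
	uint32_t j = (uint32_t)((su + first_stripe_index) % stripe_count);

	printf("stripe unit %llu maps to j=%u, ds_list index %u\n",
	       (unsigned long long)su, j, stripe_indices[j]);
	return 0;
}

With these numbers the 3 MiB offset falls in stripe unit 48, so j = (48 + 1) % 4 = 1 and the request would go to the data server at ds_list[stripe_indices[1]], i.e. ds_list[0].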
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3c2a1724fbd2..bb80c49b6533 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -54,33 +54,29 @@ Elong:
54/* 54/*
55 * Determine the mount path as a string 55 * Determine the mount path as a string
56 */ 56 */
57static char *nfs4_path(const struct vfsmount *mnt_parent, 57static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
58 const struct dentry *dentry,
59 char *buffer, ssize_t buflen)
60{ 58{
61 const char *srvpath; 59 char *limit;
62 60 char *path = nfs_path(&limit, dentry, buffer, buflen);
63 srvpath = strchr(mnt_parent->mnt_devname, ':'); 61 if (!IS_ERR(path)) {
64 if (srvpath) 62 char *colon = strchr(path, ':');
65 srvpath++; 63 if (colon && colon < limit)
66 else 64 path = colon + 1;
67 srvpath = mnt_parent->mnt_devname; 65 }
68 66 return path;
69 return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
70} 67}
71 68
72/* 69/*
73 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we 70 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
74 * believe to be the server path to this dentry 71 * believe to be the server path to this dentry
75 */ 72 */
76static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, 73static int nfs4_validate_fspath(struct dentry *dentry,
77 const struct dentry *dentry,
78 const struct nfs4_fs_locations *locations, 74 const struct nfs4_fs_locations *locations,
79 char *page, char *page2) 75 char *page, char *page2)
80{ 76{
81 const char *path, *fs_path; 77 const char *path, *fs_path;
82 78
83 path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); 79 path = nfs4_path(dentry, page, PAGE_SIZE);
84 if (IS_ERR(path)) 80 if (IS_ERR(path))
85 return PTR_ERR(path); 81 return PTR_ERR(path);
86 82
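The rewritten nfs4_path() no longer needs the parent vfsmount: nfs_path() builds the devname-style string ("server:/export/...") from the dentry, and anything after the first ':' that falls before the returned limit is taken as the server-side export path. A user-space sketch of just that prefix-stripping step, ignoring the limit check and error handling:

#include <stdio.h>
#include <string.h>

/* Given a devname-style string such as "server:/export/home", return the
 * part after the first ':'; fall back to the whole string if there is none. */
static const char *export_path(const char *devname)
{
	const char *colon = strchr(devname, ':');

	return colon ? colon + 1 : devname;
}

int main(void)
{
	printf("%s\n", export_path("server.example.com:/export/home"));
	printf("%s\n", export_path("/plain/path"));
	return 0;
}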
@@ -165,20 +161,18 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
165 161
166/** 162/**
167 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error 163 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
168 * @mnt_parent - mountpoint of parent directory
169 * @dentry - parent directory 164 * @dentry - parent directory
170 * @locations - array of NFSv4 server location information 165 * @locations - array of NFSv4 server location information
171 * 166 *
172 */ 167 */
173static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, 168static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
174 const struct dentry *dentry,
175 const struct nfs4_fs_locations *locations) 169 const struct nfs4_fs_locations *locations)
176{ 170{
177 struct vfsmount *mnt = ERR_PTR(-ENOENT); 171 struct vfsmount *mnt = ERR_PTR(-ENOENT);
178 struct nfs_clone_mount mountdata = { 172 struct nfs_clone_mount mountdata = {
179 .sb = mnt_parent->mnt_sb, 173 .sb = dentry->d_sb,
180 .dentry = dentry, 174 .dentry = dentry,
181 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 175 .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
182 }; 176 };
183 char *page = NULL, *page2 = NULL; 177 char *page = NULL, *page2 = NULL;
184 int loc, error; 178 int loc, error;
@@ -198,7 +192,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
198 goto out; 192 goto out;
199 193
200 /* Ensure fs path is a prefix of current dentry path */ 194 /* Ensure fs path is a prefix of current dentry path */
201 error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); 195 error = nfs4_validate_fspath(dentry, locations, page, page2);
202 if (error < 0) { 196 if (error < 0) {
203 mnt = ERR_PTR(error); 197 mnt = ERR_PTR(error);
204 goto out; 198 goto out;
@@ -225,11 +219,10 @@ out:
225 219
226/* 220/*
227 * nfs_do_refmount - handle crossing a referral on server 221 * nfs_do_refmount - handle crossing a referral on server
228 * @mnt_parent - mountpoint of referral
229 * @dentry - dentry of referral 222 * @dentry - dentry of referral
230 * 223 *
231 */ 224 */
232struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 225struct vfsmount *nfs_do_refmount(struct dentry *dentry)
233{ 226{
234 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 227 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
235 struct dentry *parent; 228 struct dentry *parent;
@@ -262,7 +255,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
262 fs_locations->fs_path.ncomponents <= 0) 255 fs_locations->fs_path.ncomponents <= 0)
263 goto out_free; 256 goto out_free;
264 257
265 mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); 258 mnt = nfs_follow_referral(dentry, fs_locations);
266out_free: 259out_free:
267 __free_page(page); 260 __free_page(page);
268 kfree(fs_locations); 261 kfree(fs_locations);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..1d84e7088af9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -85,6 +85,9 @@ static int nfs4_map_errors(int err)
85 switch (err) { 85 switch (err) {
86 case -NFS4ERR_RESOURCE: 86 case -NFS4ERR_RESOURCE:
87 return -EREMOTEIO; 87 return -EREMOTEIO;
88 case -NFS4ERR_BADOWNER:
89 case -NFS4ERR_BADNAME:
90 return -EINVAL;
88 default: 91 default:
89 dprintk("%s could not handle NFSv4 error %d\n", 92 dprintk("%s could not handle NFSv4 error %d\n",
90 __func__, -err); 93 __func__, -err);
@@ -241,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
241/* This is the error handling routine for processes that are allowed 244/* This is the error handling routine for processes that are allowed
242 * to sleep. 245 * to sleep.
243 */ 246 */
244static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 247static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
245{ 248{
246 struct nfs_client *clp = server->nfs_client; 249 struct nfs_client *clp = server->nfs_client;
247 struct nfs4_state *state = exception->state; 250 struct nfs4_state *state = exception->state;
@@ -256,12 +259,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
256 case -NFS4ERR_OPENMODE: 259 case -NFS4ERR_OPENMODE:
257 if (state == NULL) 260 if (state == NULL)
258 break; 261 break;
259 nfs4_state_mark_reclaim_nograce(clp, state); 262 nfs4_schedule_stateid_recovery(server, state);
260 goto do_state_recovery; 263 goto wait_on_recovery;
261 case -NFS4ERR_STALE_STATEID: 264 case -NFS4ERR_STALE_STATEID:
262 case -NFS4ERR_STALE_CLIENTID: 265 case -NFS4ERR_STALE_CLIENTID:
263 case -NFS4ERR_EXPIRED: 266 case -NFS4ERR_EXPIRED:
264 goto do_state_recovery; 267 nfs4_schedule_lease_recovery(clp);
268 goto wait_on_recovery;
265#if defined(CONFIG_NFS_V4_1) 269#if defined(CONFIG_NFS_V4_1)
266 case -NFS4ERR_BADSESSION: 270 case -NFS4ERR_BADSESSION:
267 case -NFS4ERR_BADSLOT: 271 case -NFS4ERR_BADSLOT:
@@ -272,7 +276,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
272 case -NFS4ERR_SEQ_MISORDERED: 276 case -NFS4ERR_SEQ_MISORDERED:
273 dprintk("%s ERROR: %d Reset session\n", __func__, 277 dprintk("%s ERROR: %d Reset session\n", __func__,
274 errorcode); 278 errorcode);
275 nfs4_schedule_state_recovery(clp); 279 nfs4_schedule_session_recovery(clp->cl_session);
276 exception->retry = 1; 280 exception->retry = 1;
277 break; 281 break;
278#endif /* defined(CONFIG_NFS_V4_1) */ 282#endif /* defined(CONFIG_NFS_V4_1) */
@@ -292,11 +296,23 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
292 break; 296 break;
293 case -NFS4ERR_OLD_STATEID: 297 case -NFS4ERR_OLD_STATEID:
294 exception->retry = 1; 298 exception->retry = 1;
299 break;
300 case -NFS4ERR_BADOWNER:
301 /* The following works around a Linux server bug! */
302 case -NFS4ERR_BADNAME:
303 if (server->caps & NFS_CAP_UIDGID_NOMAP) {
304 server->caps &= ~NFS_CAP_UIDGID_NOMAP;
305 exception->retry = 1;
306 printk(KERN_WARNING "NFS: v4 server %s "
307 "does not accept raw "
308 "uid/gids. "
309 "Reenabling the idmapper.\n",
310 server->nfs_client->cl_hostname);
311 }
295 } 312 }
296 /* We failed to handle the error */ 313 /* We failed to handle the error */
297 return nfs4_map_errors(ret); 314 return nfs4_map_errors(ret);
298do_state_recovery: 315wait_on_recovery:
299 nfs4_schedule_state_recovery(clp);
300 ret = nfs4_wait_clnt_recover(clp); 316 ret = nfs4_wait_clnt_recover(clp);
301 if (ret == 0) 317 if (ret == 0)
302 exception->retry = 1; 318 exception->retry = 1;
@@ -435,8 +451,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
435 clp = res->sr_session->clp; 451 clp = res->sr_session->clp;
436 do_renew_lease(clp, timestamp); 452 do_renew_lease(clp, timestamp);
437 /* Check sequence flags */ 453 /* Check sequence flags */
438 if (atomic_read(&clp->cl_count) > 1) 454 if (res->sr_status_flags != 0)
439 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 455 nfs4_schedule_lease_recovery(clp);
440 break; 456 break;
441 case -NFS4ERR_DELAY: 457 case -NFS4ERR_DELAY:
442 /* The server detected a resend of the RPC call and 458 /* The server detected a resend of the RPC call and
@@ -505,7 +521,7 @@ out:
505 return ret_id; 521 return ret_id;
506} 522}
507 523
508static int nfs41_setup_sequence(struct nfs4_session *session, 524int nfs41_setup_sequence(struct nfs4_session *session,
509 struct nfs4_sequence_args *args, 525 struct nfs4_sequence_args *args,
510 struct nfs4_sequence_res *res, 526 struct nfs4_sequence_res *res,
511 int cache_reply, 527 int cache_reply,
@@ -571,6 +587,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
571 res->sr_status = 1; 587 res->sr_status = 1;
572 return 0; 588 return 0;
573} 589}
590EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
574 591
575int nfs4_setup_sequence(const struct nfs_server *server, 592int nfs4_setup_sequence(const struct nfs_server *server,
576 struct nfs4_sequence_args *args, 593 struct nfs4_sequence_args *args,
@@ -1255,14 +1272,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1255 case -NFS4ERR_BAD_HIGH_SLOT: 1272 case -NFS4ERR_BAD_HIGH_SLOT:
1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1273 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1257 case -NFS4ERR_DEADSESSION: 1274 case -NFS4ERR_DEADSESSION:
1258 nfs4_schedule_state_recovery( 1275 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1259 server->nfs_client);
1260 goto out; 1276 goto out;
1261 case -NFS4ERR_STALE_CLIENTID: 1277 case -NFS4ERR_STALE_CLIENTID:
1262 case -NFS4ERR_STALE_STATEID: 1278 case -NFS4ERR_STALE_STATEID:
1263 case -NFS4ERR_EXPIRED: 1279 case -NFS4ERR_EXPIRED:
1264 /* Don't recall a delegation if it was lost */ 1280 /* Don't recall a delegation if it was lost */
1265 nfs4_schedule_state_recovery(server->nfs_client); 1281 nfs4_schedule_lease_recovery(server->nfs_client);
1266 goto out; 1282 goto out;
1267 case -ERESTARTSYS: 1283 case -ERESTARTSYS:
1268 /* 1284 /*
@@ -1271,7 +1287,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1271 */ 1287 */
1272 case -NFS4ERR_ADMIN_REVOKED: 1288 case -NFS4ERR_ADMIN_REVOKED:
1273 case -NFS4ERR_BAD_STATEID: 1289 case -NFS4ERR_BAD_STATEID:
1274 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1290 nfs4_schedule_stateid_recovery(server, state);
1275 case -EKEYEXPIRED: 1291 case -EKEYEXPIRED:
1276 /* 1292 /*
1277 * User RPCSEC_GSS context has expired. 1293 * User RPCSEC_GSS context has expired.
@@ -1574,9 +1590,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1574 return 0; 1590 return 0;
1575} 1591}
1576 1592
1577static int nfs4_recover_expired_lease(struct nfs_server *server) 1593static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1578{ 1594{
1579 struct nfs_client *clp = server->nfs_client;
1580 unsigned int loop; 1595 unsigned int loop;
1581 int ret; 1596 int ret;
1582 1597
@@ -1587,12 +1602,17 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1587 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1602 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1588 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1603 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1589 break; 1604 break;
1590 nfs4_schedule_state_recovery(clp); 1605 nfs4_schedule_state_manager(clp);
1591 ret = -EIO; 1606 ret = -EIO;
1592 } 1607 }
1593 return ret; 1608 return ret;
1594} 1609}
1595 1610
1611static int nfs4_recover_expired_lease(struct nfs_server *server)
1612{
1613 return nfs4_client_recover_expired_lease(server->nfs_client);
1614}
1615
1596/* 1616/*
1597 * OPEN_EXPIRED: 1617 * OPEN_EXPIRED:
1598 * reclaim state on the server after a network partition. 1618 * reclaim state on the server after a network partition.
@@ -3070,15 +3090,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3070 return err; 3090 return err;
3071} 3091}
3072 3092
3073static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3093static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3074{ 3094{
3075 struct nfs_server *server = NFS_SERVER(data->inode); 3095 struct nfs_server *server = NFS_SERVER(data->inode);
3076 3096
3077 dprintk("--> %s\n", __func__);
3078
3079 if (!nfs4_sequence_done(task, &data->res.seq_res))
3080 return -EAGAIN;
3081
3082 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3097 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3083 nfs_restart_rpc(task, server->nfs_client); 3098 nfs_restart_rpc(task, server->nfs_client);
3084 return -EAGAIN; 3099 return -EAGAIN;
@@ -3090,19 +3105,44 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3090 return 0; 3105 return 0;
3091} 3106}
3092 3107
3108static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3109{
3110
3111 dprintk("--> %s\n", __func__);
3112
3113 if (!nfs4_sequence_done(task, &data->res.seq_res))
3114 return -EAGAIN;
3115
3116 return data->read_done_cb(task, data);
3117}
3118
3093static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 3119static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
3094{ 3120{
3095 data->timestamp = jiffies; 3121 data->timestamp = jiffies;
3122 data->read_done_cb = nfs4_read_done_cb;
3096 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3123 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
3097} 3124}
3098 3125
3099static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 3126/* Reset the nfs_read_data to send the read to the MDS. */
3127void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3128{
3129 dprintk("%s Reset task for i/o through\n", __func__);
3130 put_lseg(data->lseg);
3131 data->lseg = NULL;
3132 /* offsets will differ in the dense stripe case */
3133 data->args.offset = data->mds_offset;
3134 data->ds_clp = NULL;
3135 data->args.fh = NFS_FH(data->inode);
3136 data->read_done_cb = nfs4_read_done_cb;
3137 task->tk_ops = data->mds_ops;
3138 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3139}
3140EXPORT_SYMBOL_GPL(nfs4_reset_read);
3141
3142static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3100{ 3143{
3101 struct inode *inode = data->inode; 3144 struct inode *inode = data->inode;
3102 3145
3103 if (!nfs4_sequence_done(task, &data->res.seq_res))
3104 return -EAGAIN;
3105
3106 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3146 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3107 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3147 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3108 return -EAGAIN; 3148 return -EAGAIN;
@@ -3114,11 +3154,41 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3114 return 0; 3154 return 0;
3115} 3155}
3116 3156
3157static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3158{
3159 if (!nfs4_sequence_done(task, &data->res.seq_res))
3160 return -EAGAIN;
3161 return data->write_done_cb(task, data);
3162}
3163
3164/* Reset the nfs_write_data to send the write to the MDS. */
3165void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
3166{
3167 dprintk("%s Reset task for i/o through\n", __func__);
3168 put_lseg(data->lseg);
3169 data->lseg = NULL;
3170 data->ds_clp = NULL;
3171 data->write_done_cb = nfs4_write_done_cb;
3172 data->args.fh = NFS_FH(data->inode);
3173 data->args.bitmask = data->res.server->cache_consistency_bitmask;
3174 data->args.offset = data->mds_offset;
3175 data->res.fattr = &data->fattr;
3176 task->tk_ops = data->mds_ops;
3177 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3178}
3179EXPORT_SYMBOL_GPL(nfs4_reset_write);
3180
3117static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3181static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3118{ 3182{
3119 struct nfs_server *server = NFS_SERVER(data->inode); 3183 struct nfs_server *server = NFS_SERVER(data->inode);
3120 3184
3121 data->args.bitmask = server->cache_consistency_bitmask; 3185 if (data->lseg) {
3186 data->args.bitmask = NULL;
3187 data->res.fattr = NULL;
3188 } else
3189 data->args.bitmask = server->cache_consistency_bitmask;
3190 if (!data->write_done_cb)
3191 data->write_done_cb = nfs4_write_done_cb;
3122 data->res.server = server; 3192 data->res.server = server;
3123 data->timestamp = jiffies; 3193 data->timestamp = jiffies;
3124 3194
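nfs4_reset_read() and nfs4_reset_write() above implement the pNFS fallback path: when I/O through a data server fails, the request is stripped of its layout state (put_lseg(), ds_clp cleared), pointed back at the MDS file handle and the saved mds_offset, and re-driven through the MDS rpc client. The toy sketch below only shows that reset-to-fallback shape; every type and field name in it is invented for illustration.

#include <stdio.h>

struct toy_io_req {
	const char *target;	/* "DS" or "MDS" */
	long offset;		/* offset currently in use */
	long mds_offset;	/* offset saved for the MDS fallback */
	void *lseg;		/* layout segment, only valid for DS i/o */
};

/* Drop the layout-specific state and restore the values saved for the
 * metadata server before the request is retried. */
static void reset_to_mds(struct toy_io_req *req)
{
	req->lseg = NULL;		/* put_lseg() in the kernel */
	req->offset = req->mds_offset;	/* dense layouts use different offsets */
	req->target = "MDS";
}

int main(void)
{
	struct toy_io_req req = {
		.target = "DS",
		.offset = 4096,
		.mds_offset = 131072,
	};

	req.lseg = &req;		/* pretend we hold a layout segment */
	reset_to_mds(&req);
	printf("resend via %s at offset %ld\n", req.target, req.offset);
	return 0;
}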
@@ -3178,7 +3248,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3178 if (task->tk_status < 0) { 3248 if (task->tk_status < 0) {
3179 /* Unless we're shutting down, schedule state recovery! */ 3249 /* Unless we're shutting down, schedule state recovery! */
3180 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3250 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3181 nfs4_schedule_state_recovery(clp); 3251 nfs4_schedule_lease_recovery(clp);
3182 return; 3252 return;
3183 } 3253 }
3184 do_renew_lease(clp, timestamp); 3254 do_renew_lease(clp, timestamp);
@@ -3252,6 +3322,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3322 }
3253} 3323}
3254 3324
3325static int buf_to_pages_noslab(const void *buf, size_t buflen,
3326 struct page **pages, unsigned int *pgbase)
3327{
3328 struct page *newpage, **spages;
3329 int rc = 0;
3330 size_t len;
3331 spages = pages;
3332
3333 do {
3334 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3335 newpage = alloc_page(GFP_KERNEL);
3336
3337 if (newpage == NULL)
3338 goto unwind;
3339 memcpy(page_address(newpage), buf, len);
3340 buf += len;
3341 buflen -= len;
3342 *pages++ = newpage;
3343 rc++;
3344 } while (buflen != 0);
3345
3346 return rc;
3347
3348unwind:
3349 for(; rc > 0; rc--)
3350 __free_page(spages[rc-1]);
3351 return -ENOMEM;
3352}
3353
3255struct nfs4_cached_acl { 3354struct nfs4_cached_acl {
3256 int cached; 3355 int cached;
3257 size_t len; 3356 size_t len;
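buf_to_pages_noslab() copies the caller's ACL buffer into freshly allocated pages instead of mapping it in place, since the buffer may sit in slab memory that must not be handed to the network stack; if an allocation fails it frees every page it already allocated. Below is a user-space analogue of that chunk-and-unwind pattern, using malloc() and an assumed 4 KiB page size.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOY_PAGE_SIZE 4096

/* Copy an arbitrary buffer into page-sized chunks, unwinding everything
 * already allocated on failure.  Returns the chunk count, or -1. */
static int buf_to_chunks(const char *buf, size_t buflen, char **chunks)
{
	int n = 0;

	while (buflen != 0) {
		size_t len = buflen < TOY_PAGE_SIZE ? buflen : TOY_PAGE_SIZE;
		char *chunk = malloc(TOY_PAGE_SIZE);

		if (chunk == NULL)
			goto unwind;
		memcpy(chunk, buf, len);
		buf += len;
		buflen -= len;
		chunks[n++] = chunk;
	}
	return n;

unwind:
	while (n > 0)
		free(chunks[--n]);
	return -1;
}

int main(void)
{
	char data[10000];
	char *chunks[8];
	int n;

	memset(data, 'x', sizeof(data));
	n = buf_to_chunks(data, sizeof(data), chunks);
	printf("split into %d chunk(s)\n", n);
	while (n > 0)
		free(chunks[--n]);
	return 0;
}

The caller in __nfs4_proc_set_acl() then releases each page after the RPC has been transmitted, leaving only whatever reference the network stack still holds.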
@@ -3420,13 +3519,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3519 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3520 .rpc_resp = &res,
3422 }; 3521 };
3423 int ret; 3522 int ret, i;
3424 3523
3425 if (!nfs4_server_supports_acls(server)) 3524 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3525 return -EOPNOTSUPP;
3526 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3527 if (i < 0)
3528 return i;
3427 nfs_inode_return_delegation(inode); 3529 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3530 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3531
3532 /*
3533 * Free each page after tx, so the only ref left is
3534 * held by the network stack
3535 */
3536 for (; i > 0; i--)
3537 put_page(pages[i-1]);
3538
3430 /* 3539 /*
3431 * Acl update can result in inode attribute update. 3540 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3541 * so mark the attribute cache invalid.
@@ -3464,12 +3573,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3464 case -NFS4ERR_OPENMODE: 3573 case -NFS4ERR_OPENMODE:
3465 if (state == NULL) 3574 if (state == NULL)
3466 break; 3575 break;
3467 nfs4_state_mark_reclaim_nograce(clp, state); 3576 nfs4_schedule_stateid_recovery(server, state);
3468 goto do_state_recovery; 3577 goto wait_on_recovery;
3469 case -NFS4ERR_STALE_STATEID: 3578 case -NFS4ERR_STALE_STATEID:
3470 case -NFS4ERR_STALE_CLIENTID: 3579 case -NFS4ERR_STALE_CLIENTID:
3471 case -NFS4ERR_EXPIRED: 3580 case -NFS4ERR_EXPIRED:
3472 goto do_state_recovery; 3581 nfs4_schedule_lease_recovery(clp);
3582 goto wait_on_recovery;
3473#if defined(CONFIG_NFS_V4_1) 3583#if defined(CONFIG_NFS_V4_1)
3474 case -NFS4ERR_BADSESSION: 3584 case -NFS4ERR_BADSESSION:
3475 case -NFS4ERR_BADSLOT: 3585 case -NFS4ERR_BADSLOT:
@@ -3480,7 +3590,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3480 case -NFS4ERR_SEQ_MISORDERED: 3590 case -NFS4ERR_SEQ_MISORDERED:
3481 dprintk("%s ERROR %d, Reset session\n", __func__, 3591 dprintk("%s ERROR %d, Reset session\n", __func__,
3482 task->tk_status); 3592 task->tk_status);
3483 nfs4_schedule_state_recovery(clp); 3593 nfs4_schedule_session_recovery(clp->cl_session);
3484 task->tk_status = 0; 3594 task->tk_status = 0;
3485 return -EAGAIN; 3595 return -EAGAIN;
3486#endif /* CONFIG_NFS_V4_1 */ 3596#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3607,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3497 } 3607 }
3498 task->tk_status = nfs4_map_errors(task->tk_status); 3608 task->tk_status = nfs4_map_errors(task->tk_status);
3499 return 0; 3609 return 0;
3500do_state_recovery: 3610wait_on_recovery:
3501 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3611 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3502 nfs4_schedule_state_recovery(clp);
3503 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3612 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3504 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3613 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3505 task->tk_status = 0; 3614 task->tk_status = 0;
@@ -4110,7 +4219,7 @@ static void nfs4_lock_release(void *calldata)
4110 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4219 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4111 data->arg.lock_seqid); 4220 data->arg.lock_seqid);
4112 if (!IS_ERR(task)) 4221 if (!IS_ERR(task))
4113 rpc_put_task(task); 4222 rpc_put_task_async(task);
4114 dprintk("%s: cancelling lock!\n", __func__); 4223 dprintk("%s: cancelling lock!\n", __func__);
4115 } else 4224 } else
4116 nfs_free_seqid(data->arg.lock_seqid); 4225 nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4243,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4134 4243
4135static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4244static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4136{ 4245{
4137 struct nfs_client *clp = server->nfs_client;
4138 struct nfs4_state *state = lsp->ls_state;
4139
4140 switch (error) { 4246 switch (error) {
4141 case -NFS4ERR_ADMIN_REVOKED: 4247 case -NFS4ERR_ADMIN_REVOKED:
4142 case -NFS4ERR_BAD_STATEID: 4248 case -NFS4ERR_BAD_STATEID:
4143 case -NFS4ERR_EXPIRED: 4249 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4144 if (new_lock_owner != 0 || 4250 if (new_lock_owner != 0 ||
4145 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4251 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4146 nfs4_state_mark_reclaim_nograce(clp, state); 4252 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4147 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4148 break; 4253 break;
4149 case -NFS4ERR_STALE_STATEID: 4254 case -NFS4ERR_STALE_STATEID:
4150 if (new_lock_owner != 0 ||
4151 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4152 nfs4_state_mark_reclaim_reboot(clp, state);
4153 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4255 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4256 case -NFS4ERR_EXPIRED:
4257 nfs4_schedule_lease_recovery(server->nfs_client);
4154 }; 4258 };
4155} 4259}
4156 4260
@@ -4366,12 +4470,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4366 case -NFS4ERR_EXPIRED: 4470 case -NFS4ERR_EXPIRED:
4367 case -NFS4ERR_STALE_CLIENTID: 4471 case -NFS4ERR_STALE_CLIENTID:
4368 case -NFS4ERR_STALE_STATEID: 4472 case -NFS4ERR_STALE_STATEID:
4473 nfs4_schedule_lease_recovery(server->nfs_client);
4474 goto out;
4369 case -NFS4ERR_BADSESSION: 4475 case -NFS4ERR_BADSESSION:
4370 case -NFS4ERR_BADSLOT: 4476 case -NFS4ERR_BADSLOT:
4371 case -NFS4ERR_BAD_HIGH_SLOT: 4477 case -NFS4ERR_BAD_HIGH_SLOT:
4372 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4478 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4373 case -NFS4ERR_DEADSESSION: 4479 case -NFS4ERR_DEADSESSION:
4374 nfs4_schedule_state_recovery(server->nfs_client); 4480 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4375 goto out; 4481 goto out;
4376 case -ERESTARTSYS: 4482 case -ERESTARTSYS:
4377 /* 4483 /*
@@ -4381,7 +4487,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4381 case -NFS4ERR_ADMIN_REVOKED: 4487 case -NFS4ERR_ADMIN_REVOKED:
4382 case -NFS4ERR_BAD_STATEID: 4488 case -NFS4ERR_BAD_STATEID:
4383 case -NFS4ERR_OPENMODE: 4489 case -NFS4ERR_OPENMODE:
4384 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4490 nfs4_schedule_stateid_recovery(server, state);
4385 err = 0; 4491 err = 0;
4386 goto out; 4492 goto out;
4387 case -EKEYEXPIRED: 4493 case -EKEYEXPIRED:
@@ -4988,10 +5094,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
4988 int status; 5094 int status;
4989 unsigned *ptr; 5095 unsigned *ptr;
4990 struct nfs4_session *session = clp->cl_session; 5096 struct nfs4_session *session = clp->cl_session;
5097 long timeout = 0;
5098 int err;
4991 5099
4992 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5100 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
4993 5101
4994 status = _nfs4_proc_create_session(clp); 5102 do {
5103 status = _nfs4_proc_create_session(clp);
5104 if (status == -NFS4ERR_DELAY) {
5105 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5106 if (err)
5107 status = err;
5108 }
5109 } while (status == -NFS4ERR_DELAY);
5110
4995 if (status) 5111 if (status)
4996 goto out; 5112 goto out;
4997 5113
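nfs4_proc_create_session() now loops while the server answers NFS4ERR_DELAY, sleeping between attempts via nfs4_delay(), so session creation survives a server that is still coming up. The user-space sketch below mimics that retry-with-backoff shape; the error constant, attempt count and delay values are invented, and the kernel's actual delay policy lives in nfs4_delay().

#include <stdio.h>
#include <unistd.h>

#define ERR_DELAY (-10008)	/* stand-in for -NFS4ERR_DELAY */

/* Pretend server: answers "try again later" a couple of times, then succeeds. */
static int flaky_create_session(int *attempts_left)
{
	if (--(*attempts_left) > 0)
		return ERR_DELAY;
	return 0;
}

int main(void)
{
	int attempts_left = 3;
	useconds_t delay = 100000;	/* 100 ms, doubled on every retry */
	int status;

	do {
		status = flaky_create_session(&attempts_left);
		if (status == ERR_DELAY) {
			usleep(delay);
			if (delay < 1600000)
				delay *= 2;
		}
	} while (status == ERR_DELAY);

	printf("create_session status = %d\n", status);
	return 0;
}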
@@ -5073,6 +5189,27 @@ int nfs4_init_session(struct nfs_server *server)
5073 return ret; 5189 return ret;
5074} 5190}
5075 5191
5192int nfs4_init_ds_session(struct nfs_client *clp)
5193{
5194 struct nfs4_session *session = clp->cl_session;
5195 int ret;
5196
5197 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
5198 return 0;
5199
5200 ret = nfs4_client_recover_expired_lease(clp);
5201 if (!ret)
5202 /* Test for the DS role */
5203 if (!is_ds_client(clp))
5204 ret = -ENODEV;
5205 if (!ret)
5206 ret = nfs4_check_client_ready(clp);
5207 return ret;
5208
5209}
5210EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5211
5212
5076/* 5213/*
5077 * Renew the cl_session lease. 5214 * Renew the cl_session lease.
5078 */ 5215 */
@@ -5100,7 +5237,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5100 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5237 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5101 return -EAGAIN; 5238 return -EAGAIN;
5102 default: 5239 default:
5103 nfs4_schedule_state_recovery(clp); 5240 nfs4_schedule_lease_recovery(clp);
5104 } 5241 }
5105 return 0; 5242 return 0;
5106} 5243}
@@ -5187,7 +5324,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5187 if (IS_ERR(task)) 5324 if (IS_ERR(task))
5188 ret = PTR_ERR(task); 5325 ret = PTR_ERR(task);
5189 else 5326 else
5190 rpc_put_task(task); 5327 rpc_put_task_async(task);
5191 dprintk("<-- %s status=%d\n", __func__, ret); 5328 dprintk("<-- %s status=%d\n", __func__, ret);
5192 return ret; 5329 return ret;
5193} 5330}
@@ -5203,8 +5340,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5203 goto out; 5340 goto out;
5204 } 5341 }
5205 ret = rpc_wait_for_completion_task(task); 5342 ret = rpc_wait_for_completion_task(task);
5206 if (!ret) 5343 if (!ret) {
5344 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5345
5346 if (task->tk_status == 0)
5347 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5207 ret = task->tk_status; 5348 ret = task->tk_status;
5349 }
5208 rpc_put_task(task); 5350 rpc_put_task(task);
5209out: 5351out:
5210 dprintk("<-- %s status=%d\n", __func__, ret); 5352 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5383,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5241 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5383 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5242 return -EAGAIN; 5384 return -EAGAIN;
5243 default: 5385 default:
5244 nfs4_schedule_state_recovery(clp); 5386 nfs4_schedule_lease_recovery(clp);
5245 } 5387 }
5246 return 0; 5388 return 0;
5247} 5389}
@@ -5309,6 +5451,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5309 status = PTR_ERR(task); 5451 status = PTR_ERR(task);
5310 goto out; 5452 goto out;
5311 } 5453 }
5454 status = nfs4_wait_for_completion_rpc_task(task);
5455 if (status == 0)
5456 status = task->tk_status;
5312 rpc_put_task(task); 5457 rpc_put_task(task);
5313 return 0; 5458 return 0;
5314out: 5459out:
@@ -5595,6 +5740,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5595 .clear_acl_cache = nfs4_zap_acl_attr, 5740 .clear_acl_cache = nfs4_zap_acl_attr,
5596 .close_context = nfs4_close_context, 5741 .close_context = nfs4_close_context,
5597 .open_context = nfs4_atomic_open, 5742 .open_context = nfs4_atomic_open,
5743 .init_client = nfs4_init_client,
5598}; 5744};
5599 5745
5600static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5746static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 402143d75fc5..df8e7f3ca56d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work)
64 ops = clp->cl_mvops->state_renewal_ops; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 66
67 rcu_read_lock(); 67 if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
68 if (list_empty(&clp->cl_superblocks)) {
69 rcu_read_unlock();
70 goto out; 68 goto out;
71 }
72 rcu_read_unlock();
73 69
74 spin_lock(&clp->cl_lock); 70 spin_lock(&clp->cl_lock);
75 lease = clp->cl_lease_time; 71 lease = clp->cl_lease_time;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..ab1bf5bb021f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -153,6 +153,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
153 int status; 153 int status;
154 struct nfs_fsinfo fsinfo; 154 struct nfs_fsinfo fsinfo;
155 155
156 if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
157 nfs4_schedule_state_renewal(clp);
158 return 0;
159 }
160
156 status = nfs4_proc_get_lease_time(clp, &fsinfo); 161 status = nfs4_proc_get_lease_time(clp, &fsinfo);
157 if (status == 0) { 162 if (status == 0) {
158 /* Update lease time and schedule renewal */ 163 /* Update lease time and schedule renewal */
@@ -1007,9 +1012,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1012}
1008 1013
1009/* 1014/*
1010 * Schedule a state recovery attempt 1015 * Schedule a lease recovery attempt
1011 */ 1016 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1017void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1018{
1014 if (!clp) 1019 if (!clp)
1015 return; 1020 return;
@@ -1018,7 +1023,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1023 nfs4_schedule_state_manager(clp);
1019} 1024}
1020 1025
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1026static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1027{
1023 1028
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1029 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1037,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1037 return 1;
1033} 1038}
1034 1039
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1040static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1041{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1042 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1043 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1046,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1046 return 1;
1042} 1047}
1043 1048
1049void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1050{
1051 struct nfs_client *clp = server->nfs_client;
1052
1053 nfs4_state_mark_reclaim_nograce(clp, state);
1054 nfs4_schedule_state_manager(clp);
1055}
1056
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1057static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1058{
1046 struct inode *inode = state->inode; 1059 struct inode *inode = state->inode;
@@ -1436,10 +1449,16 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1449}
1437 1450
1438#ifdef CONFIG_NFS_V4_1 1451#ifdef CONFIG_NFS_V4_1
1452void nfs4_schedule_session_recovery(struct nfs4_session *session)
1453{
1454 nfs4_schedule_lease_recovery(session->clp);
1455}
1456EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1457
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1458void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1459{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1460 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1461 nfs4_schedule_state_manager(clp);
1443} 1462}
1444 1463
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1464static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1466,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1466 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1467 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1468 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1469 nfs4_schedule_state_manager(clp);
1451 } 1470 }
1452} 1471}
1453 1472
@@ -1455,7 +1474,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1474{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1475 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1476 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1477 nfs4_schedule_state_manager(clp);
1459 } 1478 }
1460} 1479}
1461 1480
@@ -1475,7 +1494,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1494{
1476 nfs_expire_all_delegations(clp); 1495 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1496 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1497 nfs4_schedule_state_manager(clp);
1479} 1498}
1480 1499
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1500void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..0cf560f77884 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -844,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
844 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
845 len += 4; 845 len += 4;
846 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); 847 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
848 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
849 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
850 iap->ia_uid); 850 iap->ia_uid);
@@ -856,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
857 } 857 }
858 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); 859 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
860 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
861 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
862 iap->ia_gid); 862 iap->ia_gid);
@@ -1384,7 +1384,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1384 hdr->replen += decode_putrootfh_maxsz; 1384 hdr->replen += decode_putrootfh_maxsz;
1385} 1385}
1386 1386
1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) 1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
1388{ 1388{
1389 nfs4_stateid stateid; 1389 nfs4_stateid stateid;
1390 __be32 *p; 1390 __be32 *p;
@@ -1392,6 +1392,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1392 p = reserve_space(xdr, NFS4_STATEID_SIZE); 1392 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1393 if (ctx->state != NULL) { 1393 if (ctx->state != NULL) {
1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); 1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
1395 if (zero_seqid)
1396 stateid.stateid.seqid = 0;
1395 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1397 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1396 } else 1398 } else
1397 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1399 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1404,7 +1406,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1404 p = reserve_space(xdr, 4); 1406 p = reserve_space(xdr, 4);
1405 *p = cpu_to_be32(OP_READ); 1407 *p = cpu_to_be32(OP_READ);
1406 1408
1407 encode_stateid(xdr, args->context, args->lock_context); 1409 encode_stateid(xdr, args->context, args->lock_context,
1410 hdr->minorversion);
1408 1411
1409 p = reserve_space(xdr, 12); 1412 p = reserve_space(xdr, 12);
1410 p = xdr_encode_hyper(p, args->offset); 1413 p = xdr_encode_hyper(p, args->offset);
@@ -1592,7 +1595,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1592 p = reserve_space(xdr, 4); 1595 p = reserve_space(xdr, 4);
1593 *p = cpu_to_be32(OP_WRITE); 1596 *p = cpu_to_be32(OP_WRITE);
1594 1597
1595 encode_stateid(xdr, args->context, args->lock_context); 1598 encode_stateid(xdr, args->context, args->lock_context,
1599 hdr->minorversion);
1596 1600
1597 p = reserve_space(xdr, 16); 1601 p = reserve_space(xdr, 16);
1598 p = xdr_encode_hyper(p, args->offset); 1602 p = xdr_encode_hyper(p, args->offset);
@@ -1660,7 +1664,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1664
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1665 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1666 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1667 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1668 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1669 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1670
@@ -2271,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2271 encode_putfh(xdr, args->fh, &hdr); 2275 encode_putfh(xdr, args->fh, &hdr);
2272 encode_write(xdr, args, &hdr); 2276 encode_write(xdr, args, &hdr);
2273 req->rq_snd_buf.flags |= XDRBUF_WRITE; 2277 req->rq_snd_buf.flags |= XDRBUF_WRITE;
2274 encode_getfattr(xdr, args->bitmask, &hdr); 2278 if (args->bitmask)
2279 encode_getfattr(xdr, args->bitmask, &hdr);
2275 encode_nops(&hdr); 2280 encode_nops(&hdr);
2276} 2281}
2277 2282
@@ -3382,7 +3387,7 @@ out_overflow:
3382} 3387}
3383 3388
3384static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3389static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3385 struct nfs_client *clp, uint32_t *uid, int may_sleep) 3390 const struct nfs_server *server, uint32_t *uid, int may_sleep)
3386{ 3391{
3387 uint32_t len; 3392 uint32_t len;
3388 __be32 *p; 3393 __be32 *p;
@@ -3402,7 +3407,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3402 if (!may_sleep) { 3407 if (!may_sleep) {
3403 /* do nothing */ 3408 /* do nothing */
3404 } else if (len < XDR_MAX_NETOBJ) { 3409 } else if (len < XDR_MAX_NETOBJ) {
3405 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3410 if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
3406 ret = NFS_ATTR_FATTR_OWNER; 3411 ret = NFS_ATTR_FATTR_OWNER;
3407 else 3412 else
3408 dprintk("%s: nfs_map_name_to_uid failed!\n", 3413 dprintk("%s: nfs_map_name_to_uid failed!\n",
@@ -3420,7 +3425,7 @@ out_overflow:
3420} 3425}
3421 3426
3422static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3427static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3423 struct nfs_client *clp, uint32_t *gid, int may_sleep) 3428 const struct nfs_server *server, uint32_t *gid, int may_sleep)
3424{ 3429{
3425 uint32_t len; 3430 uint32_t len;
3426 __be32 *p; 3431 __be32 *p;
@@ -3440,7 +3445,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3440 if (!may_sleep) { 3445 if (!may_sleep) {
3441 /* do nothing */ 3446 /* do nothing */
3442 } else if (len < XDR_MAX_NETOBJ) { 3447 } else if (len < XDR_MAX_NETOBJ) {
3443 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3448 if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
3444 ret = NFS_ATTR_FATTR_GROUP; 3449 ret = NFS_ATTR_FATTR_GROUP;
3445 else 3450 else
3446 dprintk("%s: nfs_map_group_to_gid failed!\n", 3451 dprintk("%s: nfs_map_group_to_gid failed!\n",
@@ -3939,14 +3944,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3939 goto xdr_error; 3944 goto xdr_error;
3940 fattr->valid |= status; 3945 fattr->valid |= status;
3941 3946
3942 status = decode_attr_owner(xdr, bitmap, server->nfs_client, 3947 status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
3943 &fattr->uid, may_sleep);
3944 if (status < 0) 3948 if (status < 0)
3945 goto xdr_error; 3949 goto xdr_error;
3946 fattr->valid |= status; 3950 fattr->valid |= status;
3947 3951
3948 status = decode_attr_group(xdr, bitmap, server->nfs_client, 3952 status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
3949 &fattr->gid, may_sleep);
3950 if (status < 0) 3953 if (status < 0)
3951 goto xdr_error; 3954 goto xdr_error;
3952 fattr->valid |= status; 3955 fattr->valid |= status;
@@ -4694,7 +4697,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4697 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4698 if (unlikely(!p))
4696 goto out_overflow; 4699 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4700 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4701 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4702 if (unlikely(!p))
4700 goto out_overflow; 4703 goto out_overflow;
@@ -5690,8 +5693,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5690 status = decode_write(xdr, res); 5693 status = decode_write(xdr, res);
5691 if (status) 5694 if (status)
5692 goto out; 5695 goto out;
5693 decode_getfattr(xdr, res->fattr, res->server, 5696 if (res->fattr)
5694 !RPC_IS_ASYNC(rqstp->rq_task)); 5697 decode_getfattr(xdr, res->fattr, res->server,
5698 !RPC_IS_ASYNC(rqstp->rq_task));
5695 if (!status) 5699 if (!status)
5696 status = res->count; 5700 status = res->count;
5697out: 5701out:
@@ -6167,8 +6171,6 @@ static struct {
6167 { NFS4ERR_DQUOT, -EDQUOT }, 6171 { NFS4ERR_DQUOT, -EDQUOT },
6168 { NFS4ERR_STALE, -ESTALE }, 6172 { NFS4ERR_STALE, -ESTALE },
6169 { NFS4ERR_BADHANDLE, -EBADHANDLE }, 6173 { NFS4ERR_BADHANDLE, -EBADHANDLE },
6170 { NFS4ERR_BADOWNER, -EINVAL },
6171 { NFS4ERR_BADNAME, -EINVAL },
6172 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, 6174 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
6173 { NFS4ERR_NOTSUPP, -ENOTSUPP }, 6175 { NFS4ERR_NOTSUPP, -ENOTSUPP },
6174 { NFS4ERR_TOOSMALL, -ETOOSMALL }, 6176 { NFS4ERR_TOOSMALL, -ETOOSMALL },
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
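root_nfs_cat() now inserts the separating comma itself whenever the destination is non-empty and does not already end in one, which lets nfs_root_options start out as NFS_DEF_OPTIONS ("udp") and still grow cleanly as options are appended. The user-space sketch below reproduces that append-with-separator behaviour, using strlen()/strcat() with explicit length checks in place of the kernel's strlcat().

#include <stdio.h>
#include <string.h>

/* Append a mount option to a fixed-size, comma-separated option string,
 * adding the comma only when needed.  Returns 0 on success, -1 if the
 * result would not fit in destlen bytes. */
static int opt_cat(char *dest, const char *src, size_t destlen)
{
	size_t len = strlen(dest);

	if (len && dest[len - 1] != ',') {
		if (len + 1 >= destlen)
			return -1;
		dest[len++] = ',';
		dest[len] = '\0';
	}
	if (len + strlen(src) >= destlen)
		return -1;
	strcat(dest, src);
	return 0;
}

int main(void)
{
	char options[64] = "udp";	/* mirrors NFS_DEF_OPTIONS */

	opt_cat(options, "vers=3", sizeof(options));
	opt_cat(options, "nolock,addr=192.0.2.1", sizeof(options));
	printf("%s\n", options);	/* udp,vers=3,nolock,addr=192.0.2.1 */
	return 0;
}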
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
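The nfsroot.c change above folds the comma handling into root_nfs_cat() and seeds the option string with NFS_DEF_OPTIONS, so every later append lands behind a separator. A minimal userspace sketch of that joining behaviour follows; xstrlcat() and opt_cat() are stand-ins written for the example, not kernel functions.

#include <stdio.h>
#include <string.h>

/* Minimal strlcat work-alike so the sketch builds on libcs without one. */
static size_t xstrlcat(char *dst, const char *src, size_t size)
{
	size_t dlen = strlen(dst);

	if (dlen < size)
		snprintf(dst + dlen, size - dlen, "%s", src);
	return dlen + strlen(src);	/* length we tried to create */
}

/* Append src, inserting "," first unless dst is empty or already ends
 * with one; return -1 on overflow, mirroring root_nfs_cat() above. */
static int opt_cat(char *dst, const char *src, size_t destlen)
{
	size_t len = strlen(dst);

	if (len && dst[len - 1] != ',')
		if (xstrlcat(dst, ",", destlen) > destlen)
			return -1;
	if (xstrlcat(dst, src, destlen) > destlen)
		return -1;
	return 0;
}

int main(void)
{
	char opts[64] = "udp";		/* seeded like NFS_DEF_OPTIONS */

	opt_cat(opts, "vers=3", sizeof(opts));
	opt_cat(opts, "nolock,addr=192.0.2.1", sizeof(opts));
	printf("%s\n", opts);	/* udp,vers=3,nolock,addr=192.0.2.1 */
	return 0;
}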
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e1164e3f9e69..23e794410669 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,6 +20,7 @@
20#include <linux/nfs_mount.h> 20#include <linux/nfs_mount.h>
21 21
22#include "internal.h" 22#include "internal.h"
23#include "pnfs.h"
23 24
24static struct kmem_cache *nfs_page_cachep; 25static struct kmem_cache *nfs_page_cachep;
25 26
@@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
213 */ 214 */
214void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 215void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 struct inode *inode, 216 struct inode *inode,
216 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), 217 int (*doio)(struct nfs_pageio_descriptor *),
217 size_t bsize, 218 size_t bsize,
218 int io_flags) 219 int io_flags)
219{ 220{
@@ -226,6 +227,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
226 desc->pg_doio = doio; 227 desc->pg_doio = doio;
227 desc->pg_ioflags = io_flags; 228 desc->pg_ioflags = io_flags;
228 desc->pg_error = 0; 229 desc->pg_error = 0;
230 desc->pg_lseg = NULL;
229} 231}
230 232
231/** 233/**
@@ -240,7 +242,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
240 * Return 'true' if this is the case, else return 'false'. 242 * Return 'true' if this is the case, else return 'false'.
241 */ 243 */
242static int nfs_can_coalesce_requests(struct nfs_page *prev, 244static int nfs_can_coalesce_requests(struct nfs_page *prev,
243 struct nfs_page *req) 245 struct nfs_page *req,
246 struct nfs_pageio_descriptor *pgio)
244{ 247{
245 if (req->wb_context->cred != prev->wb_context->cred) 248 if (req->wb_context->cred != prev->wb_context->cred)
246 return 0; 249 return 0;
@@ -254,6 +257,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
254 return 0; 257 return 0;
255 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 258 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
256 return 0; 259 return 0;
260 /*
261 * Non-whole file layouts need to check that req is inside of
262 * pgio->pg_lseg.
263 */
264 if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
265 return 0;
257 return 1; 266 return 1;
258} 267}
259 268
@@ -286,7 +295,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
286 if (newlen > desc->pg_bsize) 295 if (newlen > desc->pg_bsize)
287 return 0; 296 return 0;
288 prev = nfs_list_entry(desc->pg_list.prev); 297 prev = nfs_list_entry(desc->pg_list.prev);
289 if (!nfs_can_coalesce_requests(prev, req)) 298 if (!nfs_can_coalesce_requests(prev, req, desc))
290 return 0; 299 return 0;
291 } else 300 } else
292 desc->pg_base = req->wb_pgbase; 301 desc->pg_base = req->wb_pgbase;
@@ -302,12 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
302static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 311static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
303{ 312{
304 if (!list_empty(&desc->pg_list)) { 313 if (!list_empty(&desc->pg_list)) {
305 int error = desc->pg_doio(desc->pg_inode, 314 int error = desc->pg_doio(desc);
306 &desc->pg_list,
307 nfs_page_array_len(desc->pg_base,
308 desc->pg_count),
309 desc->pg_count,
310 desc->pg_ioflags);
311 if (error < 0) 315 if (error < 0)
312 desc->pg_error = error; 316 desc->pg_error = error;
313 else 317 else
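The pagelist.c change threads the descriptor into nfs_can_coalesce_requests() so an optional pg_test hook can veto a merge. The sketch below shows the shape of that gate in plain userspace C; the structures and the 64 KiB stripe rule are assumptions invented for the example.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct req {
	size_t offset;		/* byte offset of this request */
	size_t bytes;		/* length of this request */
};

struct pgio_desc {
	size_t count;		/* bytes coalesced so far */
	/* layout-driver hook; NULL means "no extra restriction" */
	bool (*pg_test)(struct pgio_desc *d, struct req *prev, struct req *cur);
};

static bool can_coalesce(struct pgio_desc *d, struct req *prev, struct req *cur)
{
	if (prev->offset + prev->bytes != cur->offset)
		return false;			/* must be contiguous */
	if (d->pg_test && !d->pg_test(d, prev, cur))
		return false;			/* layout driver vetoed it */
	return true;
}

/* Example hook: only coalesce while staying under a 64 KiB stripe. */
static bool stripe_test(struct pgio_desc *d, struct req *prev, struct req *cur)
{
	(void)prev;
	return d->count + cur->bytes <= 64 * 1024;
}

int main(void)
{
	struct pgio_desc d = { .count = 60 * 1024, .pg_test = stripe_test };
	struct req a = { .offset = 0, .bytes = 60 * 1024 };
	struct req b = { .offset = 60 * 1024, .bytes = 8 * 1024 };

	printf("coalesce: %s\n", can_coalesce(&d, &a, &b) ? "yes" : "no");
	return 0;
}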
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b1bc1a0fb0a..f38813a0a295 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -30,6 +30,7 @@
30#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
31#include "internal.h" 31#include "internal.h"
32#include "pnfs.h" 32#include "pnfs.h"
33#include "iostat.h"
33 34
34#define NFSDBG_FACILITY NFSDBG_PNFS 35#define NFSDBG_FACILITY NFSDBG_PNFS
35 36
@@ -74,10 +75,8 @@ find_pnfs_driver(u32 id)
74void 75void
75unset_pnfs_layoutdriver(struct nfs_server *nfss) 76unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{ 77{
77 if (nfss->pnfs_curr_ld) { 78 if (nfss->pnfs_curr_ld)
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner); 79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL; 80 nfss->pnfs_curr_ld = NULL;
82} 81}
83 82
@@ -115,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
115 goto out_no_driver; 114 goto out_no_driver;
116 } 115 }
117 server->pnfs_curr_ld = ld_type; 116 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) { 117
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id); 118 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return; 119 return;
127 120
@@ -230,37 +223,41 @@ static void free_lseg(struct pnfs_layout_segment *lseg)
230 put_layout_hdr(NFS_I(ino)->layout); 223 put_layout_hdr(NFS_I(ino)->layout);
231} 224}
232 225
233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg 226static void
234 * could sleep, so must be called outside of the lock. 227put_lseg_common(struct pnfs_layout_segment *lseg)
235 * Returns 1 if object was removed, otherwise return 0. 228{
236 */ 229 struct inode *inode = lseg->pls_layout->plh_inode;
237static int 230
238put_lseg_locked(struct pnfs_layout_segment *lseg, 231 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
239 struct list_head *tmp_list) 232 list_del_init(&lseg->pls_list);
233 if (list_empty(&lseg->pls_layout->plh_segs)) {
234 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
235 /* Matched by initial refcount set in alloc_init_layout_hdr */
236 put_layout_hdr_locked(lseg->pls_layout);
237 }
238 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
239}
240
241void
242put_lseg(struct pnfs_layout_segment *lseg)
240{ 243{
244 struct inode *inode;
245
246 if (!lseg)
247 return;
248
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 249 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount), 250 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 251 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) { 252 inode = lseg->pls_layout->plh_inode;
245 struct inode *ino = lseg->pls_layout->plh_inode; 253 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
254 LIST_HEAD(free_me);
246 255
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 256 put_lseg_common(lseg);
248 list_del(&lseg->pls_list); 257 list_add(&lseg->pls_list, &free_me);
249 if (list_empty(&lseg->pls_layout->plh_segs)) { 258 spin_unlock(&inode->i_lock);
250 struct nfs_client *clp; 259 pnfs_free_lseg_list(&free_me);
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 } 260 }
263 return 0;
264} 261}
265 262
266static bool 263static bool
@@ -281,7 +278,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
281 * list. It will now be removed when all 278 * list. It will now be removed when all
282 * outstanding io is finished. 279 * outstanding io is finished.
283 */ 280 */
284 rv = put_lseg_locked(lseg, tmp_list); 281 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
282 atomic_read(&lseg->pls_refcount));
283 if (atomic_dec_and_test(&lseg->pls_refcount)) {
284 put_lseg_common(lseg);
285 list_add(&lseg->pls_list, tmp_list);
286 rv = 1;
287 }
285 } 288 }
286 return rv; 289 return rv;
287} 290}
@@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
299 302
300 dprintk("%s:Begin lo %p\n", __func__, lo); 303 dprintk("%s:Begin lo %p\n", __func__, lo);
301 304
305 if (list_empty(&lo->plh_segs)) {
306 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
307 put_layout_hdr_locked(lo);
308 return 0;
309 }
302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 310 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) { 311 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
304 dprintk("%s: freeing lseg %p iomode %d " 312 dprintk("%s: freeing lseg %p iomode %d "
@@ -312,11 +320,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
312 return invalid - removed; 320 return invalid - removed;
313} 321}
314 322
323/* note free_me must contain lsegs from a single layout_hdr */
315void 324void
316pnfs_free_lseg_list(struct list_head *free_me) 325pnfs_free_lseg_list(struct list_head *free_me)
317{ 326{
318 struct pnfs_layout_segment *lseg, *tmp; 327 struct pnfs_layout_segment *lseg, *tmp;
328 struct pnfs_layout_hdr *lo;
329
330 if (list_empty(free_me))
331 return;
319 332
333 lo = list_first_entry(free_me, struct pnfs_layout_segment,
334 pls_list)->pls_layout;
335
336 if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
337 struct nfs_client *clp;
338
339 clp = NFS_SERVER(lo->plh_inode)->nfs_client;
340 spin_lock(&clp->cl_lock);
341 list_del_init(&lo->plh_layouts);
342 spin_unlock(&clp->cl_lock);
343 }
320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 344 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
321 list_del(&lseg->pls_list); 345 list_del(&lseg->pls_list);
322 free_lseg(lseg); 346 free_lseg(lseg);
@@ -332,10 +356,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
332 spin_lock(&nfsi->vfs_inode.i_lock); 356 spin_lock(&nfsi->vfs_inode.i_lock);
333 lo = nfsi->layout; 357 lo = nfsi->layout;
334 if (lo) { 358 if (lo) {
335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); 359 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); 360 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
338 put_layout_hdr_locked(lo);
339 } 361 }
340 spin_unlock(&nfsi->vfs_inode.i_lock); 362 spin_unlock(&nfsi->vfs_inode.i_lock);
341 pnfs_free_lseg_list(&tmp_list); 363 pnfs_free_lseg_list(&tmp_list);
@@ -403,6 +425,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 425 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
404 return true; 426 return true;
405 return lo->plh_block_lgets || 427 return lo->plh_block_lgets ||
428 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 429 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
407 (list_empty(&lo->plh_segs) && 430 (list_empty(&lo->plh_segs) &&
408 (atomic_read(&lo->plh_outstanding) > lget)); 431 (atomic_read(&lo->plh_outstanding) > lget));
@@ -674,7 +697,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 697 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 698 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) { 699 is_matching_lseg(lseg, iomode)) {
677 ret = lseg; 700 ret = get_lseg(lseg);
678 break; 701 break;
679 } 702 }
680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) 703 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
@@ -699,6 +722,7 @@ pnfs_update_layout(struct inode *ino,
699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 722 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
700 struct pnfs_layout_hdr *lo; 723 struct pnfs_layout_hdr *lo;
701 struct pnfs_layout_segment *lseg = NULL; 724 struct pnfs_layout_segment *lseg = NULL;
725 bool first = false;
702 726
703 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 727 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
704 return NULL; 728 return NULL;
@@ -715,21 +739,25 @@ pnfs_update_layout(struct inode *ino,
715 dprintk("%s matches recall, use MDS\n", __func__); 739 dprintk("%s matches recall, use MDS\n", __func__);
716 goto out_unlock; 740 goto out_unlock;
717 } 741 }
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
722 742
723 /* if LAYOUTGET already failed once we don't try again */ 743 /* if LAYOUTGET already failed once we don't try again */
724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) 744 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
725 goto out_unlock; 745 goto out_unlock;
726 746
747 /* Check to see if the layout for the given range already exists */
748 lseg = pnfs_find_lseg(lo, iomode);
749 if (lseg)
750 goto out_unlock;
751
727 if (pnfs_layoutgets_blocked(lo, NULL, 0)) 752 if (pnfs_layoutgets_blocked(lo, NULL, 0))
728 goto out_unlock; 753 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding); 754 atomic_inc(&lo->plh_outstanding);
730 755
731 get_layout_hdr(lo); 756 get_layout_hdr(lo);
732 if (list_empty(&lo->plh_segs)) { 757 if (list_empty(&lo->plh_segs))
758 first = true;
759 spin_unlock(&ino->i_lock);
760 if (first) {
733 /* The lo must be on the clp list if there is any 761 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in. 762 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */ 763 */
@@ -738,24 +766,18 @@ pnfs_update_layout(struct inode *ino,
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 766 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock); 767 spin_unlock(&clp->cl_lock);
740 } 768 }
741 spin_unlock(&ino->i_lock);
742 769
743 lseg = send_layoutget(lo, ctx, iomode); 770 lseg = send_layoutget(lo, ctx, iomode);
744 if (!lseg) { 771 if (!lseg && first) {
745 spin_lock(&ino->i_lock); 772 spin_lock(&clp->cl_lock);
746 if (list_empty(&lo->plh_segs)) { 773 list_del_init(&lo->plh_layouts);
747 spin_lock(&clp->cl_lock); 774 spin_unlock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
751 }
752 spin_unlock(&ino->i_lock);
753 } 775 }
754 atomic_dec(&lo->plh_outstanding); 776 atomic_dec(&lo->plh_outstanding);
755 put_layout_hdr(lo); 777 put_layout_hdr(lo);
756out: 778out:
757 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 779 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
758 nfsi->layout->plh_flags, lseg); 780 nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
759 return lseg; 781 return lseg;
760out_unlock: 782out_unlock:
761 spin_unlock(&ino->i_lock); 783 spin_unlock(&ino->i_lock);
@@ -808,7 +830,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
808 } 830 }
809 init_lseg(lo, lseg); 831 init_lseg(lo, lseg);
810 lseg->pls_range = res->range; 832 lseg->pls_range = res->range;
811 *lgp->lsegpp = lseg; 833 *lgp->lsegpp = get_lseg(lseg);
812 pnfs_insert_layout(lo, lseg); 834 pnfs_insert_layout(lo, lseg);
813 835
814 if (res->return_on_close) { 836 if (res->return_on_close) {
@@ -829,137 +851,97 @@ out_forget_reply:
829 goto out; 851 goto out;
830} 852}
831 853
832/* 854static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
833 * Device ID cache. Currently supports one layout type per struct nfs_client. 855 struct nfs_page *prev,
834 * Add layout type to the lookup key to expand to support multiple types. 856 struct nfs_page *req)
835 */
836int
837pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
838 void (*free_callback)(struct pnfs_deviceid_node *))
839{ 857{
840 struct pnfs_deviceid_cache *c; 858 if (pgio->pg_count == prev->wb_bytes) {
841 	 859 		/* This is the first coalesce call for a series of nfs_pages */
842 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); 860 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
843 if (!c) 861 prev->wb_context,
844 return -ENOMEM; 862 IOMODE_READ);
845 spin_lock(&clp->cl_lock);
846 if (clp->cl_devid_cache != NULL) {
847 atomic_inc(&clp->cl_devid_cache->dc_ref);
848 dprintk("%s [kref [%d]]\n", __func__,
849 atomic_read(&clp->cl_devid_cache->dc_ref));
850 kfree(c);
851 } else {
852 /* kzalloc initializes hlists */
853 spin_lock_init(&c->dc_lock);
854 atomic_set(&c->dc_ref, 1);
855 c->dc_free_callback = free_callback;
856 clp->cl_devid_cache = c;
857 dprintk("%s [new]\n", __func__);
858 } 863 }
859 spin_unlock(&clp->cl_lock); 864 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
860 return 0;
861} 865}
862EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
863 866
864/*
865 * Called from pnfs_layoutdriver_type->free_lseg
866 * last layout segment reference frees deviceid
867 */
868void 867void
869pnfs_put_deviceid(struct pnfs_deviceid_cache *c, 868pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
870 struct pnfs_deviceid_node *devid)
871{ 869{
872 struct nfs4_deviceid *id = &devid->de_id; 870 struct pnfs_layoutdriver_type *ld;
873 struct pnfs_deviceid_node *d;
874 struct hlist_node *n;
875 long h = nfs4_deviceid_hash(id);
876 871
877 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); 872 ld = NFS_SERVER(inode)->pnfs_curr_ld;
878 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) 873 pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
879 return; 874}
880 875
881 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) 876static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
882 if (!memcmp(&d->de_id, id, sizeof(*id))) { 877 struct nfs_page *prev,
883 hlist_del_rcu(&d->de_node); 878 struct nfs_page *req)
884 spin_unlock(&c->dc_lock); 879{
885 synchronize_rcu(); 880 if (pgio->pg_count == prev->wb_bytes) {
886 			c->dc_free_callback(devid); 881 		/* This is the first coalesce call for a series of nfs_pages */
887 return; 882 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
888 } 883 prev->wb_context,
889 spin_unlock(&c->dc_lock); 884 IOMODE_RW);
890 /* Why wasn't it found in the list? */
891 BUG();
892}
893EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
894
895/* Find and reference a deviceid */
896struct pnfs_deviceid_node *
897pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
898{
899 struct pnfs_deviceid_node *d;
900 struct hlist_node *n;
901 long hash = nfs4_deviceid_hash(id);
902
903 dprintk("--> %s hash %ld\n", __func__, hash);
904 rcu_read_lock();
905 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
906 if (!memcmp(&d->de_id, id, sizeof(*id))) {
907 if (!atomic_inc_not_zero(&d->de_ref)) {
908 goto fail;
909 } else {
910 rcu_read_unlock();
911 return d;
912 }
913 }
914 } 885 }
915fail: 886 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
916 rcu_read_unlock(); 887}
917 return NULL; 888
889void
890pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
891{
892 struct pnfs_layoutdriver_type *ld;
893
894 ld = NFS_SERVER(inode)->pnfs_curr_ld;
895 pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
896}
897
898enum pnfs_try_status
899pnfs_try_to_write_data(struct nfs_write_data *wdata,
900 const struct rpc_call_ops *call_ops, int how)
901{
902 struct inode *inode = wdata->inode;
903 enum pnfs_try_status trypnfs;
904 struct nfs_server *nfss = NFS_SERVER(inode);
905
906 wdata->mds_ops = call_ops;
907
908 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
909 inode->i_ino, wdata->args.count, wdata->args.offset, how);
910
911 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
912 if (trypnfs == PNFS_NOT_ATTEMPTED) {
913 put_lseg(wdata->lseg);
914 wdata->lseg = NULL;
915 } else
916 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
917
918 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
919 return trypnfs;
918} 920}
919EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
920 921
921/* 922/*
922 * Add a deviceid to the cache. 923 * Call the appropriate parallel I/O subsystem read function.
923 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
924 */ 924 */
925struct pnfs_deviceid_node * 925enum pnfs_try_status
926pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) 926pnfs_try_to_read_data(struct nfs_read_data *rdata,
927{ 927 const struct rpc_call_ops *call_ops)
928 struct pnfs_deviceid_node *d;
929 long hash = nfs4_deviceid_hash(&new->de_id);
930
931 dprintk("--> %s hash %ld\n", __func__, hash);
932 spin_lock(&c->dc_lock);
933 d = pnfs_find_get_deviceid(c, &new->de_id);
934 if (d) {
935 spin_unlock(&c->dc_lock);
936 dprintk("%s [discard]\n", __func__);
937 c->dc_free_callback(new);
938 return d;
939 }
940 INIT_HLIST_NODE(&new->de_node);
941 atomic_set(&new->de_ref, 1);
942 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
943 spin_unlock(&c->dc_lock);
944 dprintk("%s [new]\n", __func__);
945 return new;
946}
947EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
948
949void
950pnfs_put_deviceid_cache(struct nfs_client *clp)
951{ 928{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache; 929 struct inode *inode = rdata->inode;
930 struct nfs_server *nfss = NFS_SERVER(inode);
931 enum pnfs_try_status trypnfs;
953 932
954 dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); 933 rdata->mds_ops = call_ops;
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { 934
956 int i; 935 dprintk("%s: Reading ino:%lu %u@%llu\n",
957 /* Verify cache is empty */ 936 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
958 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) 937
959 BUG_ON(!hlist_empty(&local->dc_deviceids[i])); 938 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
960 clp->cl_devid_cache = NULL; 939 if (trypnfs == PNFS_NOT_ATTEMPTED) {
961 spin_unlock(&clp->cl_lock); 940 put_lseg(rdata->lseg);
962 kfree(local); 941 rdata->lseg = NULL;
942 } else {
943 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
963 } 944 }
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs;
964} 947}
965EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
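put_lseg() above switches to atomic_dec_and_lock() so the common put stays lock-free and only the final reference takes the inode lock, unlinks the segment, and frees it after dropping the lock. A self-contained userspace sketch of that pattern, using C11 atomics and a pthread mutex instead of the kernel primitives, follows; dec_and_lock() here is a local stand-in, not the kernel helper.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct lseg {
	atomic_int refcount;
	struct lseg *next;		/* singly-linked list under list_lock */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct lseg *seg_list;

/* Returns 1 with list_lock held iff the count dropped to zero. */
static int dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* fast path: not the last reference, just decrement lock-free */
	while (old > 1)
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return 0;
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return 1;		/* we dropped the last reference */
	pthread_mutex_unlock(lock);
	return 0;
}

static void put_seg(struct lseg *seg)
{
	if (!dec_and_lock(&seg->refcount, &list_lock))
		return;
	/* unlink while holding the lock, free after releasing it */
	for (struct lseg **p = &seg_list; *p; p = &(*p)->next)
		if (*p == seg) {
			*p = seg->next;
			break;
		}
	pthread_mutex_unlock(&list_lock);
	free(seg);
}

int main(void)
{
	struct lseg *s = calloc(1, sizeof(*s));

	atomic_store(&s->refcount, 2);
	s->next = seg_list;
	seg_list = s;

	put_seg(s);			/* drops to 1, lock never taken */
	put_seg(s);			/* last put: unlink and free */
	printf("list empty: %s\n", seg_list == NULL ? "yes" : "no");
	return 0;
}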
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e2612ea0cbed..6380b9405bcd 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,8 @@
30#ifndef FS_NFS_PNFS_H 30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H 31#define FS_NFS_PNFS_H
32 32
33#include <linux/nfs_page.h>
34
33enum { 35enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 36 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */ 37 NFS_LSEG_ROC, /* roc bit received from server */
@@ -43,6 +45,11 @@ struct pnfs_layout_segment {
43 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
44}; 46};
45 47
48enum pnfs_try_status {
49 PNFS_ATTEMPTED = 0,
50 PNFS_NOT_ATTEMPTED = 1,
51};
52
46#ifdef CONFIG_NFS_V4_1 53#ifdef CONFIG_NFS_V4_1
47 54
48#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" 55#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -61,10 +68,18 @@ struct pnfs_layoutdriver_type {
61 const u32 id; 68 const u32 id;
62 const char *name; 69 const char *name;
63 struct module *owner; 70 struct module *owner;
64 int (*set_layoutdriver) (struct nfs_server *);
65 int (*clear_layoutdriver) (struct nfs_server *);
66 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); 71 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
67 void (*free_lseg) (struct pnfs_layout_segment *lseg); 72 void (*free_lseg) (struct pnfs_layout_segment *lseg);
73
74 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76
77 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
80 */
81 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
82 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
68}; 83};
69 84
70struct pnfs_layout_hdr { 85struct pnfs_layout_hdr {
@@ -90,52 +105,6 @@ struct pnfs_device {
90 unsigned int pglen; 105 unsigned int pglen;
91}; 106};
92 107
93/*
94 * Device ID RCU cache. A device ID is unique per client ID and layout type.
95 */
96#define NFS4_DEVICE_ID_HASH_BITS 5
97#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
98#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
99
100static inline u32
101nfs4_deviceid_hash(struct nfs4_deviceid *id)
102{
103 unsigned char *cptr = (unsigned char *)id->data;
104 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
105 u32 x = 0;
106
107 while (nbytes--) {
108 x *= 37;
109 x += *cptr++;
110 }
111 return x & NFS4_DEVICE_ID_HASH_MASK;
112}
113
114struct pnfs_deviceid_node {
115 struct hlist_node de_node;
116 struct nfs4_deviceid de_id;
117 atomic_t de_ref;
118};
119
120struct pnfs_deviceid_cache {
121 spinlock_t dc_lock;
122 atomic_t dc_ref;
123 void (*dc_free_callback)(struct pnfs_deviceid_node *);
124 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
125};
126
127extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
128 void (*free_callback)(struct pnfs_deviceid_node *));
129extern void pnfs_put_deviceid_cache(struct nfs_client *);
130extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
131 struct pnfs_deviceid_cache *,
132 struct nfs4_deviceid *);
133extern struct pnfs_deviceid_node *pnfs_add_deviceid(
134 struct pnfs_deviceid_cache *,
135 struct pnfs_deviceid_node *);
136extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
137 struct pnfs_deviceid_node *devid);
138
139extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); 108extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
140extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 109extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
141 110
@@ -146,11 +115,18 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
146 115
147/* pnfs.c */ 116/* pnfs.c */
148void get_layout_hdr(struct pnfs_layout_hdr *lo); 117void get_layout_hdr(struct pnfs_layout_hdr *lo);
118void put_lseg(struct pnfs_layout_segment *lseg);
149struct pnfs_layout_segment * 119struct pnfs_layout_segment *
150pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 120pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
151 enum pnfs_iomode access_type); 121 enum pnfs_iomode access_type);
152void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 122void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
153void unset_pnfs_layoutdriver(struct nfs_server *); 123void unset_pnfs_layoutdriver(struct nfs_server *);
124enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
125 const struct rpc_call_ops *, int);
126enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
127 const struct rpc_call_ops *);
128void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
129void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
154int pnfs_layout_process(struct nfs4_layoutget *lgp); 130int pnfs_layout_process(struct nfs4_layoutget *lgp);
155void pnfs_free_lseg_list(struct list_head *tmp_list); 131void pnfs_free_lseg_list(struct list_head *tmp_list);
156void pnfs_destroy_layout(struct nfs_inode *); 132void pnfs_destroy_layout(struct nfs_inode *);
@@ -177,6 +153,16 @@ static inline int lo_fail_bit(u32 iomode)
177 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
178} 154}
179 155
156static inline struct pnfs_layout_segment *
157get_lseg(struct pnfs_layout_segment *lseg)
158{
159 if (lseg) {
160 atomic_inc(&lseg->pls_refcount);
161 smp_mb__after_atomic_inc();
162 }
163 return lseg;
164}
165
180/* Return true if a layout driver is being used for this mountpoint */ 166/* Return true if a layout driver is being used for this mountpoint */
181static inline int pnfs_enabled_sb(struct nfs_server *nfss) 167static inline int pnfs_enabled_sb(struct nfs_server *nfss)
182{ 168{
@@ -194,12 +180,36 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
194} 180}
195 181
196static inline struct pnfs_layout_segment * 182static inline struct pnfs_layout_segment *
183get_lseg(struct pnfs_layout_segment *lseg)
184{
185 return NULL;
186}
187
188static inline void put_lseg(struct pnfs_layout_segment *lseg)
189{
190}
191
192static inline struct pnfs_layout_segment *
197pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 193pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
198 enum pnfs_iomode access_type) 194 enum pnfs_iomode access_type)
199{ 195{
200 return NULL; 196 return NULL;
201} 197}
202 198
199static inline enum pnfs_try_status
200pnfs_try_to_read_data(struct nfs_read_data *data,
201 const struct rpc_call_ops *call_ops)
202{
203 return PNFS_NOT_ATTEMPTED;
204}
205
206static inline enum pnfs_try_status
207pnfs_try_to_write_data(struct nfs_write_data *data,
208 const struct rpc_call_ops *call_ops, int how)
209{
210 return PNFS_NOT_ATTEMPTED;
211}
212
203static inline bool 213static inline bool
204pnfs_roc(struct inode *ino) 214pnfs_roc(struct inode *ino)
205{ 215{
@@ -230,6 +240,18 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
230{ 240{
231} 241}
232 242
243static inline void
244pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
245{
246 pgio->pg_test = NULL;
247}
248
249static inline void
250pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
251{
252 pgio->pg_test = NULL;
253}
254
233#endif /* CONFIG_NFS_V4_1 */ 255#endif /* CONFIG_NFS_V4_1 */
234 256
235#endif /* FS_NFS_PNFS_H */ 257#endif /* FS_NFS_PNFS_H */
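The read_pagelist/write_pagelist methods added to struct pnfs_layoutdriver_type above follow a try-or-decline contract: PNFS_ATTEMPTED means the driver owns the I/O, PNFS_NOT_ATTEMPTED means the caller must fall back to the regular NFS path. A minimal sketch of that dispatch shape follows, with purely illustrative names.

#include <stdio.h>

enum try_status { ATTEMPTED = 0, NOT_ATTEMPTED = 1 };

struct io_driver {
	/* returns NOT_ATTEMPTED to decline, after which the caller must
	 * issue the request through the normal path instead */
	enum try_status (*read)(const char *what);
};

static enum try_status fancy_read(const char *what)
{
	printf("driver read: %s\n", what);
	return ATTEMPTED;
}

static void normal_read(const char *what)
{
	printf("normal read: %s\n", what);
}

static void do_read(struct io_driver *drv, const char *what)
{
	if (!drv || !drv->read || drv->read(what) == NOT_ATTEMPTED)
		normal_read(what);	/* fall back, exactly once */
}

int main(void)
{
	struct io_driver drv = { .read = fancy_read };

	do_read(&drv, "block 0");	/* goes through the driver */
	do_read(NULL, "block 1");	/* no driver: normal path */
	return 0;
}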
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 77d5e21c4ad6..b8ec170f2a0f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
741 .lock = nfs_proc_lock, 741 .lock = nfs_proc_lock,
742 .lock_check_bounds = nfs_lock_check_bounds, 742 .lock_check_bounds = nfs_lock_check_bounds,
743 .close_context = nfs_close_context, 743 .close_context = nfs_close_context,
744 .init_client = nfs_init_client,
744}; 745};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index aedcaa7f291f..7cded2b12a05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,19 +18,20 @@
18#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
19#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/module.h>
21 22
22#include <asm/system.h> 23#include <asm/system.h>
24#include "pnfs.h"
23 25
24#include "nfs4_fs.h" 26#include "nfs4_fs.h"
25#include "internal.h" 27#include "internal.h"
26#include "iostat.h" 28#include "iostat.h"
27#include "fscache.h" 29#include "fscache.h"
28#include "pnfs.h"
29 30
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 31#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 32
32static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); 33static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
33static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); 34static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
34static const struct rpc_call_ops nfs_read_partial_ops; 35static const struct rpc_call_ops nfs_read_partial_ops;
35static const struct rpc_call_ops nfs_read_full_ops; 36static const struct rpc_call_ops nfs_read_full_ops;
36 37
@@ -69,6 +70,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
69 70
70static void nfs_readdata_release(struct nfs_read_data *rdata) 71static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 72{
73 put_lseg(rdata->lseg);
72 put_nfs_open_context(rdata->args.context); 74 put_nfs_open_context(rdata->args.context);
73 nfs_readdata_free(rdata); 75 nfs_readdata_free(rdata);
74} 76}
@@ -114,14 +116,13 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
114int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 116int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 struct page *page) 117 struct page *page)
116{ 118{
117 LIST_HEAD(one_request);
118 struct nfs_page *new; 119 struct nfs_page *new;
119 unsigned int len; 120 unsigned int len;
121 struct nfs_pageio_descriptor pgio;
120 122
121 len = nfs_page_length(page); 123 len = nfs_page_length(page);
122 if (len == 0) 124 if (len == 0)
123 return nfs_return_empty_page(page); 125 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
125 new = nfs_create_request(ctx, inode, page, 0, len); 126 new = nfs_create_request(ctx, inode, page, 0, len);
126 if (IS_ERR(new)) { 127 if (IS_ERR(new)) {
127 unlock_page(page); 128 unlock_page(page);
@@ -130,11 +131,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
130 if (len < PAGE_CACHE_SIZE) 131 if (len < PAGE_CACHE_SIZE)
131 zero_user_segment(page, len, PAGE_CACHE_SIZE); 132 zero_user_segment(page, len, PAGE_CACHE_SIZE);
132 133
133 nfs_list_add_request(new, &one_request); 134 nfs_pageio_init(&pgio, inode, NULL, 0, 0);
135 nfs_list_add_request(new, &pgio.pg_list);
136 pgio.pg_count = len;
137
134 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
135 nfs_pagein_multi(inode, &one_request, 1, len, 0); 139 nfs_pagein_multi(&pgio);
136 else 140 else
137 nfs_pagein_one(inode, &one_request, 1, len, 0); 141 nfs_pagein_one(&pgio);
138 return 0; 142 return 0;
139} 143}
140 144
@@ -155,24 +159,20 @@ static void nfs_readpage_release(struct nfs_page *req)
155 nfs_release_request(req); 159 nfs_release_request(req);
156} 160}
157 161
158/* 162int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
159 * Set up the NFS read request struct 163 const struct rpc_call_ops *call_ops)
160 */
161static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
162 const struct rpc_call_ops *call_ops,
163 unsigned int count, unsigned int offset)
164{ 164{
165 struct inode *inode = req->wb_context->path.dentry->d_inode; 165 struct inode *inode = data->inode;
166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
167 struct rpc_task *task; 167 struct rpc_task *task;
168 struct rpc_message msg = { 168 struct rpc_message msg = {
169 .rpc_argp = &data->args, 169 .rpc_argp = &data->args,
170 .rpc_resp = &data->res, 170 .rpc_resp = &data->res,
171 .rpc_cred = req->wb_context->cred, 171 .rpc_cred = data->cred,
172 }; 172 };
173 struct rpc_task_setup task_setup_data = { 173 struct rpc_task_setup task_setup_data = {
174 .task = &data->task, 174 .task = &data->task,
175 .rpc_client = NFS_CLIENT(inode), 175 .rpc_client = clnt,
176 .rpc_message = &msg, 176 .rpc_message = &msg,
177 .callback_ops = call_ops, 177 .callback_ops = call_ops,
178 .callback_data = data, 178 .callback_data = data,
@@ -180,9 +180,39 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
180 .flags = RPC_TASK_ASYNC | swap_flags, 180 .flags = RPC_TASK_ASYNC | swap_flags,
181 }; 181 };
182 182
183 /* Set up the initial task struct. */
184 NFS_PROTO(inode)->read_setup(data, &msg);
185
186 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
187 "offset %llu)\n",
188 data->task.tk_pid,
189 inode->i_sb->s_id,
190 (long long)NFS_FILEID(inode),
191 data->args.count,
192 (unsigned long long)data->args.offset);
193
194 task = rpc_run_task(&task_setup_data);
195 if (IS_ERR(task))
196 return PTR_ERR(task);
197 rpc_put_task(task);
198 return 0;
199}
200EXPORT_SYMBOL_GPL(nfs_initiate_read);
201
202/*
203 * Set up the NFS read request struct
204 */
205static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
206 const struct rpc_call_ops *call_ops,
207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg)
209{
210 struct inode *inode = req->wb_context->path.dentry->d_inode;
211
183 data->req = req; 212 data->req = req;
184 data->inode = inode; 213 data->inode = inode;
185 data->cred = msg.rpc_cred; 214 data->cred = req->wb_context->cred;
215 data->lseg = get_lseg(lseg);
186 216
187 data->args.fh = NFS_FH(inode); 217 data->args.fh = NFS_FH(inode);
188 data->args.offset = req_offset(req) + offset; 218 data->args.offset = req_offset(req) + offset;
@@ -197,21 +227,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
197 data->res.eof = 0; 227 data->res.eof = 0;
198 nfs_fattr_init(&data->fattr); 228 nfs_fattr_init(&data->fattr);
199 229
200 /* Set up the initial task struct. */ 230 if (data->lseg &&
201 NFS_PROTO(inode)->read_setup(data, &msg); 231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
202 232 return 0;
203 dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
204 data->task.tk_pid,
205 inode->i_sb->s_id,
206 (long long)NFS_FILEID(inode),
207 count,
208 (unsigned long long)data->args.offset);
209 233
210 task = rpc_run_task(&task_setup_data); 234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
211 if (IS_ERR(task))
212 return PTR_ERR(task);
213 rpc_put_task(task);
214 return 0;
215} 235}
216 236
217static void 237static void
@@ -240,20 +260,21 @@ nfs_async_read_error(struct list_head *head)
240 * won't see the new data until our attribute cache is updated. This is more 260 * won't see the new data until our attribute cache is updated. This is more
241 * or less conventional NFS client behavior. 261 * or less conventional NFS client behavior.
242 */ 262 */
243static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 263static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
244{ 264{
245 struct nfs_page *req = nfs_list_entry(head->next); 265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
246 struct page *page = req->wb_page; 266 struct page *page = req->wb_page;
247 struct nfs_read_data *data; 267 struct nfs_read_data *data;
248 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
249 unsigned int offset; 269 unsigned int offset;
250 int requests = 0; 270 int requests = 0;
251 int ret = 0; 271 int ret = 0;
272 struct pnfs_layout_segment *lseg;
252 LIST_HEAD(list); 273 LIST_HEAD(list);
253 274
254 nfs_list_remove_request(req); 275 nfs_list_remove_request(req);
255 276
256 nbytes = count; 277 nbytes = desc->pg_count;
257 do { 278 do {
258 size_t len = min(nbytes,rsize); 279 size_t len = min(nbytes,rsize);
259 280
@@ -266,9 +287,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
266 } while(nbytes != 0); 287 } while(nbytes != 0);
267 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
268 289
290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
269 ClearPageError(page); 292 ClearPageError(page);
270 offset = 0; 293 offset = 0;
271 nbytes = count; 294 nbytes = desc->pg_count;
272 do { 295 do {
273 int ret2; 296 int ret2;
274 297
@@ -280,12 +303,14 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
280 if (nbytes < rsize) 303 if (nbytes < rsize)
281 rsize = nbytes; 304 rsize = nbytes;
282 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 305 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
283 rsize, offset); 306 rsize, offset, lseg);
284 if (ret == 0) 307 if (ret == 0)
285 ret = ret2; 308 ret = ret2;
286 offset += rsize; 309 offset += rsize;
287 nbytes -= rsize; 310 nbytes -= rsize;
288 } while (nbytes != 0); 311 } while (nbytes != 0);
312 put_lseg(lseg);
313 desc->pg_lseg = NULL;
289 314
290 return ret; 315 return ret;
291 316
@@ -300,16 +325,21 @@ out_bad:
300 return -ENOMEM; 325 return -ENOMEM;
301} 326}
302 327
303static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 328static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
304{ 329{
305 struct nfs_page *req; 330 struct nfs_page *req;
306 struct page **pages; 331 struct page **pages;
307 struct nfs_read_data *data; 332 struct nfs_read_data *data;
333 struct list_head *head = &desc->pg_list;
334 struct pnfs_layout_segment *lseg = desc->pg_lseg;
308 int ret = -ENOMEM; 335 int ret = -ENOMEM;
309 336
310 data = nfs_readdata_alloc(npages); 337 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
311 if (!data) 338 desc->pg_count));
312 goto out_bad; 339 if (!data) {
340 nfs_async_read_error(head);
341 goto out;
342 }
313 343
314 pages = data->pagevec; 344 pages = data->pagevec;
315 while (!list_empty(head)) { 345 while (!list_empty(head)) {
@@ -320,10 +350,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
320 *pages++ = req->wb_page; 350 *pages++ = req->wb_page;
321 } 351 }
322 req = nfs_list_entry(data->pages.next); 352 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
323 355
324 return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
325out_bad: 357 0, lseg);
326 nfs_async_read_error(head); 358out:
359 put_lseg(lseg);
360 desc->pg_lseg = NULL;
327 return ret; 361 return ret;
328} 362}
329 363
@@ -366,6 +400,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
366 return; 400 return;
367 401
368 /* Yes, so retry the read at the end of the data */ 402 /* Yes, so retry the read at the end of the data */
403 data->mds_offset += resp->count;
369 argp->offset += resp->count; 404 argp->offset += resp->count;
370 argp->pgbase += resp->count; 405 argp->pgbase += resp->count;
371 argp->count -= resp->count; 406 argp->count -= resp->count;
@@ -625,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 660 if (ret == 0)
626 goto read_complete; /* all pages were read */ 661 goto read_complete; /* all pages were read */
627 662
628 pnfs_update_layout(inode, desc.ctx, IOMODE_READ); 663 pnfs_pageio_init_read(&pgio, inode);
629 if (rsize < PAGE_CACHE_SIZE) 664 if (rsize < PAGE_CACHE_SIZE)
630 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 665 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
631 else 666 else
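The read.c rework above changes the per-strategy callbacks (nfs_pagein_one/_multi) to take only the nfs_pageio_descriptor, which already carries the inode, request list, byte count and flags. The userspace sketch below shows that descriptor-only callback shape; the types and names are invented for the example.

#include <stddef.h>
#include <stdio.h>

struct io_desc {
	const char *name;	/* stands in for the inode */
	size_t count;		/* bytes queued on the descriptor */
	int flags;
	int (*doio)(struct io_desc *desc);	/* strategy callback */
};

static int read_one(struct io_desc *desc)
{
	printf("%s: one RPC for %zu bytes\n", desc->name, desc->count);
	return 0;
}

static int read_multi(struct io_desc *desc)
{
	printf("%s: splitting %zu bytes into rsize chunks\n",
	       desc->name, desc->count);
	return 0;
}

static int flush(struct io_desc *desc)
{
	/* the caller no longer unpacks fields for the callback */
	return desc->doio ? desc->doio(desc) : 0;
}

int main(void)
{
	struct io_desc small = { "file-a", 4096, 0, read_one };
	struct io_desc big = { "file-b", 1 << 20, 0, read_multi };

	flush(&small);
	flush(&big);
	return 0;
}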
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b68c8607770f..2b8e9a5e366a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -263,8 +263,11 @@ static match_table_t nfs_local_lock_tokens = {
263static void nfs_umount_begin(struct super_block *); 263static void nfs_umount_begin(struct super_block *);
264static int nfs_statfs(struct dentry *, struct kstatfs *); 264static int nfs_statfs(struct dentry *, struct kstatfs *);
265static int nfs_show_options(struct seq_file *, struct vfsmount *); 265static int nfs_show_options(struct seq_file *, struct vfsmount *);
266static int nfs_show_devname(struct seq_file *, struct vfsmount *);
267static int nfs_show_path(struct seq_file *, struct vfsmount *);
266static int nfs_show_stats(struct seq_file *, struct vfsmount *); 268static int nfs_show_stats(struct seq_file *, struct vfsmount *);
267static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 269static struct dentry *nfs_fs_mount(struct file_system_type *,
270 int, const char *, void *);
268static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 271static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
269 int flags, const char *dev_name, void *raw_data); 272 int flags, const char *dev_name, void *raw_data);
270static void nfs_put_super(struct super_block *); 273static void nfs_put_super(struct super_block *);
@@ -274,7 +277,7 @@ static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
274static struct file_system_type nfs_fs_type = { 277static struct file_system_type nfs_fs_type = {
275 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
276 .name = "nfs", 279 .name = "nfs",
277 .get_sb = nfs_get_sb, 280 .mount = nfs_fs_mount,
278 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
279 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
280}; 283};
@@ -296,6 +299,8 @@ static const struct super_operations nfs_sops = {
296 .evict_inode = nfs_evict_inode, 299 .evict_inode = nfs_evict_inode,
297 .umount_begin = nfs_umount_begin, 300 .umount_begin = nfs_umount_begin,
298 .show_options = nfs_show_options, 301 .show_options = nfs_show_options,
302 .show_devname = nfs_show_devname,
303 .show_path = nfs_show_path,
299 .show_stats = nfs_show_stats, 304 .show_stats = nfs_show_stats,
300 .remount_fs = nfs_remount, 305 .remount_fs = nfs_remount,
301}; 306};
@@ -303,16 +308,16 @@ static const struct super_operations nfs_sops = {
303#ifdef CONFIG_NFS_V4 308#ifdef CONFIG_NFS_V4
304static int nfs4_validate_text_mount_data(void *options, 309static int nfs4_validate_text_mount_data(void *options,
305 struct nfs_parsed_mount_data *args, const char *dev_name); 310 struct nfs_parsed_mount_data *args, const char *dev_name);
306static int nfs4_try_mount(int flags, const char *dev_name, 311static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
307 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 312 struct nfs_parsed_mount_data *data);
308static int nfs4_get_sb(struct file_system_type *fs_type, 313static struct dentry *nfs4_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 314 int flags, const char *dev_name, void *raw_data);
310static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 315static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
311 int flags, const char *dev_name, void *raw_data); 316 int flags, const char *dev_name, void *raw_data);
312static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 317static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
313 int flags, const char *dev_name, void *raw_data); 318 int flags, const char *dev_name, void *raw_data);
314static int nfs4_referral_get_sb(struct file_system_type *fs_type, 319static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
315 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 320 int flags, const char *dev_name, void *raw_data);
316static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, 321static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
317 int flags, const char *dev_name, void *raw_data); 322 int flags, const char *dev_name, void *raw_data);
318static void nfs4_kill_super(struct super_block *sb); 323static void nfs4_kill_super(struct super_block *sb);
@@ -320,7 +325,7 @@ static void nfs4_kill_super(struct super_block *sb);
320static struct file_system_type nfs4_fs_type = { 325static struct file_system_type nfs4_fs_type = {
321 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
322 .name = "nfs4", 327 .name = "nfs4",
323 .get_sb = nfs4_get_sb, 328 .mount = nfs4_mount,
324 .kill_sb = nfs4_kill_super, 329 .kill_sb = nfs4_kill_super,
325 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 330 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
326}; 331};
@@ -352,7 +357,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
352struct file_system_type nfs4_referral_fs_type = { 357struct file_system_type nfs4_referral_fs_type = {
353 .owner = THIS_MODULE, 358 .owner = THIS_MODULE,
354 .name = "nfs4", 359 .name = "nfs4",
355 .get_sb = nfs4_referral_get_sb, 360 .mount = nfs4_referral_mount,
356 .kill_sb = nfs4_kill_super, 361 .kill_sb = nfs4_kill_super,
357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 362 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
358}; 363};
@@ -366,6 +371,8 @@ static const struct super_operations nfs4_sops = {
366 .evict_inode = nfs4_evict_inode, 371 .evict_inode = nfs4_evict_inode,
367 .umount_begin = nfs_umount_begin, 372 .umount_begin = nfs_umount_begin,
368 .show_options = nfs_show_options, 373 .show_options = nfs_show_options,
374 .show_devname = nfs_show_devname,
375 .show_path = nfs_show_path,
369 .show_stats = nfs_show_stats, 376 .show_stats = nfs_show_stats,
370 .remount_fs = nfs_remount, 377 .remount_fs = nfs_remount,
371}; 378};
@@ -726,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
726 return 0; 733 return 0;
727} 734}
728 735
736static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
737{
738 char *page = (char *) __get_free_page(GFP_KERNEL);
739 char *devname, *dummy;
740 int err = 0;
741 if (!page)
742 return -ENOMEM;
743 devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
744 if (IS_ERR(devname))
745 err = PTR_ERR(devname);
746 else
747 seq_escape(m, devname, " \t\n\\");
748 free_page((unsigned long)page);
749 return err;
750}
751
752static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
753{
754 seq_puts(m, "/");
755 return 0;
756}
757
729/* 758/*
730 * Present statistical information for this VFS mountpoint 759 * Present statistical information for this VFS mountpoint
731 */ 760 */
@@ -979,6 +1008,27 @@ static int nfs_parse_security_flavors(char *value,
979 return 1; 1008 return 1;
980} 1009}
981 1010
1011static int nfs_get_option_str(substring_t args[], char **option)
1012{
1013 kfree(*option);
1014 *option = match_strdup(args);
1015 	return !*option;
1016}
1017
1018static int nfs_get_option_ul(substring_t args[], unsigned long *option)
1019{
1020 int rc;
1021 char *string;
1022
1023 string = match_strdup(args);
1024 if (string == NULL)
1025 return -ENOMEM;
1026 rc = strict_strtoul(string, 10, option);
1027 kfree(string);
1028
1029 return rc;
1030}
1031
982/* 1032/*
983 * Error-check and convert a string of mount options from user space into 1033 * Error-check and convert a string of mount options from user space into
984 * a data structure. The whole mount string is processed; bad options are 1034 * a data structure. The whole mount string is processed; bad options are
@@ -1127,155 +1177,82 @@ static int nfs_parse_mount_options(char *raw,
1127 * options that take numeric values 1177 * options that take numeric values
1128 */ 1178 */
1129 case Opt_port: 1179 case Opt_port:
1130 string = match_strdup(args); 1180 if (nfs_get_option_ul(args, &option) ||
1131 if (string == NULL) 1181 option > USHRT_MAX)
1132 goto out_nomem;
1133 rc = strict_strtoul(string, 10, &option);
1134 kfree(string);
1135 if (rc != 0 || option > USHRT_MAX)
1136 goto out_invalid_value; 1182 goto out_invalid_value;
1137 mnt->nfs_server.port = option; 1183 mnt->nfs_server.port = option;
1138 break; 1184 break;
1139 case Opt_rsize: 1185 case Opt_rsize:
1140 string = match_strdup(args); 1186 if (nfs_get_option_ul(args, &option))
1141 if (string == NULL)
1142 goto out_nomem;
1143 rc = strict_strtoul(string, 10, &option);
1144 kfree(string);
1145 if (rc != 0)
1146 goto out_invalid_value; 1187 goto out_invalid_value;
1147 mnt->rsize = option; 1188 mnt->rsize = option;
1148 break; 1189 break;
1149 case Opt_wsize: 1190 case Opt_wsize:
1150 string = match_strdup(args); 1191 if (nfs_get_option_ul(args, &option))
1151 if (string == NULL)
1152 goto out_nomem;
1153 rc = strict_strtoul(string, 10, &option);
1154 kfree(string);
1155 if (rc != 0)
1156 goto out_invalid_value; 1192 goto out_invalid_value;
1157 mnt->wsize = option; 1193 mnt->wsize = option;
1158 break; 1194 break;
1159 case Opt_bsize: 1195 case Opt_bsize:
1160 string = match_strdup(args); 1196 if (nfs_get_option_ul(args, &option))
1161 if (string == NULL)
1162 goto out_nomem;
1163 rc = strict_strtoul(string, 10, &option);
1164 kfree(string);
1165 if (rc != 0)
1166 goto out_invalid_value; 1197 goto out_invalid_value;
1167 mnt->bsize = option; 1198 mnt->bsize = option;
1168 break; 1199 break;
1169 case Opt_timeo: 1200 case Opt_timeo:
1170 string = match_strdup(args); 1201 if (nfs_get_option_ul(args, &option) || option == 0)
1171 if (string == NULL)
1172 goto out_nomem;
1173 rc = strict_strtoul(string, 10, &option);
1174 kfree(string);
1175 if (rc != 0 || option == 0)
1176 goto out_invalid_value; 1202 goto out_invalid_value;
1177 mnt->timeo = option; 1203 mnt->timeo = option;
1178 break; 1204 break;
1179 case Opt_retrans: 1205 case Opt_retrans:
1180 string = match_strdup(args); 1206 if (nfs_get_option_ul(args, &option) || option == 0)
1181 if (string == NULL)
1182 goto out_nomem;
1183 rc = strict_strtoul(string, 10, &option);
1184 kfree(string);
1185 if (rc != 0 || option == 0)
1186 goto out_invalid_value; 1207 goto out_invalid_value;
1187 mnt->retrans = option; 1208 mnt->retrans = option;
1188 break; 1209 break;
1189 case Opt_acregmin: 1210 case Opt_acregmin:
1190 string = match_strdup(args); 1211 if (nfs_get_option_ul(args, &option))
1191 if (string == NULL)
1192 goto out_nomem;
1193 rc = strict_strtoul(string, 10, &option);
1194 kfree(string);
1195 if (rc != 0)
1196 goto out_invalid_value; 1212 goto out_invalid_value;
1197 mnt->acregmin = option; 1213 mnt->acregmin = option;
1198 break; 1214 break;
1199 case Opt_acregmax: 1215 case Opt_acregmax:
1200 string = match_strdup(args); 1216 if (nfs_get_option_ul(args, &option))
1201 if (string == NULL)
1202 goto out_nomem;
1203 rc = strict_strtoul(string, 10, &option);
1204 kfree(string);
1205 if (rc != 0)
1206 goto out_invalid_value; 1217 goto out_invalid_value;
1207 mnt->acregmax = option; 1218 mnt->acregmax = option;
1208 break; 1219 break;
1209 case Opt_acdirmin: 1220 case Opt_acdirmin:
1210 string = match_strdup(args); 1221 if (nfs_get_option_ul(args, &option))
1211 if (string == NULL)
1212 goto out_nomem;
1213 rc = strict_strtoul(string, 10, &option);
1214 kfree(string);
1215 if (rc != 0)
1216 goto out_invalid_value; 1222 goto out_invalid_value;
1217 mnt->acdirmin = option; 1223 mnt->acdirmin = option;
1218 break; 1224 break;
1219 case Opt_acdirmax: 1225 case Opt_acdirmax:
1220 string = match_strdup(args); 1226 if (nfs_get_option_ul(args, &option))
1221 if (string == NULL)
1222 goto out_nomem;
1223 rc = strict_strtoul(string, 10, &option);
1224 kfree(string);
1225 if (rc != 0)
1226 goto out_invalid_value; 1227 goto out_invalid_value;
1227 mnt->acdirmax = option; 1228 mnt->acdirmax = option;
1228 break; 1229 break;
1229 case Opt_actimeo: 1230 case Opt_actimeo:
1230 string = match_strdup(args); 1231 if (nfs_get_option_ul(args, &option))
1231 if (string == NULL)
1232 goto out_nomem;
1233 rc = strict_strtoul(string, 10, &option);
1234 kfree(string);
1235 if (rc != 0)
1236 goto out_invalid_value; 1232 goto out_invalid_value;
1237 mnt->acregmin = mnt->acregmax = 1233 mnt->acregmin = mnt->acregmax =
1238 mnt->acdirmin = mnt->acdirmax = option; 1234 mnt->acdirmin = mnt->acdirmax = option;
1239 break; 1235 break;
1240 case Opt_namelen: 1236 case Opt_namelen:
1241 string = match_strdup(args); 1237 if (nfs_get_option_ul(args, &option))
1242 if (string == NULL)
1243 goto out_nomem;
1244 rc = strict_strtoul(string, 10, &option);
1245 kfree(string);
1246 if (rc != 0)
1247 goto out_invalid_value; 1238 goto out_invalid_value;
1248 mnt->namlen = option; 1239 mnt->namlen = option;
1249 break; 1240 break;
1250 case Opt_mountport: 1241 case Opt_mountport:
1251 string = match_strdup(args); 1242 if (nfs_get_option_ul(args, &option) ||
1252 if (string == NULL) 1243 option > USHRT_MAX)
1253 goto out_nomem;
1254 rc = strict_strtoul(string, 10, &option);
1255 kfree(string);
1256 if (rc != 0 || option > USHRT_MAX)
1257 goto out_invalid_value; 1244 goto out_invalid_value;
1258 mnt->mount_server.port = option; 1245 mnt->mount_server.port = option;
1259 break; 1246 break;
1260 case Opt_mountvers: 1247 case Opt_mountvers:
1261 string = match_strdup(args); 1248 if (nfs_get_option_ul(args, &option) ||
1262 if (string == NULL)
1263 goto out_nomem;
1264 rc = strict_strtoul(string, 10, &option);
1265 kfree(string);
1266 if (rc != 0 ||
1267 option < NFS_MNT_VERSION || 1249 option < NFS_MNT_VERSION ||
1268 option > NFS_MNT3_VERSION) 1250 option > NFS_MNT3_VERSION)
1269 goto out_invalid_value; 1251 goto out_invalid_value;
1270 mnt->mount_server.version = option; 1252 mnt->mount_server.version = option;
1271 break; 1253 break;
1272 case Opt_nfsvers: 1254 case Opt_nfsvers:
1273 string = match_strdup(args); 1255 if (nfs_get_option_ul(args, &option))
1274 if (string == NULL)
1275 goto out_nomem;
1276 rc = strict_strtoul(string, 10, &option);
1277 kfree(string);
1278 if (rc != 0)
1279 goto out_invalid_value; 1256 goto out_invalid_value;
1280 switch (option) { 1257 switch (option) {
1281 case NFS2_VERSION: 1258 case NFS2_VERSION:
@@ -1295,12 +1272,7 @@ static int nfs_parse_mount_options(char *raw,
1295 } 1272 }
1296 break; 1273 break;
1297 case Opt_minorversion: 1274 case Opt_minorversion:
1298 string = match_strdup(args); 1275 if (nfs_get_option_ul(args, &option))
1299 if (string == NULL)
1300 goto out_nomem;
1301 rc = strict_strtoul(string, 10, &option);
1302 kfree(string);
1303 if (rc != 0)
1304 goto out_invalid_value; 1276 goto out_invalid_value;
1305 if (option > NFS4_MAX_MINOR_VERSION) 1277 if (option > NFS4_MAX_MINOR_VERSION)
1306 goto out_invalid_value; 1278 goto out_invalid_value;
@@ -1336,21 +1308,18 @@ static int nfs_parse_mount_options(char *raw,
1336 case Opt_xprt_udp: 1308 case Opt_xprt_udp:
1337 mnt->flags &= ~NFS_MOUNT_TCP; 1309 mnt->flags &= ~NFS_MOUNT_TCP;
1338 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1310 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1339 kfree(string);
1340 break; 1311 break;
1341 case Opt_xprt_tcp6: 1312 case Opt_xprt_tcp6:
1342 protofamily = AF_INET6; 1313 protofamily = AF_INET6;
1343 case Opt_xprt_tcp: 1314 case Opt_xprt_tcp:
1344 mnt->flags |= NFS_MOUNT_TCP; 1315 mnt->flags |= NFS_MOUNT_TCP;
1345 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1316 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1346 kfree(string);
1347 break; 1317 break;
1348 case Opt_xprt_rdma: 1318 case Opt_xprt_rdma:
1349 /* vector side protocols to TCP */ 1319 /* vector side protocols to TCP */
1350 mnt->flags |= NFS_MOUNT_TCP; 1320 mnt->flags |= NFS_MOUNT_TCP;
1351 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1321 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1352 xprt_load_transport(string); 1322 xprt_load_transport(string);
1353 kfree(string);
1354 break; 1323 break;
1355 default: 1324 default:
1356 dfprintk(MOUNT, "NFS: unrecognized " 1325 dfprintk(MOUNT, "NFS: unrecognized "
@@ -1358,6 +1327,7 @@ static int nfs_parse_mount_options(char *raw,
1358 kfree(string); 1327 kfree(string);
1359 return 0; 1328 return 0;
1360 } 1329 }
1330 kfree(string);
1361 break; 1331 break;
1362 case Opt_mountproto: 1332 case Opt_mountproto:
1363 string = match_strdup(args); 1333 string = match_strdup(args);
@@ -1400,18 +1370,13 @@ static int nfs_parse_mount_options(char *raw,
1400 goto out_invalid_address; 1370 goto out_invalid_address;
1401 break; 1371 break;
1402 case Opt_clientaddr: 1372 case Opt_clientaddr:
1403 string = match_strdup(args); 1373 if (nfs_get_option_str(args, &mnt->client_address))
1404 if (string == NULL)
1405 goto out_nomem; 1374 goto out_nomem;
1406 kfree(mnt->client_address);
1407 mnt->client_address = string;
1408 break; 1375 break;
1409 case Opt_mounthost: 1376 case Opt_mounthost:
1410 string = match_strdup(args); 1377 if (nfs_get_option_str(args,
1411 if (string == NULL) 1378 &mnt->mount_server.hostname))
1412 goto out_nomem; 1379 goto out_nomem;
1413 kfree(mnt->mount_server.hostname);
1414 mnt->mount_server.hostname = string;
1415 break; 1380 break;
1416 case Opt_mountaddr: 1381 case Opt_mountaddr:
1417 string = match_strdup(args); 1382 string = match_strdup(args);
@@ -1451,11 +1416,8 @@ static int nfs_parse_mount_options(char *raw,
1451 }; 1416 };
1452 break; 1417 break;
1453 case Opt_fscache_uniq: 1418 case Opt_fscache_uniq:
1454 string = match_strdup(args); 1419 if (nfs_get_option_str(args, &mnt->fscache_uniq))
1455 if (string == NULL)
1456 goto out_nomem; 1420 goto out_nomem;
1457 kfree(mnt->fscache_uniq);
1458 mnt->fscache_uniq = string;
1459 mnt->options |= NFS_OPTION_FSCACHE; 1421 mnt->options |= NFS_OPTION_FSCACHE;
1460 break; 1422 break;
1461 case Opt_local_lock: 1423 case Opt_local_lock:
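The repeated match_strdup()/strict_strtoul()/kfree() pattern above is collapsed into two small helpers, nfs_get_option_str() and nfs_get_option_ul(). Their bodies are not part of these hunks; the sketch below is inferred from how the call sites use them (a non-zero return means failure, and the string helper frees any previously parsed value), so treat it as an approximation rather than the patch's exact code.

static int nfs_get_option_str(substring_t args[], char **option)
{
        kfree(*option);                 /* drop a value from an earlier occurrence of the option */
        *option = match_strdup(args);
        return !*option;                /* non-zero -> caller jumps to out_nomem */
}

static int nfs_get_option_ul(substring_t args[], unsigned long *option)
{
        char *string;
        int rc;

        string = match_strdup(args);
        if (string == NULL)
                return -ENOMEM;
        rc = strict_strtoul(string, 10, option);
        kfree(string);
        return rc;                      /* non-zero -> caller jumps to out_invalid_value */
}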
@@ -1665,99 +1627,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1665 return nfs_walk_authlist(args, &request); 1627 return nfs_walk_authlist(args, &request);
1666} 1628}
1667 1629
1668static int nfs_parse_simple_hostname(const char *dev_name, 1630/*
1669 char **hostname, size_t maxnamlen, 1631 * Split "dev_name" into "hostname:export_path".
1670 char **export_path, size_t maxpathlen) 1632 *
1633 * The leftmost colon demarks the split between the server's hostname
1634 * and the export path. If the hostname starts with a left square
1635 * bracket, then it may contain colons.
1636 *
1637 * Note: caller frees hostname and export path, even on error.
1638 */
1639static int nfs_parse_devname(const char *dev_name,
1640 char **hostname, size_t maxnamlen,
1641 char **export_path, size_t maxpathlen)
1671{ 1642{
1672 size_t len; 1643 size_t len;
1673 char *colon, *comma; 1644 char *end;
1674
1675 colon = strchr(dev_name, ':');
1676 if (colon == NULL)
1677 goto out_bad_devname;
1678
1679 len = colon - dev_name;
1680 if (len > maxnamlen)
1681 goto out_hostname;
1682 1645
 1683 /* N.B. caller will free nfs_server.hostname in all cases */ 1646 /* Is the host name protected with square brackets? */
1684 *hostname = kstrndup(dev_name, len, GFP_KERNEL); 1647 if (*dev_name == '[') {
1685 if (!*hostname) 1648 end = strchr(++dev_name, ']');
1686 goto out_nomem; 1649 if (end == NULL || end[1] != ':')
1687
1688 /* kill possible hostname list: not supported */
1689 comma = strchr(*hostname, ',');
1690 if (comma != NULL) {
1691 if (comma == *hostname)
1692 goto out_bad_devname; 1650 goto out_bad_devname;
1693 *comma = '\0';
1694 }
1695 1651
1696 colon++; 1652 len = end - dev_name;
1697 len = strlen(colon); 1653 end++;
1698 if (len > maxpathlen) 1654 } else {
1699 goto out_path; 1655 char *comma;
1700 *export_path = kstrndup(colon, len, GFP_KERNEL);
1701 if (!*export_path)
1702 goto out_nomem;
1703
1704 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1705 return 0;
1706
1707out_bad_devname:
1708 dfprintk(MOUNT, "NFS: device name not in host:path format\n");
1709 return -EINVAL;
1710
1711out_nomem:
1712 dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
1713 return -ENOMEM;
1714
1715out_hostname:
1716 dfprintk(MOUNT, "NFS: server hostname too long\n");
1717 return -ENAMETOOLONG;
1718
1719out_path:
1720 dfprintk(MOUNT, "NFS: export pathname too long\n");
1721 return -ENAMETOOLONG;
1722}
1723
1724/*
1725 * Hostname has square brackets around it because it contains one or
1726 * more colons. We look for the first closing square bracket, and a
1727 * colon must follow it.
1728 */
1729static int nfs_parse_protected_hostname(const char *dev_name,
1730 char **hostname, size_t maxnamlen,
1731 char **export_path, size_t maxpathlen)
1732{
1733 size_t len;
1734 char *start, *end;
1735 1656
1736 start = (char *)(dev_name + 1); 1657 end = strchr(dev_name, ':');
1658 if (end == NULL)
1659 goto out_bad_devname;
1660 len = end - dev_name;
1737 1661
1738 end = strchr(start, ']'); 1662 /* kill possible hostname list: not supported */
1739 if (end == NULL) 1663 comma = strchr(dev_name, ',');
1740 goto out_bad_devname; 1664 if (comma != NULL && comma < end)
1741 if (*(end + 1) != ':') 1665 *comma = 0;
1742 goto out_bad_devname; 1666 }
1743 1667
1744 len = end - start;
1745 if (len > maxnamlen) 1668 if (len > maxnamlen)
1746 goto out_hostname; 1669 goto out_hostname;
1747 1670
1748 /* N.B. caller will free nfs_server.hostname in all cases */ 1671 /* N.B. caller will free nfs_server.hostname in all cases */
1749 *hostname = kstrndup(start, len, GFP_KERNEL); 1672 *hostname = kstrndup(dev_name, len, GFP_KERNEL);
1750 if (*hostname == NULL) 1673 if (*hostname == NULL)
1751 goto out_nomem; 1674 goto out_nomem;
1752 1675 len = strlen(++end);
1753 end += 2;
1754 len = strlen(end);
1755 if (len > maxpathlen) 1676 if (len > maxpathlen)
1756 goto out_path; 1677 goto out_path;
1757 *export_path = kstrndup(end, len, GFP_KERNEL); 1678 *export_path = kstrndup(end, len, GFP_KERNEL);
1758 if (!*export_path) 1679 if (!*export_path)
1759 goto out_nomem; 1680 goto out_nomem;
1760 1681
1682 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1761 return 0; 1683 return 0;
1762 1684
1763out_bad_devname: 1685out_bad_devname:
@@ -1778,29 +1700,6 @@ out_path:
1778} 1700}
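The consolidated nfs_parse_devname() above now handles both device-string forms in one pass: a leading '[' protects colons inside the hostname (the IPv6 case), otherwise the leftmost ':' separates host from export path. A small userspace illustration of the intended split rules (hypothetical helper, no error handling, not part of the patch):

#include <stdio.h>
#include <string.h>

/* Mirrors the split rules: '[' protects colons in the host part,
 * otherwise the leftmost ':' divides "hostname:export_path". */
static void split_devname(const char *dev, char *host, char *path)
{
        const char *end;

        if (*dev == '[') {                              /* "[fe80::1]:/export" */
                end = strchr(++dev, ']');
                snprintf(host, end - dev + 1, "%s", dev);
                strcpy(path, end + 2);                  /* skip "]:" */
        } else {                                        /* "server:/export" */
                end = strchr(dev, ':');
                snprintf(host, end - dev + 1, "%s", dev);
                strcpy(path, end + 1);
        }
}

int main(void)
{
        char host[64], path[64];

        split_devname("[fe80::1]:/export", host, path);
        printf("%s %s\n", host, path);                  /* fe80::1 /export */
        split_devname("server.example.com:/export", host, path);
        printf("%s %s\n", host, path);                  /* server.example.com /export */
        return 0;
}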
1779 1701
1780/* 1702/*
1781 * Split "dev_name" into "hostname:export_path".
1782 *
1783 * The leftmost colon demarks the split between the server's hostname
1784 * and the export path. If the hostname starts with a left square
1785 * bracket, then it may contain colons.
1786 *
1787 * Note: caller frees hostname and export path, even on error.
1788 */
1789static int nfs_parse_devname(const char *dev_name,
1790 char **hostname, size_t maxnamlen,
1791 char **export_path, size_t maxpathlen)
1792{
1793 if (*dev_name == '[')
1794 return nfs_parse_protected_hostname(dev_name,
1795 hostname, maxnamlen,
1796 export_path, maxpathlen);
1797
1798 return nfs_parse_simple_hostname(dev_name,
1799 hostname, maxnamlen,
1800 export_path, maxpathlen);
1801}
1802
1803/*
1804 * Validate the NFS2/NFS3 mount data 1703 * Validate the NFS2/NFS3 mount data
1805 * - fills in the mount root filehandle 1704 * - fills in the mount root filehandle
1806 * 1705 *
@@ -2267,19 +2166,19 @@ static int nfs_bdi_register(struct nfs_server *server)
2267 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2166 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2268} 2167}
2269 2168
2270static int nfs_get_sb(struct file_system_type *fs_type, 2169static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2271 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2170 int flags, const char *dev_name, void *raw_data)
2272{ 2171{
2273 struct nfs_server *server = NULL; 2172 struct nfs_server *server = NULL;
2274 struct super_block *s; 2173 struct super_block *s;
2275 struct nfs_parsed_mount_data *data; 2174 struct nfs_parsed_mount_data *data;
2276 struct nfs_fh *mntfh; 2175 struct nfs_fh *mntfh;
2277 struct dentry *mntroot; 2176 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2278 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2177 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2279 struct nfs_sb_mountdata sb_mntdata = { 2178 struct nfs_sb_mountdata sb_mntdata = {
2280 .mntflags = flags, 2179 .mntflags = flags,
2281 }; 2180 };
2282 int error = -ENOMEM; 2181 int error;
2283 2182
2284 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); 2183 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2285 mntfh = nfs_alloc_fhandle(); 2184 mntfh = nfs_alloc_fhandle();
@@ -2290,12 +2189,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2290 2189
2291 /* Validate the mount data */ 2190 /* Validate the mount data */
2292 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); 2191 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2293 if (error < 0) 2192 if (error < 0) {
2193 mntroot = ERR_PTR(error);
2294 goto out; 2194 goto out;
2195 }
2295 2196
2296#ifdef CONFIG_NFS_V4 2197#ifdef CONFIG_NFS_V4
2297 if (data->version == 4) { 2198 if (data->version == 4) {
2298 error = nfs4_try_mount(flags, dev_name, data, mnt); 2199 mntroot = nfs4_try_mount(flags, dev_name, data);
2299 kfree(data->client_address); 2200 kfree(data->client_address);
2300 kfree(data->nfs_server.export_path); 2201 kfree(data->nfs_server.export_path);
2301 goto out; 2202 goto out;
@@ -2305,7 +2206,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2305 /* Get a volume representation */ 2206 /* Get a volume representation */
2306 server = nfs_create_server(data, mntfh); 2207 server = nfs_create_server(data, mntfh);
2307 if (IS_ERR(server)) { 2208 if (IS_ERR(server)) {
2308 error = PTR_ERR(server); 2209 mntroot = ERR_CAST(server);
2309 goto out; 2210 goto out;
2310 } 2211 }
2311 sb_mntdata.server = server; 2212 sb_mntdata.server = server;
@@ -2316,7 +2217,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2316 /* Get a superblock - note that we may end up sharing one that already exists */ 2217 /* Get a superblock - note that we may end up sharing one that already exists */
2317 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2218 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
2318 if (IS_ERR(s)) { 2219 if (IS_ERR(s)) {
2319 error = PTR_ERR(s); 2220 mntroot = ERR_CAST(s);
2320 goto out_err_nosb; 2221 goto out_err_nosb;
2321 } 2222 }
2322 2223
@@ -2325,8 +2226,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2325 server = NULL; 2226 server = NULL;
2326 } else { 2227 } else {
2327 error = nfs_bdi_register(server); 2228 error = nfs_bdi_register(server);
2328 if (error) 2229 if (error) {
2230 mntroot = ERR_PTR(error);
2329 goto error_splat_bdi; 2231 goto error_splat_bdi;
2232 }
2330 } 2233 }
2331 2234
2332 if (!s->s_root) { 2235 if (!s->s_root) {
@@ -2336,20 +2239,15 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2336 s, data ? data->fscache_uniq : NULL, NULL); 2239 s, data ? data->fscache_uniq : NULL, NULL);
2337 } 2240 }
2338 2241
2339 mntroot = nfs_get_root(s, mntfh); 2242 mntroot = nfs_get_root(s, mntfh, dev_name);
2340 if (IS_ERR(mntroot)) { 2243 if (IS_ERR(mntroot))
2341 error = PTR_ERR(mntroot);
2342 goto error_splat_super; 2244 goto error_splat_super;
2343 }
2344 2245
2345 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2246 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2346 if (error) 2247 if (error)
2347 goto error_splat_root; 2248 goto error_splat_root;
2348 2249
2349 s->s_flags |= MS_ACTIVE; 2250 s->s_flags |= MS_ACTIVE;
2350 mnt->mnt_sb = s;
2351 mnt->mnt_root = mntroot;
2352 error = 0;
2353 2251
2354out: 2252out:
2355 kfree(data->nfs_server.hostname); 2253 kfree(data->nfs_server.hostname);
@@ -2359,7 +2257,7 @@ out:
2359out_free_fh: 2257out_free_fh:
2360 nfs_free_fhandle(mntfh); 2258 nfs_free_fhandle(mntfh);
2361 kfree(data); 2259 kfree(data);
2362 return error; 2260 return mntroot;
2363 2261
2364out_err_nosb: 2262out_err_nosb:
2365 nfs_free_server(server); 2263 nfs_free_server(server);
@@ -2367,6 +2265,7 @@ out_err_nosb:
2367 2265
2368error_splat_root: 2266error_splat_root:
2369 dput(mntroot); 2267 dput(mntroot);
2268 mntroot = ERR_PTR(error);
2370error_splat_super: 2269error_splat_super:
2371 if (server && !s->s_root) 2270 if (server && !s->s_root)
2372 bdi_unregister(&server->backing_dev_info); 2271 bdi_unregister(&server->backing_dev_info);
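The nfs_fs_mount() hunks above are part of the tree-wide conversion from the old ->get_sb() superblock interface to the newer ->mount() one: instead of filling in a caller-supplied vfsmount and returning an int, the filesystem now returns the root dentry (or an ERR_PTR-encoded error) and the VFS finishes setting up the vfsmount itself. For reference, the two file_system_type hooks differ only in signature; this comparison is a reminder, not part of the hunk:

struct file_system_type {
        /* ... */
        /* old interface, being removed tree-wide: */
        int (*get_sb)(struct file_system_type *, int flags,
                      const char *dev_name, void *data, struct vfsmount *mnt);
        /* new interface implemented by nfs_fs_mount() above: */
        struct dentry *(*mount)(struct file_system_type *, int flags,
                                const char *dev_name, void *data);
        /* ... */
};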
@@ -2450,7 +2349,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2450 nfs_fscache_get_super_cookie(s, NULL, data); 2349 nfs_fscache_get_super_cookie(s, NULL, data);
2451 } 2350 }
2452 2351
2453 mntroot = nfs_get_root(s, data->fh); 2352 mntroot = nfs_get_root(s, data->fh, dev_name);
2454 if (IS_ERR(mntroot)) { 2353 if (IS_ERR(mntroot)) {
2455 error = PTR_ERR(mntroot); 2354 error = PTR_ERR(mntroot);
2456 goto error_splat_super; 2355 goto error_splat_super;
@@ -2718,7 +2617,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2718 s, data ? data->fscache_uniq : NULL, NULL); 2617 s, data ? data->fscache_uniq : NULL, NULL);
2719 } 2618 }
2720 2619
2721 mntroot = nfs4_get_root(s, mntfh); 2620 mntroot = nfs4_get_root(s, mntfh, dev_name);
2722 if (IS_ERR(mntroot)) { 2621 if (IS_ERR(mntroot)) {
2723 error = PTR_ERR(mntroot); 2622 error = PTR_ERR(mntroot);
2724 goto error_splat_super; 2623 goto error_splat_super;
@@ -2771,27 +2670,6 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2771 return root_mnt; 2670 return root_mnt;
2772} 2671}
2773 2672
2774static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2775{
2776 char *page = (char *) __get_free_page(GFP_KERNEL);
2777 char *devname, *tmp;
2778
2779 if (page == NULL)
2780 return;
2781 devname = nfs_path(path->mnt->mnt_devname,
2782 path->mnt->mnt_root, path->dentry,
2783 page, PAGE_SIZE);
2784 if (IS_ERR(devname))
2785 goto out_freepage;
2786 tmp = kstrdup(devname, GFP_KERNEL);
2787 if (tmp == NULL)
2788 goto out_freepage;
2789 kfree(mnt->mnt_devname);
2790 mnt->mnt_devname = tmp;
2791out_freepage:
2792 free_page((unsigned long)page);
2793}
2794
2795struct nfs_referral_count { 2673struct nfs_referral_count {
2796 struct list_head list; 2674 struct list_head list;
2797 const struct task_struct *task; 2675 const struct task_struct *task;
@@ -2858,17 +2736,18 @@ static void nfs_referral_loop_unprotect(void)
2858 kfree(p); 2736 kfree(p);
2859} 2737}
2860 2738
2861static int nfs_follow_remote_path(struct vfsmount *root_mnt, 2739static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2862 const char *export_path, struct vfsmount *mnt_target) 2740 const char *export_path)
2863{ 2741{
2864 struct nameidata *nd = NULL; 2742 struct nameidata *nd = NULL;
2865 struct mnt_namespace *ns_private; 2743 struct mnt_namespace *ns_private;
2866 struct super_block *s; 2744 struct super_block *s;
2745 struct dentry *dentry;
2867 int ret; 2746 int ret;
2868 2747
2869 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 2748 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2870 if (nd == NULL) 2749 if (nd == NULL)
2871 return -ENOMEM; 2750 return ERR_PTR(-ENOMEM);
2872 2751
2873 ns_private = create_mnt_ns(root_mnt); 2752 ns_private = create_mnt_ns(root_mnt);
2874 ret = PTR_ERR(ns_private); 2753 ret = PTR_ERR(ns_private);
@@ -2890,32 +2769,27 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2890 2769
2891 s = nd->path.mnt->mnt_sb; 2770 s = nd->path.mnt->mnt_sb;
2892 atomic_inc(&s->s_active); 2771 atomic_inc(&s->s_active);
2893 mnt_target->mnt_sb = s; 2772 dentry = dget(nd->path.dentry);
2894 mnt_target->mnt_root = dget(nd->path.dentry);
2895
2896 /* Correct the device pathname */
2897 nfs_fix_devname(&nd->path, mnt_target);
2898 2773
2899 path_put(&nd->path); 2774 path_put(&nd->path);
2900 kfree(nd); 2775 kfree(nd);
2901 down_write(&s->s_umount); 2776 down_write(&s->s_umount);
2902 return 0; 2777 return dentry;
2903out_put_mnt_ns: 2778out_put_mnt_ns:
2904 put_mnt_ns(ns_private); 2779 put_mnt_ns(ns_private);
2905out_mntput: 2780out_mntput:
2906 mntput(root_mnt); 2781 mntput(root_mnt);
2907out_err: 2782out_err:
2908 kfree(nd); 2783 kfree(nd);
2909 return ret; 2784 return ERR_PTR(ret);
2910} 2785}
2911 2786
2912static int nfs4_try_mount(int flags, const char *dev_name, 2787static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2913 struct nfs_parsed_mount_data *data, 2788 struct nfs_parsed_mount_data *data)
2914 struct vfsmount *mnt)
2915{ 2789{
2916 char *export_path; 2790 char *export_path;
2917 struct vfsmount *root_mnt; 2791 struct vfsmount *root_mnt;
2918 int error; 2792 struct dentry *res;
2919 2793
2920 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2794 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2921 2795
@@ -2925,26 +2799,25 @@ static int nfs4_try_mount(int flags, const char *dev_name,
2925 data->nfs_server.hostname); 2799 data->nfs_server.hostname);
2926 data->nfs_server.export_path = export_path; 2800 data->nfs_server.export_path = export_path;
2927 2801
2928 error = PTR_ERR(root_mnt); 2802 res = ERR_CAST(root_mnt);
2929 if (IS_ERR(root_mnt)) 2803 if (!IS_ERR(root_mnt))
2930 goto out; 2804 res = nfs_follow_remote_path(root_mnt, export_path);
2931
2932 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2933 2805
2934out: 2806 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
2935 dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error, 2807 IS_ERR(res) ? PTR_ERR(res) : 0,
2936 error != 0 ? " [error]" : ""); 2808 IS_ERR(res) ? " [error]" : "");
2937 return error; 2809 return res;
2938} 2810}
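All of these return-type conversions lean on the kernel's pointer-encoded error convention: a small negative errno is stuffed into the pointer value itself, so a single return value can carry either a valid dentry or an error. Simplified versions of the helpers used above (see include/linux/err.h for the real, unlikely()-annotated ones):

#define MAX_ERRNO       4095

static inline void *ERR_PTR(long error)         { return (void *)error; }
static inline long PTR_ERR(const void *ptr)     { return (long)ptr; }
static inline long IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
/* re-type an error pointer without losing the encoded errno */
static inline void *ERR_CAST(const void *ptr)   { return (void *)ptr; }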
2939 2811
2940/* 2812/*
2941 * Get the superblock for an NFS4 mountpoint 2813 * Get the superblock for an NFS4 mountpoint
2942 */ 2814 */
2943static int nfs4_get_sb(struct file_system_type *fs_type, 2815static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2944 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2816 int flags, const char *dev_name, void *raw_data)
2945{ 2817{
2946 struct nfs_parsed_mount_data *data; 2818 struct nfs_parsed_mount_data *data;
2947 int error = -ENOMEM; 2819 int error = -ENOMEM;
2820 struct dentry *res = ERR_PTR(-ENOMEM);
2948 2821
2949 data = nfs_alloc_parsed_mount_data(4); 2822 data = nfs_alloc_parsed_mount_data(4);
2950 if (data == NULL) 2823 if (data == NULL)
@@ -2952,10 +2825,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2952 2825
2953 /* Validate the mount data */ 2826 /* Validate the mount data */
2954 error = nfs4_validate_mount_data(raw_data, data, dev_name); 2827 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2955 if (error < 0) 2828 if (error < 0) {
2829 res = ERR_PTR(error);
2956 goto out; 2830 goto out;
2831 }
2957 2832
2958 error = nfs4_try_mount(flags, dev_name, data, mnt); 2833 res = nfs4_try_mount(flags, dev_name, data);
2834 if (IS_ERR(res))
2835 error = PTR_ERR(res);
2959 2836
2960out: 2837out:
2961 kfree(data->client_address); 2838 kfree(data->client_address);
@@ -2964,9 +2841,9 @@ out:
2964 kfree(data->fscache_uniq); 2841 kfree(data->fscache_uniq);
2965out_free_data: 2842out_free_data:
2966 kfree(data); 2843 kfree(data);
2967 dprintk("<-- nfs4_get_sb() = %d%s\n", error, 2844 dprintk("<-- nfs4_mount() = %d%s\n", error,
2968 error != 0 ? " [error]" : ""); 2845 error != 0 ? " [error]" : "");
2969 return error; 2846 return res;
2970} 2847}
2971 2848
2972static void nfs4_kill_super(struct super_block *sb) 2849static void nfs4_kill_super(struct super_block *sb)
@@ -3033,7 +2910,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
3033 nfs_fscache_get_super_cookie(s, NULL, data); 2910 nfs_fscache_get_super_cookie(s, NULL, data);
3034 } 2911 }
3035 2912
3036 mntroot = nfs4_get_root(s, data->fh); 2913 mntroot = nfs4_get_root(s, data->fh, dev_name);
3037 if (IS_ERR(mntroot)) { 2914 if (IS_ERR(mntroot)) {
3038 error = PTR_ERR(mntroot); 2915 error = PTR_ERR(mntroot);
3039 goto error_splat_super; 2916 goto error_splat_super;
@@ -3120,7 +2997,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3120 nfs_fscache_get_super_cookie(s, NULL, data); 2997 nfs_fscache_get_super_cookie(s, NULL, data);
3121 } 2998 }
3122 2999
3123 mntroot = nfs4_get_root(s, mntfh); 3000 mntroot = nfs4_get_root(s, mntfh, dev_name);
3124 if (IS_ERR(mntroot)) { 3001 if (IS_ERR(mntroot)) {
3125 error = PTR_ERR(mntroot); 3002 error = PTR_ERR(mntroot);
3126 goto error_splat_super; 3003 goto error_splat_super;
@@ -3160,16 +3037,15 @@ error_splat_bdi:
3160/* 3037/*
3161 * Create an NFS4 server record on referral traversal 3038 * Create an NFS4 server record on referral traversal
3162 */ 3039 */
3163static int nfs4_referral_get_sb(struct file_system_type *fs_type, 3040static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
3164 int flags, const char *dev_name, void *raw_data, 3041 int flags, const char *dev_name, void *raw_data)
3165 struct vfsmount *mnt)
3166{ 3042{
3167 struct nfs_clone_mount *data = raw_data; 3043 struct nfs_clone_mount *data = raw_data;
3168 char *export_path; 3044 char *export_path;
3169 struct vfsmount *root_mnt; 3045 struct vfsmount *root_mnt;
3170 int error; 3046 struct dentry *res;
3171 3047
3172 dprintk("--> nfs4_referral_get_sb()\n"); 3048 dprintk("--> nfs4_referral_mount()\n");
3173 3049
3174 export_path = data->mnt_path; 3050 export_path = data->mnt_path;
3175 data->mnt_path = "/"; 3051 data->mnt_path = "/";
@@ -3178,15 +3054,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type,
3178 flags, data, data->hostname); 3054 flags, data, data->hostname);
3179 data->mnt_path = export_path; 3055 data->mnt_path = export_path;
3180 3056
3181 error = PTR_ERR(root_mnt); 3057 res = ERR_CAST(root_mnt);
3182 if (IS_ERR(root_mnt)) 3058 if (!IS_ERR(root_mnt))
3183 goto out; 3059 res = nfs_follow_remote_path(root_mnt, export_path);
3184 3060 dprintk("<-- nfs4_referral_mount() = %ld%s\n",
3185 error = nfs_follow_remote_path(root_mnt, export_path, mnt); 3061 IS_ERR(res) ? PTR_ERR(res) : 0,
3186out: 3062 IS_ERR(res) ? " [error]" : "");
3187 dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, 3063 return res;
3188 error != 0 ? " [error]" : "");
3189 return error;
3190} 3064}
3191 3065
3192#endif /* CONFIG_NFS_V4 */ 3066#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..8d6864c2a5fa 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -148,6 +148,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
148 alias = d_lookup(parent, &data->args.name); 148 alias = d_lookup(parent, &data->args.name);
149 if (alias != NULL) { 149 if (alias != NULL) {
150 int ret = 0; 150 int ret = 0;
151 void *devname_garbage = NULL;
151 152
152 /* 153 /*
153 * Hey, we raced with lookup... See if we need to transfer 154 * Hey, we raced with lookup... See if we need to transfer
@@ -157,6 +158,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
157 spin_lock(&alias->d_lock); 158 spin_lock(&alias->d_lock);
158 if (alias->d_inode != NULL && 159 if (alias->d_inode != NULL &&
159 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 160 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
161 devname_garbage = alias->d_fsdata;
160 alias->d_fsdata = data; 162 alias->d_fsdata = data;
161 alias->d_flags |= DCACHE_NFSFS_RENAMED; 163 alias->d_flags |= DCACHE_NFSFS_RENAMED;
162 ret = 1; 164 ret = 1;
@@ -164,6 +166,13 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
164 spin_unlock(&alias->d_lock); 166 spin_unlock(&alias->d_lock);
165 nfs_dec_sillycount(dir); 167 nfs_dec_sillycount(dir);
166 dput(alias); 168 dput(alias);
169 /*
170 * If we'd displaced old cached devname, free it. At that
171 * point dentry is definitely not a root, so we won't need
172 * that anymore.
173 */
174 if (devname_garbage)
175 kfree(devname_garbage);
167 return ret; 176 return ret;
168 } 177 }
169 data->dir = igrab(dir); 178 data->dir = igrab(dir);
@@ -180,7 +189,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 189 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 190 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 191 if (!IS_ERR(task))
183 rpc_put_task(task); 192 rpc_put_task_async(task);
184 return 1; 193 return 1;
185} 194}
186 195
@@ -252,6 +261,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
252{ 261{
253 struct nfs_unlinkdata *data; 262 struct nfs_unlinkdata *data;
254 int status = -ENOMEM; 263 int status = -ENOMEM;
264 void *devname_garbage = NULL;
255 265
256 data = kzalloc(sizeof(*data), GFP_KERNEL); 266 data = kzalloc(sizeof(*data), GFP_KERNEL);
257 if (data == NULL) 267 if (data == NULL)
@@ -269,8 +279,16 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
269 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) 279 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
270 goto out_unlock; 280 goto out_unlock;
271 dentry->d_flags |= DCACHE_NFSFS_RENAMED; 281 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
282 devname_garbage = dentry->d_fsdata;
272 dentry->d_fsdata = data; 283 dentry->d_fsdata = data;
273 spin_unlock(&dentry->d_lock); 284 spin_unlock(&dentry->d_lock);
285 /*
286 * If we'd displaced old cached devname, free it. At that
287 * point dentry is definitely not a root, so we won't need
288 * that anymore.
289 */
290 if (devname_garbage)
291 kfree(devname_garbage);
274 return 0; 292 return 0;
275out_unlock: 293out_unlock:
276 spin_unlock(&dentry->d_lock); 294 spin_unlock(&dentry->d_lock);
@@ -299,6 +317,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
299 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 317 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
300 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 318 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
301 data = dentry->d_fsdata; 319 data = dentry->d_fsdata;
320 dentry->d_fsdata = NULL;
302 } 321 }
303 spin_unlock(&dentry->d_lock); 322 spin_unlock(&dentry->d_lock);
304 323
@@ -315,6 +334,7 @@ nfs_cancel_async_unlink(struct dentry *dentry)
315 struct nfs_unlinkdata *data = dentry->d_fsdata; 334 struct nfs_unlinkdata *data = dentry->d_fsdata;
316 335
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 336 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
337 dentry->d_fsdata = NULL;
318 spin_unlock(&dentry->d_lock); 338 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data); 339 nfs_free_unlinkdata(data);
320 return; 340 return;
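Both unlink.c hunks follow the same shape: dentry->d_fsdata can now hold either a cached devname string or sillyrename bookkeeping, so whenever the unlink code installs its own pointer it captures whatever was there while holding d_lock and frees it only after the lock is dropped, keeping the critical section short. Generic form of the pattern (illustrative names only):

        spin_lock(&dentry->d_lock);
        old = dentry->d_fsdata;         /* may be a cached devname */
        dentry->d_fsdata = new;         /* install the unlink/sillyrename data */
        spin_unlock(&dentry->d_lock);
        kfree(old);                     /* free the displaced pointer outside the lock */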
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..47a3ad63e0d5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -28,6 +28,7 @@
28#include "iostat.h" 28#include "iostat.h"
29#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h" 30#include "fscache.h"
31#include "pnfs.h"
31 32
32#define NFSDBG_FACILITY NFSDBG_PAGECACHE 33#define NFSDBG_FACILITY NFSDBG_PAGECACHE
33 34
@@ -96,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
96 97
97static void nfs_writedata_release(struct nfs_write_data *wdata) 98static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 99{
100 put_lseg(wdata->lseg);
99 put_nfs_open_context(wdata->args.context); 101 put_nfs_open_context(wdata->args.context);
100 nfs_writedata_free(wdata); 102 nfs_writedata_free(wdata);
101} 103}
@@ -781,25 +783,21 @@ static int flush_task_priority(int how)
781 return RPC_PRIORITY_NORMAL; 783 return RPC_PRIORITY_NORMAL;
782} 784}
783 785
784/* 786int nfs_initiate_write(struct nfs_write_data *data,
785 * Set up the argument/result storage required for the RPC call. 787 struct rpc_clnt *clnt,
786 */ 788 const struct rpc_call_ops *call_ops,
787static int nfs_write_rpcsetup(struct nfs_page *req, 789 int how)
788 struct nfs_write_data *data,
789 const struct rpc_call_ops *call_ops,
790 unsigned int count, unsigned int offset,
791 int how)
792{ 790{
793 struct inode *inode = req->wb_context->path.dentry->d_inode; 791 struct inode *inode = data->inode;
794 int priority = flush_task_priority(how); 792 int priority = flush_task_priority(how);
795 struct rpc_task *task; 793 struct rpc_task *task;
796 struct rpc_message msg = { 794 struct rpc_message msg = {
797 .rpc_argp = &data->args, 795 .rpc_argp = &data->args,
798 .rpc_resp = &data->res, 796 .rpc_resp = &data->res,
799 .rpc_cred = req->wb_context->cred, 797 .rpc_cred = data->cred,
800 }; 798 };
801 struct rpc_task_setup task_setup_data = { 799 struct rpc_task_setup task_setup_data = {
802 .rpc_client = NFS_CLIENT(inode), 800 .rpc_client = clnt,
803 .task = &data->task, 801 .task = &data->task,
804 .rpc_message = &msg, 802 .rpc_message = &msg,
805 .callback_ops = call_ops, 803 .callback_ops = call_ops,
@@ -810,12 +808,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
810 }; 808 };
811 int ret = 0; 809 int ret = 0;
812 810
811 /* Set up the initial task struct. */
812 NFS_PROTO(inode)->write_setup(data, &msg);
813
814 dprintk("NFS: %5u initiated write call "
815 "(req %s/%lld, %u bytes @ offset %llu)\n",
816 data->task.tk_pid,
817 inode->i_sb->s_id,
818 (long long)NFS_FILEID(inode),
819 data->args.count,
820 (unsigned long long)data->args.offset);
821
822 task = rpc_run_task(&task_setup_data);
823 if (IS_ERR(task)) {
824 ret = PTR_ERR(task);
825 goto out;
826 }
827 if (how & FLUSH_SYNC) {
828 ret = rpc_wait_for_completion_task(task);
829 if (ret == 0)
830 ret = task->tk_status;
831 }
832 rpc_put_task(task);
833out:
834 return ret;
835}
836EXPORT_SYMBOL_GPL(nfs_initiate_write);
837
838/*
839 * Set up the argument/result storage required for the RPC call.
840 */
841static int nfs_write_rpcsetup(struct nfs_page *req,
842 struct nfs_write_data *data,
843 const struct rpc_call_ops *call_ops,
844 unsigned int count, unsigned int offset,
845 struct pnfs_layout_segment *lseg,
846 int how)
847{
848 struct inode *inode = req->wb_context->path.dentry->d_inode;
849
813 /* Set up the RPC argument and reply structs 850 /* Set up the RPC argument and reply structs
814 * NB: take care not to mess about with data->commit et al. */ 851 * NB: take care not to mess about with data->commit et al. */
815 852
816 data->req = req; 853 data->req = req;
817 data->inode = inode = req->wb_context->path.dentry->d_inode; 854 data->inode = inode = req->wb_context->path.dentry->d_inode;
818 data->cred = msg.rpc_cred; 855 data->cred = req->wb_context->cred;
856 data->lseg = get_lseg(lseg);
819 857
820 data->args.fh = NFS_FH(inode); 858 data->args.fh = NFS_FH(inode);
821 data->args.offset = req_offset(req) + offset; 859 data->args.offset = req_offset(req) + offset;
@@ -836,30 +874,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
836 data->res.verf = &data->verf; 874 data->res.verf = &data->verf;
837 nfs_fattr_init(&data->fattr); 875 nfs_fattr_init(&data->fattr);
838 876
839 /* Set up the initial task struct. */ 877 if (data->lseg &&
840 NFS_PROTO(inode)->write_setup(data, &msg); 878 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
841 879 return 0;
842 dprintk("NFS: %5u initiated write call "
843 "(req %s/%lld, %u bytes @ offset %llu)\n",
844 data->task.tk_pid,
845 inode->i_sb->s_id,
846 (long long)NFS_FILEID(inode),
847 count,
848 (unsigned long long)data->args.offset);
849 880
850 task = rpc_run_task(&task_setup_data); 881 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
851 if (IS_ERR(task)) {
852 ret = PTR_ERR(task);
853 goto out;
854 }
855 if (how & FLUSH_SYNC) {
856 ret = rpc_wait_for_completion_task(task);
857 if (ret == 0)
858 ret = task->tk_status;
859 }
860 rpc_put_task(task);
861out:
862 return ret;
863} 882}
864 883
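The net effect of this hunk is a two-stage write path: nfs_write_rpcsetup() only builds the argument/result structures and gives pNFS a chance to drive the I/O, while the new, exported nfs_initiate_write() owns the actual RPC dispatch and can therefore be reused by a pNFS layout driver with a data-server rpc_clnt instead of NFS_CLIENT(inode). Condensed restatement of the resulting flow (pnfs_try_to_write_data() lives in the pNFS code outside this file; its role here is inferred from the call site):

static int nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data,
                              const struct rpc_call_ops *call_ops,
                              unsigned int count, unsigned int offset,
                              struct pnfs_layout_segment *lseg, int how)
{
        /* ... fill data->args and data->res, take a reference on lseg ... */
        if (data->lseg &&
            (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
                return 0;       /* a layout driver issued the WRITE to a data server */

        /* fall back to the regular client pointing at the server */
        return nfs_initiate_write(data, NFS_CLIENT(data->inode), call_ops, how);
}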
865/* If a nfs_flush_* function fails, it should remove reqs from @head and 884/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -879,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
879 * Generate multiple small requests to write out a single 898 * Generate multiple small requests to write out a single
880 * contiguous dirty area on one page. 899 * contiguous dirty area on one page.
881 */ 900 */
882static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 901static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
883{ 902{
884 struct nfs_page *req = nfs_list_entry(head->next); 903 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
885 struct page *page = req->wb_page; 904 struct page *page = req->wb_page;
886 struct nfs_write_data *data; 905 struct nfs_write_data *data;
887 size_t wsize = NFS_SERVER(inode)->wsize, nbytes; 906 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
888 unsigned int offset; 907 unsigned int offset;
889 int requests = 0; 908 int requests = 0;
890 int ret = 0; 909 int ret = 0;
910 struct pnfs_layout_segment *lseg;
891 LIST_HEAD(list); 911 LIST_HEAD(list);
892 912
893 nfs_list_remove_request(req); 913 nfs_list_remove_request(req);
894 914
895 nbytes = count; 915 nbytes = desc->pg_count;
896 do { 916 do {
897 size_t len = min(nbytes, wsize); 917 size_t len = min(nbytes, wsize);
898 918
@@ -905,9 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
905 } while (nbytes != 0); 925 } while (nbytes != 0);
906 atomic_set(&req->wb_complete, requests); 926 atomic_set(&req->wb_complete, requests);
907 927
928 BUG_ON(desc->pg_lseg);
929 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
908 ClearPageError(page); 930 ClearPageError(page);
909 offset = 0; 931 offset = 0;
910 nbytes = count; 932 nbytes = desc->pg_count;
911 do { 933 do {
912 int ret2; 934 int ret2;
913 935
@@ -919,13 +941,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
919 if (nbytes < wsize) 941 if (nbytes < wsize)
920 wsize = nbytes; 942 wsize = nbytes;
921 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 943 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
922 wsize, offset, how); 944 wsize, offset, lseg, desc->pg_ioflags);
923 if (ret == 0) 945 if (ret == 0)
924 ret = ret2; 946 ret = ret2;
925 offset += wsize; 947 offset += wsize;
926 nbytes -= wsize; 948 nbytes -= wsize;
927 } while (nbytes != 0); 949 } while (nbytes != 0);
928 950
951 put_lseg(lseg);
952 desc->pg_lseg = NULL;
929 return ret; 953 return ret;
930 954
931out_bad: 955out_bad:
@@ -946,16 +970,26 @@ out_bad:
946 * This is the case if nfs_updatepage detects a conflicting request 970 * This is the case if nfs_updatepage detects a conflicting request
947 * that has been written but not committed. 971 * that has been written but not committed.
948 */ 972 */
949static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 973static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
950{ 974{
951 struct nfs_page *req; 975 struct nfs_page *req;
952 struct page **pages; 976 struct page **pages;
953 struct nfs_write_data *data; 977 struct nfs_write_data *data;
978 struct list_head *head = &desc->pg_list;
979 struct pnfs_layout_segment *lseg = desc->pg_lseg;
980 int ret;
954 981
955 data = nfs_writedata_alloc(npages); 982 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
956 if (!data) 983 desc->pg_count));
957 goto out_bad; 984 if (!data) {
958 985 while (!list_empty(head)) {
986 req = nfs_list_entry(head->next);
987 nfs_list_remove_request(req);
988 nfs_redirty_request(req);
989 }
990 ret = -ENOMEM;
991 goto out;
992 }
959 pages = data->pagevec; 993 pages = data->pagevec;
960 while (!list_empty(head)) { 994 while (!list_empty(head)) {
961 req = nfs_list_entry(head->next); 995 req = nfs_list_entry(head->next);
@@ -965,16 +999,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
965 *pages++ = req->wb_page; 999 *pages++ = req->wb_page;
966 } 1000 }
967 req = nfs_list_entry(data->pages.next); 1001 req = nfs_list_entry(data->pages.next);
1002 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
968 1004
969 /* Set up the argument struct */ 1005 /* Set up the argument struct */
970 return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); 1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
971 out_bad: 1007out:
972 while (!list_empty(head)) { 1008 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
973 req = nfs_list_entry(head->next); 1009 desc->pg_lseg = NULL;
974 nfs_list_remove_request(req); 1010 return ret;
975 nfs_redirty_request(req);
976 }
977 return -ENOMEM;
978} 1011}
979 1012
980static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1013static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -982,6 +1015,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
982{ 1015{
983 size_t wsize = NFS_SERVER(inode)->wsize; 1016 size_t wsize = NFS_SERVER(inode)->wsize;
984 1017
1018 pnfs_pageio_init_write(pgio, inode);
1019
985 if (wsize < PAGE_CACHE_SIZE) 1020 if (wsize < PAGE_CACHE_SIZE)
986 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1021 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
987 else 1022 else
@@ -1132,7 +1167,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
1132/* 1167/*
1133 * This function is called when the WRITE call is complete. 1168 * This function is called when the WRITE call is complete.
1134 */ 1169 */
1135int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1170void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1136{ 1171{
1137 struct nfs_writeargs *argp = &data->args; 1172 struct nfs_writeargs *argp = &data->args;
1138 struct nfs_writeres *resp = &data->res; 1173 struct nfs_writeres *resp = &data->res;
@@ -1151,7 +1186,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1151 */ 1186 */
1152 status = NFS_PROTO(data->inode)->write_done(task, data); 1187 status = NFS_PROTO(data->inode)->write_done(task, data);
1153 if (status != 0) 1188 if (status != 0)
1154 return status; 1189 return;
1155 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1190 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1156 1191
1157#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1192#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1166,6 +1201,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1166 */ 1201 */
1167 static unsigned long complain; 1202 static unsigned long complain;
1168 1203
1204 /* Note this will print the MDS for a DS write */
1169 if (time_before(complain, jiffies)) { 1205 if (time_before(complain, jiffies)) {
1170 dprintk("NFS: faulty NFS server %s:" 1206 dprintk("NFS: faulty NFS server %s:"
1171 " (committed = %d) != (stable = %d)\n", 1207 " (committed = %d) != (stable = %d)\n",
@@ -1186,6 +1222,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1186 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1222 /* Was this an NFSv2 write or an NFSv3 stable write? */
1187 if (resp->verf->committed != NFS_UNSTABLE) { 1223 if (resp->verf->committed != NFS_UNSTABLE) {
1188 /* Resend from where the server left off */ 1224 /* Resend from where the server left off */
1225 data->mds_offset += resp->count;
1189 argp->offset += resp->count; 1226 argp->offset += resp->count;
1190 argp->pgbase += resp->count; 1227 argp->pgbase += resp->count;
1191 argp->count -= resp->count; 1228 argp->count -= resp->count;
@@ -1196,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1196 argp->stable = NFS_FILE_SYNC; 1233 argp->stable = NFS_FILE_SYNC;
1197 } 1234 }
1198 nfs_restart_rpc(task, server->nfs_client); 1235 nfs_restart_rpc(task, server->nfs_client);
1199 return -EAGAIN; 1236 return;
1200 } 1237 }
1201 if (time_before(complain, jiffies)) { 1238 if (time_before(complain, jiffies)) {
1202 printk(KERN_WARNING 1239 printk(KERN_WARNING
@@ -1207,7 +1244,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1207 /* Can't do anything about it except throw an error. */ 1244 /* Can't do anything about it except throw an error. */
1208 task->tk_status = -EIO; 1245 task->tk_status = -EIO;
1209 } 1246 }
1210 return 0; 1247 return;
1211} 1248}
1212 1249
1213 1250
@@ -1292,6 +1329,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1329 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1330 if (IS_ERR(task))
1294 return PTR_ERR(task); 1331 return PTR_ERR(task);
1332 if (how & FLUSH_SYNC)
1333 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1334 rpc_put_task(task);
1296 return 0; 1335 return 0;
1297} 1336}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
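do_open() now delegates the whole lookup-and-open sequence to the new file_open_root() helper, which walks the path relative to the given root and applies the usual permission checks internally, replacing the open-coded vfs_path_lookup()/may_open()/dentry_open() chain removed above. Its prototype at the time of this series is roughly the following (shown for context only, not part of this hunk):

struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
                            const char *filename, int flags);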
50 37
51static struct { 38static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f348..02eb4edf0ece 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
432 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
433 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
434 */ 434 */
435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4); 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
436 if (unlikely(p == NULL)) 436 if (unlikely(p == NULL))
437 goto out_overflow; 437 goto out_overflow;
438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
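The enlarged reservation matches the fixed-size tail of a CB_SEQUENCE4resok reply: a 16-byte session ID followed by four 32-bit fields (sequence ID, slot ID, highest slot ID, target highest slot ID), i.e. 16 + 4 + 4 + 4 + 4 = 32 bytes. The old call reserved only 16 + 4 + 4 = 24 bytes, so decoding the last two fields could read past the area that xdr_inline_decode() had checked.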
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8d..7b566ec14e18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2445static struct nfs4_delegation * 2445static struct nfs4_delegation *
2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2447{ 2447{
2448 struct nfs4_delegation *dp = NULL; 2448 struct nfs4_delegation *dp;
2449 2449
2450 spin_lock(&recall_lock); 2450 spin_lock(&recall_lock);
2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
2453 break; 2453 spin_unlock(&recall_lock);
2454 } 2454 return dp;
2455 }
2455 spin_unlock(&recall_lock); 2456 spin_unlock(&recall_lock);
2456 return dp; 2457 return NULL;
2457} 2458}
2458 2459
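The rewritten lookup returns from inside the loop because, after a list_for_each_entry() that runs to completion, the cursor no longer points at a real element: it is computed from the list head itself. Returning it, as the old code effectively did when no delegation matched, hands back a bogus pointer rather than NULL. Generic shape of the safe pattern (illustrative types and names):

struct foo {
        struct list_head list;
        int key;
};

static struct foo *find_foo(struct list_head *head, int key)
{
        struct foo *f;

        list_for_each_entry(f, head, list)
                if (f->key == key)
                        return f;       /* found: return while f is valid */

        return NULL;                    /* never "return f" after the loop */
}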
2459int share_access_to_flags(u32 share_access) 2460int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1275b8655070..615f0a9f0600 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i; 1145 int i, j;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (i = 0; i < dummy; ++i) 1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy); 1219 READ32(dummy);
1220 break; 1220 break;
1221 case RPC_AUTH_GSS: 1221 case RPC_AUTH_GSS:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
35#include "btnode.h" 35#include "btnode.h"
36 36
37 37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43static const struct address_space_operations def_btnode_aops = { 38static const struct address_space_operations def_btnode_aops = {
44 .sync_page = block_sync_page, 39 .sync_page = block_sync_page,
45}; 40};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
454 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 454 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
455 455
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 456 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 nilfs_mapping_init_once(&shadow->frozen_data); 457 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
459 nilfs_mapping_init_once(&shadow->frozen_btnodes); 459 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
461 mi->mi_shadow = shadow; 461 mi->mi_shadow = shadow;
462 return 0; 462 return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi,
510 const struct address_space_operations *aops) 497 const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 66 const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 430 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 432
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 if (NILFS_I(inode)->i_root &&
434 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 435 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 436 /* skip finfo */
436} 437}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1279#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1280 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1281#endif 1281#endif
1282 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1283 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1284 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1285} 1285}
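With this series the nilfs-private initializer goes away in favour of a generic address_space_init_once() exported by the VFS; judging by the removed nilfs_mapping_init_once() body above, the generic helper performs essentially the same one-time setup:

void address_space_init_once(struct address_space *mapping)
{
        memset(mapping, 0, sizeof(*mapping));
        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
        spin_lock_init(&mapping->tree_lock);
        INIT_LIST_HEAD(&mapping->private_list);
        spin_lock_init(&mapping->private_lock);
        spin_lock_init(&mapping->i_mmap_lock);
        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}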
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 57 struct ocfs2_super *osb;
58 58
59 if (nd->flags & LOOKUP_RCU) 59 if (nd && nd->flags & LOOKUP_RCU)
60 return -ECHILD; 60 return -ECHILD;
61 61
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 849fb4a2e814..d6c25d76b537 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -293,7 +293,7 @@ static int ocfs2_mknod(struct inode *dir,
293 } 293 }
294 294
295 /* get security xattr */ 295 /* get security xattr */
296 status = ocfs2_init_security_get(inode, dir, &si); 296 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
297 if (status) { 297 if (status) {
298 if (status == -EOPNOTSUPP) 298 if (status == -EOPNOTSUPP)
299 si.enable = 0; 299 si.enable = 0;
@@ -1665,7 +1665,7 @@ static int ocfs2_symlink(struct inode *dir,
1665 } 1665 }
1666 1666
1667 /* get security xattr */ 1667 /* get security xattr */
1668 status = ocfs2_init_security_get(inode, dir, &si); 1668 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
1669 if (status) { 1669 if (status) {
1670 if (status == -EOPNOTSUPP) 1670 if (status == -EOPNOTSUPP)
1671 si.enable = 0; 1671 si.enable = 0;
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb52d95d..d5ab56cbe5c5 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
114extern const struct dquot_operations ocfs2_quota_operations; 114extern const struct dquot_operations ocfs2_quota_operations;
115extern struct quota_format_type ocfs2_quota_format; 115extern struct quota_format_type ocfs2_quota_format;
116 116
117int ocfs2_quota_setup(void);
118void ocfs2_quota_shutdown(void);
119
120#endif /* _OCFS2_QUOTA_H */ 117#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923eb24c..a73f64166481 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
63 * write to gf 63 * write to gf
64 */ 64 */
65 65
66static struct workqueue_struct *ocfs2_quota_wq = NULL;
67
68static void qsync_work_fn(struct work_struct *work); 66static void qsync_work_fn(struct work_struct *work);
69 67
70static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) 68static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
400 OCFS2_QBLK_RESERVED_SPACE; 398 OCFS2_QBLK_RESERVED_SPACE;
401 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 399 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
402 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 400 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
403 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 401 schedule_delayed_work(&oinfo->dqi_sync_work,
404 msecs_to_jiffies(oinfo->dqi_syncms)); 402 msecs_to_jiffies(oinfo->dqi_syncms));
405 403
406out_err: 404out_err:
407 mlog_exit(status); 405 mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
635 struct super_block *sb = oinfo->dqi_gqinode->i_sb; 633 struct super_block *sb = oinfo->dqi_gqinode->i_sb;
636 634
637 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 635 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
638 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 636 schedule_delayed_work(&oinfo->dqi_sync_work,
639 msecs_to_jiffies(oinfo->dqi_syncms)); 637 msecs_to_jiffies(oinfo->dqi_syncms));
640} 638}
641 639
642/* 640/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
923 .alloc_dquot = ocfs2_alloc_dquot, 921 .alloc_dquot = ocfs2_alloc_dquot,
924 .destroy_dquot = ocfs2_destroy_dquot, 922 .destroy_dquot = ocfs2_destroy_dquot,
925}; 923};
926
927int ocfs2_quota_setup(void)
928{
929 ocfs2_quota_wq = create_workqueue("o2quot");
930 if (!ocfs2_quota_wq)
931 return -ENOMEM;
932 return 0;
933}
934
935void ocfs2_quota_shutdown(void)
936{
937 if (ocfs2_quota_wq) {
938 flush_workqueue(ocfs2_quota_wq);
939 destroy_workqueue(ocfs2_quota_wq);
940 ocfs2_quota_wq = NULL;
941 }
942}
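The quota sync work above loses its private "o2quot" workqueue and is queued on the shared kernel workqueue instead. A compact sketch of that pattern with made-up names (the real code keeps the delayed work inside its per-type quota info and cancels it when the quota file is torn down):

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static void foo_sync_fn(struct work_struct *work);
    static DECLARE_DELAYED_WORK(foo_sync_work, foo_sync_fn);

    static void foo_sync_fn(struct work_struct *work)
    {
            /* ... periodic sync ... */
            schedule_delayed_work(&foo_sync_work, msecs_to_jiffies(10000));
    }

    static void foo_stop_sync(void)
    {
            /* replaces flush_workqueue()/destroy_workqueue() on the private queue */
            cancel_delayed_work_sync(&foo_sync_work);
    }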
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..c384d634872a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
@@ -4325,7 +4328,8 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4325 4328
4326 /* If the security isn't preserved, we need to re-initialize them. */ 4329 /* If the security isn't preserved, we need to re-initialize them. */
4327 if (!preserve) { 4330 if (!preserve) {
4328 error = ocfs2_init_security_and_acl(dir, new_orphan_inode); 4331 error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
4332 &new_dentry->d_name);
4329 if (error) 4333 if (error)
4330 mlog_errno(error); 4334 mlog_errno(error);
4331 } 4335 }
@@ -4376,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4376 if (IS_ERR(s)) 4380 if (IS_ERR(s))
4377 return PTR_ERR(s); 4381 return PTR_ERR(s);
4378 4382
4379 error = path_lookup(s, LOOKUP_PARENT, nd); 4383 error = kern_path_parent(s, nd);
4380 if (error) 4384 if (error)
4381 putname(s); 4385 putname(s);
4382 else 4386 else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..236ed1bdca2c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
@@ -1645,16 +1657,11 @@ static int __init ocfs2_init(void)
1645 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1657 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1646 } 1658 }
1647 1659
1648 status = ocfs2_quota_setup();
1649 if (status)
1650 goto leave;
1651
1652 ocfs2_set_locking_protocol(); 1660 ocfs2_set_locking_protocol();
1653 1661
1654 status = register_quota_format(&ocfs2_quota_format); 1662 status = register_quota_format(&ocfs2_quota_format);
1655leave: 1663leave:
1656 if (status < 0) { 1664 if (status < 0) {
1657 ocfs2_quota_shutdown();
1658 ocfs2_free_mem_caches(); 1665 ocfs2_free_mem_caches();
1659 exit_ocfs2_uptodate_cache(); 1666 exit_ocfs2_uptodate_cache();
1660 } 1667 }
@@ -1671,8 +1678,6 @@ static void __exit ocfs2_exit(void)
1671{ 1678{
1672 mlog_entry_void(); 1679 mlog_entry_void();
1673 1680
1674 ocfs2_quota_shutdown();
1675
1676 if (ocfs2_wq) { 1681 if (ocfs2_wq) {
1677 flush_workqueue(ocfs2_wq); 1682 flush_workqueue(ocfs2_wq);
1678 destroy_workqueue(ocfs2_wq); 1683 destroy_workqueue(ocfs2_wq);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 67cd43914641..6bb602486c6b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,7 +7185,8 @@ out:
7185 * must not hold any lock except i_mutex. 7185 * must not hold any lock except i_mutex.
7186 */ 7186 */
7187int ocfs2_init_security_and_acl(struct inode *dir, 7187int ocfs2_init_security_and_acl(struct inode *dir,
7188 struct inode *inode) 7188 struct inode *inode,
7189 const struct qstr *qstr)
7189{ 7190{
7190 int ret = 0; 7191 int ret = 0;
7191 struct buffer_head *dir_bh = NULL; 7192 struct buffer_head *dir_bh = NULL;
@@ -7193,7 +7194,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
7193 .enable = 1, 7194 .enable = 1,
7194 }; 7195 };
7195 7196
7196 ret = ocfs2_init_security_get(inode, dir, &si); 7197 ret = ocfs2_init_security_get(inode, dir, qstr, &si);
7197 if (!ret) { 7198 if (!ret) {
7198 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7199 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7199 si.name, si.value, si.value_len, 7200 si.name, si.value, si.value_len,
@@ -7261,13 +7262,14 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7261 7262
7262int ocfs2_init_security_get(struct inode *inode, 7263int ocfs2_init_security_get(struct inode *inode,
7263 struct inode *dir, 7264 struct inode *dir,
7265 const struct qstr *qstr,
7264 struct ocfs2_security_xattr_info *si) 7266 struct ocfs2_security_xattr_info *si)
7265{ 7267{
7266 /* check whether ocfs2 support feature xattr */ 7268 /* check whether ocfs2 support feature xattr */
7267 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7269 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7268 return -EOPNOTSUPP; 7270 return -EOPNOTSUPP;
7269 return security_inode_init_security(inode, dir, &si->name, &si->value, 7271 return security_inode_init_security(inode, dir, qstr, &si->name,
7270 &si->value_len); 7272 &si->value, &si->value_len);
7271} 7273}
7272 7274
7273int ocfs2_init_security_set(handle_t *handle, 7275int ocfs2_init_security_set(handle_t *handle,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index aa64bb37a65b..d63cfb72316b 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -57,6 +57,7 @@ int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
57 struct ocfs2_dinode *di); 57 struct ocfs2_dinode *di);
58int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 58int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *, 59int ocfs2_init_security_get(struct inode *, struct inode *,
60 const struct qstr *,
60 struct ocfs2_security_xattr_info *); 61 struct ocfs2_security_xattr_info *);
61int ocfs2_init_security_set(handle_t *, struct inode *, 62int ocfs2_init_security_set(handle_t *, struct inode *,
62 struct buffer_head *, 63 struct buffer_head *,
@@ -94,5 +95,6 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
94 struct buffer_head *new_bh, 95 struct buffer_head *new_bh,
95 bool preserve_security); 96 bool preserve_security);
96int ocfs2_init_security_and_acl(struct inode *dir, 97int ocfs2_init_security_and_acl(struct inode *dir,
97 struct inode *inode); 98 struct inode *inode,
99 const struct qstr *qstr);
98#endif /* OCFS2_XATTR_H */ 100#endif /* OCFS2_XATTR_H */
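The qstr plumbing above lets an LSM see the name being created when it computes the initial security label. A minimal sketch of the new calling convention, assuming the six-argument security_inode_init_security() signature used by these hunks; "foo" is hypothetical and error handling is trimmed:

    #include <linux/security.h>
    #include <linux/slab.h>

    static int foo_init_security(struct inode *inode, struct inode *dir,
                                 const struct qstr *qstr)
    {
            char *name = NULL;
            void *value = NULL;
            size_t len;
            int err;

            err = security_inode_init_security(inode, dir, qstr,
                                               &name, &value, &len);
            if (err)
                    return err == -EOPNOTSUPP ? 0 : err;

            /* ... persist (name, value, len) as a security.* xattr ... */
            kfree(name);
            kfree(value);
            return 0;
    }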
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b5..f83ca80cc59a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
233 233
234 if (!(file->f_mode & FMODE_WRITE)) 234 if (!(file->f_mode & FMODE_WRITE))
235 return -EBADF; 235 return -EBADF;
236
 237        /* It's not possible to punch a hole in an append-only file */
238 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
239 return -EPERM;
240
241 if (IS_IMMUTABLE(inode))
242 return -EPERM;
243
236 /* 244 /*
237 * Revalidate the write permissions, in case security policy has 245 * Revalidate the write permissions, in case security policy has
238 * changed since the files were opened. 246 * changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
565{ 573{
566 struct path path; 574 struct path path;
567 int error = -EINVAL; 575 int error = -EINVAL;
568 int follow; 576 int lookup_flags;
569 577
570 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
571 goto out; 579 goto out;
572 580
573 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
574 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
575 if (error) 585 if (error)
576 goto out; 586 goto out;
577 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
661 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
662 const struct cred *cred) 672 const struct cred *cred)
663{ 673{
674 static const struct file_operations empty_fops = {};
664 struct inode *inode; 675 struct inode *inode;
665 int error; 676 int error;
666 677
667 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
668 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
669 inode = dentry->d_inode; 684 inode = dentry->d_inode;
670 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
671 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
679 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
680 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
681 f->f_pos = 0; 696 f->f_pos = 0;
682 f->f_op = fops_get(inode->i_fop);
683 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
684 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
685 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
686 if (error) 707 if (error)
687 goto cleanup_all; 708 goto cleanup_all;
@@ -693,7 +714,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
693 if (error) 714 if (error)
694 goto cleanup_all; 715 goto cleanup_all;
695 } 716 }
696 ima_counts_get(f); 717 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
718 i_readcount_inc(inode);
697 719
698 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 720 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
699 721
@@ -882,15 +904,110 @@ void fd_install(unsigned int fd, struct file *file)
882 904
883EXPORT_SYMBOL(fd_install); 905EXPORT_SYMBOL(fd_install);
884 906
907static inline int build_open_flags(int flags, int mode, struct open_flags *op)
908{
909 int lookup_flags = 0;
910 int acc_mode;
911
912 if (!(flags & O_CREAT))
913 mode = 0;
914 op->mode = mode;
915
916 /* Must never be set by userspace */
917 flags &= ~FMODE_NONOTIFY;
918
919 /*
920 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
 921         * check for O_DSYNC if they need any syncing at all, we enforce it's
922 * always set instead of having to deal with possibly weird behaviour
923 * for malicious applications setting only __O_SYNC.
924 */
925 if (flags & __O_SYNC)
926 flags |= O_DSYNC;
927
928 /*
 929         * If we have O_PATH in the open flag, then we
930 * cannot have anything other than the below set of flags
931 */
932 if (flags & O_PATH) {
933 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
934 acc_mode = 0;
935 } else {
936 acc_mode = MAY_OPEN | ACC_MODE(flags);
937 }
938
939 op->open_flag = flags;
940
941 /* O_TRUNC implies we need access checks for write permissions */
942 if (flags & O_TRUNC)
943 acc_mode |= MAY_WRITE;
944
945 /* Allow the LSM permission hook to distinguish append
946 access from general write access. */
947 if (flags & O_APPEND)
948 acc_mode |= MAY_APPEND;
949
950 op->acc_mode = acc_mode;
951
952 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
953
954 if (flags & O_CREAT) {
955 op->intent |= LOOKUP_CREATE;
956 if (flags & O_EXCL)
957 op->intent |= LOOKUP_EXCL;
958 }
959
960 if (flags & O_DIRECTORY)
961 lookup_flags |= LOOKUP_DIRECTORY;
962 if (!(flags & O_NOFOLLOW))
963 lookup_flags |= LOOKUP_FOLLOW;
964 return lookup_flags;
965}
966
967/**
968 * filp_open - open file and return file pointer
969 *
970 * @filename: path to open
971 * @flags: open flags as per the open(2) second argument
972 * @mode: mode for the new file if O_CREAT is set, else ignored
973 *
974 * This is the helper to open a file from kernelspace if you really
 975 * have to. But in general you should not do this, so please move
976 * along, nothing to see here..
977 */
978struct file *filp_open(const char *filename, int flags, int mode)
979{
980 struct open_flags op;
981 int lookup = build_open_flags(flags, mode, &op);
982 return do_filp_open(AT_FDCWD, filename, &op, lookup);
983}
984EXPORT_SYMBOL(filp_open);
985
986struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
987 const char *filename, int flags)
988{
989 struct open_flags op;
990 int lookup = build_open_flags(flags, 0, &op);
991 if (flags & O_CREAT)
992 return ERR_PTR(-EINVAL);
993 if (!filename && (flags & O_DIRECTORY))
994 if (!dentry->d_inode->i_op->lookup)
995 return ERR_PTR(-ENOTDIR);
996 return do_file_open_root(dentry, mnt, filename, &op, lookup);
997}
998EXPORT_SYMBOL(file_open_root);
999
885long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1000long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
886{ 1001{
1002 struct open_flags op;
1003 int lookup = build_open_flags(flags, mode, &op);
887 char *tmp = getname(filename); 1004 char *tmp = getname(filename);
888 int fd = PTR_ERR(tmp); 1005 int fd = PTR_ERR(tmp);
889 1006
890 if (!IS_ERR(tmp)) { 1007 if (!IS_ERR(tmp)) {
891 fd = get_unused_fd_flags(flags); 1008 fd = get_unused_fd_flags(flags);
892 if (fd >= 0) { 1009 if (fd >= 0) {
893 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1010 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
894 if (IS_ERR(f)) { 1011 if (IS_ERR(f)) {
895 put_unused_fd(fd); 1012 put_unused_fd(fd);
896 fd = PTR_ERR(f); 1013 fd = PTR_ERR(f);
@@ -960,8 +1077,10 @@ int filp_close(struct file *filp, fl_owner_t id)
960 if (filp->f_op && filp->f_op->flush) 1077 if (filp->f_op && filp->f_op->flush)
961 retval = filp->f_op->flush(filp, id); 1078 retval = filp->f_op->flush(filp, id);
962 1079
963 dnotify_flush(filp, id); 1080 if (likely(!(filp->f_mode & FMODE_PATH))) {
964 locks_remove_posix(filp, id); 1081 dnotify_flush(filp, id);
1082 locks_remove_posix(filp, id);
1083 }
965 fput(filp); 1084 fput(filp);
966 return retval; 1085 return retval;
967} 1086}
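The open() changes above introduce O_PATH descriptors: build_open_flags() strips everything except O_DIRECTORY/O_NOFOLLOW, the file gets FMODE_PATH and an empty file_operations, and filp_close() skips dnotify/locks for it. A userspace sketch of what such a descriptor is for, assuming a kernel with these patches; O_PATH may be missing from older libc headers, so it is defined by hand here (the asm-generic value is shown, and it differs on a few architectures):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/stat.h>
    #include <stdio.h>

    #ifndef O_PATH
    #define O_PATH 010000000        /* asm-generic value; arch-specific elsewhere */
    #endif

    int main(void)
    {
            struct stat st;
            int dfd = open("/etc", O_PATH | O_DIRECTORY);   /* no read access needed */

            if (dfd < 0)
                    return 1;
            /* the fd cannot be read or written, but works as a *at() anchor */
            if (fstatat(dfd, "hostname", &st, 0) == 0)
                    printf("size=%lld\n", (long long)st.st_size);
            return 0;
    }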
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..764b86a01965 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 18
14
13int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
14{ 16{
15 int i; 17 int i;
16 int slot = 1; 18 int slot = 1;
19 unsigned int npartitions;
17 Sector sect; 20 Sector sect;
18 unsigned char *data; 21 unsigned char *data;
19 struct disklabel { 22 struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
45 u8 p_fstype; 48 u8 p_fstype;
46 u8 p_frag; 49 u8 p_frag;
47 __le16 p_cpg; 50 __le16 p_cpg;
48 } d_partitions[8]; 51 } d_partitions[MAX_OSF_PARTITIONS];
49 } * label; 52 } * label;
50 struct d_partition * partition; 53 struct d_partition * partition;
51 54
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
63 put_dev_sector(sect); 66 put_dev_sector(sect);
64 return 0; 67 return 0;
65 } 68 }
66 for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { 69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
67 if (slot == state->limit) 75 if (slot == state->limit)
68 break; 76 break;
69 if (le32_to_cpu(partition->p_size)) 77 if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..d49c4b5d2c3e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
2620 &proc_self_inode_operations, NULL, {}), 2620 &proc_self_inode_operations, NULL, {}),
2621}; 2621};
2622 2622
2623/*
2624 * Exceptional case: normally we are not allowed to unhash a busy
2625 * directory. In this case, however, we can do it - no aliasing problems
2626 * due to the way we treat inodes.
2627 */
2628static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2629{
2630 struct inode *inode;
2631 struct task_struct *task;
2632
2633 if (nd->flags & LOOKUP_RCU)
2634 return -ECHILD;
2635
2636 inode = dentry->d_inode;
2637 task = get_proc_task(inode);
2638 if (task) {
2639 put_task_struct(task);
2640 return 1;
2641 }
2642 d_drop(dentry);
2643 return 0;
2644}
2645
2646static const struct dentry_operations proc_base_dentry_operations =
2647{
2648 .d_revalidate = proc_base_revalidate,
2649 .d_delete = pid_delete_dentry,
2650};
2651
2652static struct dentry *proc_base_instantiate(struct inode *dir, 2623static struct dentry *proc_base_instantiate(struct inode *dir,
2653 struct dentry *dentry, struct task_struct *task, const void *ptr) 2624 struct dentry *dentry, struct task_struct *task, const void *ptr)
2654{ 2625{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2685 if (p->fop) 2656 if (p->fop)
2686 inode->i_fop = p->fop; 2657 inode->i_fop = p->fop;
2687 ei->op = p->op; 2658 ei->op = p->op;
2688 d_set_d_op(dentry, &proc_base_dentry_operations);
2689 d_add(dentry, inode); 2659 d_add(dentry, inode);
2690 error = NULL; 2660 error = NULL;
2691out: 2661out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d6a7ca1fdac5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
27static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
30 31
31 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
32 end_writeback(inode); 33 end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
38 de = PROC_I(inode)->pde; 39 de = PROC_I(inode)->pde;
39 if (de) 40 if (de)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 head = PROC_I(inode)->sysctl;
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 if (head) {
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head);
46 }
43} 47}
44 48
45struct vfsmount *proc_mnt; 49struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..f50133c11c24 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -32,7 +32,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
32 ei->sysctl_entry = table; 32 ei->sysctl_entry = table;
33 33
34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
35 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
36 inode->i_mode = table->mode; 35 inode->i_mode = table->mode;
37 if (!table->child) { 36 if (!table->child) {
38 inode->i_mode |= S_IFREG; 37 inode->i_mode |= S_IFREG;
@@ -408,15 +407,18 @@ static int proc_sys_compare(const struct dentry *parent,
408 const struct dentry *dentry, const struct inode *inode, 407 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name) 408 unsigned int len, const char *str, const struct qstr *name)
410{ 409{
410 struct ctl_table_header *head;
411 /* Although proc doesn't have negative dentries, rcu-walk means 411 /* Although proc doesn't have negative dentries, rcu-walk means
412 * that inode here can be NULL */ 412 * that inode here can be NULL */
413 /* AV: can it, indeed? */
413 if (!inode) 414 if (!inode)
414 return 0; 415 return 1;
415 if (name->len != len) 416 if (name->len != len)
416 return 1; 417 return 1;
417 if (memcmp(name->name, str, len)) 418 if (memcmp(name->name, str, len))
418 return 1; 419 return 1;
419 return !sysctl_is_seen(PROC_I(inode)->sysctl); 420 head = rcu_dereference(PROC_I(inode)->sysctl);
421 return !head || !sysctl_is_seen(head);
420} 422}
421 423
422static const struct dentry_operations proc_sys_dentry_operations = { 424static const struct dentry_operations proc_sys_dentry_operations = {
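Taken together, the proc_evict_inode() and proc_sys_compare() hunks retract the sysctl head pointer before it is dropped and make the rcu-walk reader sample it once and tolerate NULL. A generic sketch of that publish/retract pattern; the "foo" types and helpers are made up:

    #include <linux/rcupdate.h>

    struct foo_head;
    extern void foo_head_put(struct foo_head *head);
    extern int foo_head_is_seen(struct foo_head *head);

    struct foo_obj {
            struct foo_head *head;
    };

    static void foo_evict(struct foo_obj *obj)
    {
            struct foo_head *head = obj->head;

            rcu_assign_pointer(obj->head, NULL);    /* retract before the final put */
            if (head)
                    foo_head_put(head);
    }

    /* reader, called under rcu_read_lock() (rcu-walk) */
    static int foo_still_visible(struct foo_obj *obj)
    {
            struct foo_head *head = rcu_dereference(obj->head);

            return head && foo_head_is_seen(head);
    }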
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
new file mode 100644
index 000000000000..867d0ac026ce
--- /dev/null
+++ b/fs/pstore/Kconfig
@@ -0,0 +1,13 @@
1config PSTORE
  2	bool "Persistent store support"
3 default n
4 help
5 This option enables generic access to platform level
6 persistent storage via "pstore" filesystem that can
7 be mounted as /dev/pstore. Only useful if you have
8 a platform level driver that registers with pstore to
9 provide the data, so you probably should just go say "Y"
10 (or "M") to a platform specific persistent store driver
11 (e.g. ACPI_APEI on X86) which will select this for you.
12 If you don't have a platform persistent store driver,
13 say N.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
new file mode 100644
index 000000000000..760f4bce7d1d
--- /dev/null
+++ b/fs/pstore/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux pstorefs routines.
3#
4
5obj-y += pstore.o
6
7pstore-objs += inode.o platform.o
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
new file mode 100644
index 000000000000..549d245d0b42
--- /dev/null
+++ b/fs/pstore/inode.c
@@ -0,0 +1,285 @@
1/*
2 * Persistent Storage - ramfs parts.
3 *
4 * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/fs.h>
22#include <linux/fsnotify.h>
23#include <linux/pagemap.h>
24#include <linux/highmem.h>
25#include <linux/time.h>
26#include <linux/init.h>
27#include <linux/string.h>
28#include <linux/mount.h>
29#include <linux/ramfs.h>
30#include <linux/sched.h>
31#include <linux/magic.h>
32#include <linux/pstore.h>
33#include <linux/slab.h>
34#include <linux/uaccess.h>
35
36#include "internal.h"
37
38#define PSTORE_NAMELEN 64
39
40struct pstore_private {
41 u64 id;
42 int (*erase)(u64);
43};
44
45#define pstore_get_inode ramfs_get_inode
46
47/*
48 * When a file is unlinked from our file system we call the
49 * platform driver to erase the record from persistent store.
50 */
51static int pstore_unlink(struct inode *dir, struct dentry *dentry)
52{
53 struct pstore_private *p = dentry->d_inode->i_private;
54
55 p->erase(p->id);
56 kfree(p);
57
58 return simple_unlink(dir, dentry);
59}
60
61static const struct inode_operations pstore_dir_inode_operations = {
62 .lookup = simple_lookup,
63 .unlink = pstore_unlink,
64};
65
66static const struct super_operations pstore_ops = {
67 .statfs = simple_statfs,
68 .drop_inode = generic_delete_inode,
69 .show_options = generic_show_options,
70};
71
72static struct super_block *pstore_sb;
73static struct vfsmount *pstore_mnt;
74
75int pstore_is_mounted(void)
76{
77 return pstore_mnt != NULL;
78}
79
80/*
81 * Set up a file structure as if we had opened this file and
82 * write our data to it.
83 */
84static int pstore_writefile(struct inode *inode, struct dentry *dentry,
85 char *data, size_t size)
86{
87 struct file f;
88 ssize_t n;
89 mm_segment_t old_fs = get_fs();
90
 91	memset(&f, 0, sizeof f);	/* zero the struct; '0' would fill it with 0x30 bytes */
92 f.f_mapping = inode->i_mapping;
93 f.f_path.dentry = dentry;
94 f.f_path.mnt = pstore_mnt;
95 f.f_pos = 0;
96 f.f_op = inode->i_fop;
97 set_fs(KERNEL_DS);
98 n = do_sync_write(&f, data, size, &f.f_pos);
99 set_fs(old_fs);
100
101 fsnotify_modify(&f);
102
103 return n == size;
104}
105
106/*
107 * Make a regular file in the root directory of our file system.
108 * Load it up with "size" bytes of data from "buf".
109 * Set the mtime & ctime to the date that this record was originally stored.
110 */
111int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
112 char *data, size_t size,
113 struct timespec time, int (*erase)(u64))
114{
115 struct dentry *root = pstore_sb->s_root;
116 struct dentry *dentry;
117 struct inode *inode;
118 int rc;
119 char name[PSTORE_NAMELEN];
120 struct pstore_private *private;
121
122 rc = -ENOMEM;
123 inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0);
124 if (!inode)
125 goto fail;
126 inode->i_uid = inode->i_gid = 0;
127 private = kmalloc(sizeof *private, GFP_KERNEL);
128 if (!private)
129 goto fail_alloc;
130 private->id = id;
131 private->erase = erase;
132
133 switch (type) {
134 case PSTORE_TYPE_DMESG:
135 sprintf(name, "dmesg-%s-%lld", psname, id);
136 break;
137 case PSTORE_TYPE_MCE:
138 sprintf(name, "mce-%s-%lld", psname, id);
139 break;
140 case PSTORE_TYPE_UNKNOWN:
141 sprintf(name, "unknown-%s-%lld", psname, id);
142 break;
143 default:
144 sprintf(name, "type%d-%s-%lld", type, psname, id);
145 break;
146 }
147
148 mutex_lock(&root->d_inode->i_mutex);
149
150 rc = -ENOSPC;
151 dentry = d_alloc_name(root, name);
152 if (IS_ERR(dentry))
153 goto fail_lockedalloc;
154
155 d_add(dentry, inode);
156
157 mutex_unlock(&root->d_inode->i_mutex);
158
159 if (!pstore_writefile(inode, dentry, data, size))
160 goto fail_write;
161
162 inode->i_private = private;
163
164 if (time.tv_sec)
165 inode->i_mtime = inode->i_ctime = time;
166
167 return 0;
168
169fail_write:
170 kfree(private);
171 inode->i_nlink--;
172 mutex_lock(&root->d_inode->i_mutex);
173 d_delete(dentry);
174 dput(dentry);
175 mutex_unlock(&root->d_inode->i_mutex);
176 goto fail;
177
178fail_lockedalloc:
179 mutex_unlock(&root->d_inode->i_mutex);
180 kfree(private);
181fail_alloc:
182 iput(inode);
183
184fail:
185 return rc;
186}
187
188int pstore_fill_super(struct super_block *sb, void *data, int silent)
189{
190 struct inode *inode = NULL;
191 struct dentry *root;
192 int err;
193
194 save_mount_options(sb, data);
195
196 pstore_sb = sb;
197
198 sb->s_maxbytes = MAX_LFS_FILESIZE;
199 sb->s_blocksize = PAGE_CACHE_SIZE;
200 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
201 sb->s_magic = PSTOREFS_MAGIC;
202 sb->s_op = &pstore_ops;
203 sb->s_time_gran = 1;
204
205 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
206 if (!inode) {
207 err = -ENOMEM;
208 goto fail;
209 }
210 /* override ramfs "dir" options so we catch unlink(2) */
211 inode->i_op = &pstore_dir_inode_operations;
212
213 root = d_alloc_root(inode);
214 sb->s_root = root;
215 if (!root) {
216 err = -ENOMEM;
217 goto fail;
218 }
219
220 pstore_get_records();
221
222 return 0;
223fail:
224 iput(inode);
225 return err;
226}
227
228static int pstore_get_sb(struct file_system_type *fs_type,
229 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
230{
231 struct dentry *root;
232
233 root = mount_nodev(fs_type, flags, data, pstore_fill_super);
234 if (IS_ERR(root))
235 return -ENOMEM;
236
237 mnt->mnt_root = root;
238 mnt->mnt_sb = root->d_sb;
239 pstore_mnt = mnt;
240
241 return 0;
242}
243
244static void pstore_kill_sb(struct super_block *sb)
245{
246 kill_litter_super(sb);
247 pstore_sb = NULL;
248 pstore_mnt = NULL;
249}
250
251static struct file_system_type pstore_fs_type = {
252 .name = "pstore",
253 .get_sb = pstore_get_sb,
254 .kill_sb = pstore_kill_sb,
255};
256
257static int __init init_pstore_fs(void)
258{
259 int rc = 0;
260 struct kobject *pstorefs_kobj;
261
262 pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj);
263 if (!pstorefs_kobj) {
264 rc = -ENOMEM;
265 goto done;
266 }
267
268 rc = sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
269 if (rc)
270 goto done1;
271
272 rc = register_filesystem(&pstore_fs_type);
273 if (rc == 0)
274 goto done;
275
276 sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
277done1:
278 kobject_put(pstorefs_kobj);
279done:
280 return rc;
281}
282module_init(init_pstore_fs)
283
284MODULE_AUTHOR("Tony Luck <tony.luck@intel.com>");
285MODULE_LICENSE("GPL");
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
new file mode 100644
index 000000000000..76c26d2fab29
--- /dev/null
+++ b/fs/pstore/internal.h
@@ -0,0 +1,7 @@
1extern void pstore_get_records(void);
2extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
3 char *data, size_t size,
4 struct timespec time, int (*erase)(u64));
5extern int pstore_is_mounted(void);
6
7extern struct kobj_attribute pstore_kmsg_bytes_attr;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
new file mode 100644
index 000000000000..705fdf8abf6e
--- /dev/null
+++ b/fs/pstore/platform.c
@@ -0,0 +1,202 @@
1/*
2 * Persistent Storage - platform driver interface parts.
3 *
4 * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/atomic.h>
21#include <linux/types.h>
22#include <linux/errno.h>
23#include <linux/init.h>
24#include <linux/kmsg_dump.h>
25#include <linux/module.h>
26#include <linux/pstore.h>
27#include <linux/string.h>
28#include <linux/slab.h>
29#include <linux/uaccess.h>
30
31#include "internal.h"
32
33/*
34 * pstore_lock just protects "psinfo" during
35 * calls to pstore_register()
36 */
37static DEFINE_SPINLOCK(pstore_lock);
38static struct pstore_info *psinfo;
39
40/* How much of the console log to snapshot. /sys/fs/pstore/kmsg_bytes */
41static unsigned long kmsg_bytes = 10240;
42
43static ssize_t b_show(struct kobject *kobj,
44 struct kobj_attribute *attr, char *buf)
45{
46 return snprintf(buf, PAGE_SIZE, "%lu\n", kmsg_bytes);
47}
48
49static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
50 const char *buf, size_t count)
51{
52 return (sscanf(buf, "%lu", &kmsg_bytes) > 0) ? count : 0;
53}
54
55struct kobj_attribute pstore_kmsg_bytes_attr =
56 __ATTR(kmsg_bytes, S_IRUGO | S_IWUSR, b_show, b_store);
57
58/* Tag each group of saved records with a sequence number */
59static int oopscount;
60
61/*
62 * callback from kmsg_dump. (s2,l2) has the most recently
63 * written bytes, older bytes are in (s1,l1). Save as much
64 * as we can from the end of the buffer.
65 */
66static void pstore_dump(struct kmsg_dumper *dumper,
67 enum kmsg_dump_reason reason,
68 const char *s1, unsigned long l1,
69 const char *s2, unsigned long l2)
70{
71 unsigned long s1_start, s2_start;
72 unsigned long l1_cpy, l2_cpy;
73 unsigned long size, total = 0;
74 char *dst;
75 u64 id;
76 int hsize, part = 1;
77
78 mutex_lock(&psinfo->buf_mutex);
79 oopscount++;
80 while (total < kmsg_bytes) {
81 dst = psinfo->buf;
82 hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++);
83 size = psinfo->bufsize - hsize;
84 dst += hsize;
85
86 l2_cpy = min(l2, size);
87 l1_cpy = min(l1, size - l2_cpy);
88
89 if (l1_cpy + l2_cpy == 0)
90 break;
91
92 s2_start = l2 - l2_cpy;
93 s1_start = l1 - l1_cpy;
94
95 memcpy(dst, s1 + s1_start, l1_cpy);
96 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
97
98 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
99 if (pstore_is_mounted())
100 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
101 psinfo->buf, hsize + l1_cpy + l2_cpy,
102 CURRENT_TIME, psinfo->erase);
103 l1 -= l1_cpy;
104 l2 -= l2_cpy;
105 total += l1_cpy + l2_cpy;
106 }
107 mutex_unlock(&psinfo->buf_mutex);
108}
109
110static struct kmsg_dumper pstore_dumper = {
111 .dump = pstore_dump,
112};
113
114/*
115 * platform specific persistent storage driver registers with
116 * us here. If pstore is already mounted, call the platform
117 * read function right away to populate the file system. If not
118 * then the pstore mount code will call us later to fill out
119 * the file system.
120 *
121 * Register with kmsg_dump to save last part of console log on panic.
122 */
123int pstore_register(struct pstore_info *psi)
124{
125 struct module *owner = psi->owner;
126
127 spin_lock(&pstore_lock);
128 if (psinfo) {
129 spin_unlock(&pstore_lock);
130 return -EBUSY;
131 }
132 psinfo = psi;
133 spin_unlock(&pstore_lock);
134
135 if (owner && !try_module_get(owner)) {
136 psinfo = NULL;
137 return -EINVAL;
138 }
139
140 if (pstore_is_mounted())
141 pstore_get_records();
142
143 kmsg_dump_register(&pstore_dumper);
144
145 return 0;
146}
147EXPORT_SYMBOL_GPL(pstore_register);
148
149/*
150 * Read all the records from the persistent store. Create and
151 * file files in our filesystem.
152 */
153void pstore_get_records(void)
154{
155 struct pstore_info *psi = psinfo;
156 size_t size;
157 u64 id;
158 enum pstore_type_id type;
159 struct timespec time;
160 int failed = 0;
161
162 if (!psi)
163 return;
164
165 mutex_lock(&psinfo->buf_mutex);
166 while ((size = psi->read(&id, &type, &time)) > 0) {
167 if (pstore_mkfile(type, psi->name, id, psi->buf, size,
168 time, psi->erase))
169 failed++;
170 }
171 mutex_unlock(&psinfo->buf_mutex);
172
173 if (failed)
174 printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
175 failed, psi->name);
176}
177
178/*
179 * Call platform driver to write a record to the
180 * persistent store.
181 */
182int pstore_write(enum pstore_type_id type, char *buf, size_t size)
183{
184 u64 id;
185
186 if (!psinfo)
187 return -ENODEV;
188
189 if (size > psinfo->bufsize)
190 return -EFBIG;
191
192 mutex_lock(&psinfo->buf_mutex);
193 memcpy(psinfo->buf, buf, size);
194 id = psinfo->write(type, size);
195 if (pstore_is_mounted())
196 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
197 size, CURRENT_TIME, psinfo->erase);
198 mutex_unlock(&psinfo->buf_mutex);
199
200 return 0;
201}
202EXPORT_SYMBOL_GPL(pstore_write);
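For orientation, here is what the driver side of this interface looks like. The sketch below is hypothetical and stores nothing; the struct pstore_info layout is inferred from the calls above (owner, name, buf, bufsize, buf_mutex, read, write, erase) and should be checked against <linux/pstore.h>:

    #include <linux/module.h>
    #include <linux/mutex.h>
    #include <linux/pstore.h>

    static char example_buf[1024];

    static size_t example_read(u64 *id, enum pstore_type_id *type,
                               struct timespec *time)
    {
            return 0;                               /* no stored records */
    }

    static u64 example_write(enum pstore_type_id type, size_t size)
    {
            /* example_info.buf already holds 'size' bytes to persist */
            return 1;                               /* record id */
    }

    static int example_erase(u64 id)
    {
            return 0;
    }

    static struct pstore_info example_info = {
            .owner   = THIS_MODULE,
            .name    = "example",
            .buf     = example_buf,
            .bufsize = sizeof(example_buf),
            .read    = example_read,
            .write   = example_write,
            .erase   = example_erase,
    };

    static int __init example_init(void)
    {
            mutex_init(&example_info.buf_mutex);
            return pstore_register(&example_info);
    }
    module_init(example_init);
    MODULE_LICENSE("GPL");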
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 65444d29406b..f1ab3604db5a 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -112,7 +112,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
112 if (!info->dqi_priv) { 112 if (!info->dqi_priv) {
113 printk(KERN_WARNING 113 printk(KERN_WARNING
114 "Not enough memory for quota information structure.\n"); 114 "Not enough memory for quota information structure.\n");
115 return -1; 115 return -ENOMEM;
116 } 116 }
117 qinfo = info->dqi_priv; 117 qinfo = info->dqi_priv;
118 if (version == 0) { 118 if (version == 0) {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859e6990..c77514bd5776 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2876 reiserfs_mounted_fs_count++; 2876 reiserfs_mounted_fs_count++;
2877 if (reiserfs_mounted_fs_count <= 1) { 2877 if (reiserfs_mounted_fs_count <= 1) {
2878 reiserfs_write_unlock(sb); 2878 reiserfs_write_unlock(sb);
2879 commit_wq = create_workqueue("reiserfs"); 2879 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2880 reiserfs_write_lock(sb); 2880 reiserfs_write_lock(sb);
2881 } 2881 }
2882 2882
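Context for the one-line change above: alloc_workqueue() with WQ_MEM_RECLAIM gives the queue a rescuer thread, so journal commit work can still make progress under memory pressure. Illustrative usage with a made-up name:

    static struct workqueue_struct *foo_commit_wq;

    static int foo_start_commit_thread(void)
    {
            foo_commit_wq = alloc_workqueue("foo_commit", WQ_MEM_RECLAIM, 0);
            return foo_commit_wq ? 0 : -ENOMEM;     /* was create_workqueue("foo") */
    }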
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..118662690cdf 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,7 +593,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
593 new_inode_init(inode, dir, mode); 593 new_inode_init(inode, dir, mode);
594 594
595 jbegin_count += reiserfs_cache_default_acl(dir); 595 jbegin_count += reiserfs_cache_default_acl(dir);
596 retval = reiserfs_security_init(dir, inode, &security); 596 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
597 if (retval < 0) { 597 if (retval < 0) {
598 drop_new_inode(inode); 598 drop_new_inode(inode);
599 return retval; 599 return retval;
@@ -667,7 +667,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
667 new_inode_init(inode, dir, mode); 667 new_inode_init(inode, dir, mode);
668 668
669 jbegin_count += reiserfs_cache_default_acl(dir); 669 jbegin_count += reiserfs_cache_default_acl(dir);
670 retval = reiserfs_security_init(dir, inode, &security); 670 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
671 if (retval < 0) { 671 if (retval < 0) {
672 drop_new_inode(inode); 672 drop_new_inode(inode);
673 return retval; 673 return retval;
@@ -747,7 +747,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
747 new_inode_init(inode, dir, mode); 747 new_inode_init(inode, dir, mode);
748 748
749 jbegin_count += reiserfs_cache_default_acl(dir); 749 jbegin_count += reiserfs_cache_default_acl(dir);
750 retval = reiserfs_security_init(dir, inode, &security); 750 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
751 if (retval < 0) { 751 if (retval < 0) {
752 drop_new_inode(inode); 752 drop_new_inode(inode);
753 return retval; 753 return retval;
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
@@ -1032,7 +1032,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
1032 } 1032 }
1033 new_inode_init(inode, parent_dir, mode); 1033 new_inode_init(inode, parent_dir, mode);
1034 1034
1035 retval = reiserfs_security_init(parent_dir, inode, &security); 1035 retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name,
1036 &security);
1036 if (retval < 0) { 1037 if (retval < 0) {
1037 drop_new_inode(inode); 1038 drop_new_inode(inode);
1038 return retval; 1039 return retval;
@@ -1122,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1123 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1124 return -EMLINK;
1124 } 1125 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1126
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1127 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1128 inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
978 978
979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
983 return -EPERM; 981 return -EPERM;
984} 982}
985 983
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 237c6928d3c6..ef66c18a9332 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -54,6 +54,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
54 * of blocks needed for the transaction. If successful, reiserfs_security 54 * of blocks needed for the transaction. If successful, reiserfs_security
55 * must be released using reiserfs_security_free when the caller is done. */ 55 * must be released using reiserfs_security_free when the caller is done. */
56int reiserfs_security_init(struct inode *dir, struct inode *inode, 56int reiserfs_security_init(struct inode *dir, struct inode *inode,
57 const struct qstr *qstr,
57 struct reiserfs_security_handle *sec) 58 struct reiserfs_security_handle *sec)
58{ 59{
59 int blocks = 0; 60 int blocks = 0;
@@ -65,7 +66,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
65 if (IS_PRIVATE(dir)) 66 if (IS_PRIVATE(dir))
66 return 0; 67 return 0;
67 68
68 error = security_inode_init_security(inode, dir, &sec->name, 69 error = security_inode_init_security(inode, dir, qstr, &sec->name,
69 &sec->value, &sec->length); 70 &sec->value, &sec->length);
70 if (error) { 71 if (error) {
71 if (error == -EOPNOTSUPP) 72 if (error == -EOPNOTSUPP)
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
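The AT_EMPTY_PATH additions above let fstatat()/fchownat() operate on the dfd itself when the pathname is empty. A userspace sketch, assuming a kernel with these patches; AT_EMPTY_PATH may need defining by hand on older headers (0x1000 is the generic value):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/stat.h>

    #ifndef AT_EMPTY_PATH
    #define AT_EMPTY_PATH 0x1000
    #endif

    /* stat the object 'fd' refers to, even if it was opened with O_PATH */
    int stat_by_fd(int fd, struct stat *st)
    {
            return fstatat(fd, "", st, AT_EMPTY_PATH);
    }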
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
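With do_statfs_native() and do_statfs64() now taking a struct kstatfs, the statfs syscalls split into a "gather" step (user_statfs()/fd_statfs(), introduced earlier in this series) and a "convert and copy to user space" step. A minimal sketch of consuming the gather half from kernel code, assuming only the fd_statfs() signature visible in the hunks above; the helper name example_report_free_bytes() is hypothetical:

	/* sketch: use fd_statfs() entirely in kernel space - no struct statfs
	 * and no __user pointer involved (replacing the open-coded
	 * fget() + vfs_statfs() path removed above) */
	static int example_report_free_bytes(unsigned int fd, u64 *bytes)
	{
		struct kstatfs st;
		int err = fd_statfs(fd, &st);

		if (err)
			return err;
		*bytes = (u64)st.f_bfree * st.f_bsize;	/* free space in bytes */
		return 0;
	}
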
diff --git a/fs/super.c b/fs/super.c
index 7e9dd4cc2c01..4bae0ef6110e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -843,23 +843,6 @@ error:
843} 843}
844EXPORT_SYMBOL(mount_bdev); 844EXPORT_SYMBOL(mount_bdev);
845 845
846int get_sb_bdev(struct file_system_type *fs_type,
847 int flags, const char *dev_name, void *data,
848 int (*fill_super)(struct super_block *, void *, int),
849 struct vfsmount *mnt)
850{
851 struct dentry *root;
852
853 root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
854 if (IS_ERR(root))
855 return PTR_ERR(root);
856 mnt->mnt_root = root;
857 mnt->mnt_sb = root->d_sb;
858 return 0;
859}
860
861EXPORT_SYMBOL(get_sb_bdev);
862
863void kill_block_super(struct super_block *sb) 846void kill_block_super(struct super_block *sb)
864{ 847{
865 struct block_device *bdev = sb->s_bdev; 848 struct block_device *bdev = sb->s_bdev;
@@ -897,22 +880,6 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
897} 880}
898EXPORT_SYMBOL(mount_nodev); 881EXPORT_SYMBOL(mount_nodev);
899 882
900int get_sb_nodev(struct file_system_type *fs_type,
901 int flags, void *data,
902 int (*fill_super)(struct super_block *, void *, int),
903 struct vfsmount *mnt)
904{
905 struct dentry *root;
906
907 root = mount_nodev(fs_type, flags, data, fill_super);
908 if (IS_ERR(root))
909 return PTR_ERR(root);
910 mnt->mnt_root = root;
911 mnt->mnt_sb = root->d_sb;
912 return 0;
913}
914EXPORT_SYMBOL(get_sb_nodev);
915
916static int compare_single(struct super_block *s, void *p) 883static int compare_single(struct super_block *s, void *p)
917{ 884{
918 return 1; 885 return 1;
@@ -943,22 +910,6 @@ struct dentry *mount_single(struct file_system_type *fs_type,
943} 910}
944EXPORT_SYMBOL(mount_single); 911EXPORT_SYMBOL(mount_single);
945 912
946int get_sb_single(struct file_system_type *fs_type,
947 int flags, void *data,
948 int (*fill_super)(struct super_block *, void *, int),
949 struct vfsmount *mnt)
950{
951 struct dentry *root;
952 root = mount_single(fs_type, flags, data, fill_super);
953 if (IS_ERR(root))
954 return PTR_ERR(root);
955 mnt->mnt_root = root;
956 mnt->mnt_sb = root->d_sb;
957 return 0;
958}
959
960EXPORT_SYMBOL(get_sb_single);
961
962struct vfsmount * 913struct vfsmount *
963vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 914vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
964{ 915{
@@ -988,19 +939,13 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
988 goto out_free_secdata; 939 goto out_free_secdata;
989 } 940 }
990 941
991 if (type->mount) { 942 root = type->mount(type, flags, name, data);
992 root = type->mount(type, flags, name, data); 943 if (IS_ERR(root)) {
993 if (IS_ERR(root)) { 944 error = PTR_ERR(root);
994 error = PTR_ERR(root); 945 goto out_free_secdata;
995 goto out_free_secdata;
996 }
997 mnt->mnt_root = root;
998 mnt->mnt_sb = root->d_sb;
999 } else {
1000 error = type->get_sb(type, flags, name, data, mnt);
1001 if (error < 0)
1002 goto out_free_secdata;
1003 } 946 }
947 mnt->mnt_root = root;
948 mnt->mnt_sb = root->d_sb;
1004 BUG_ON(!mnt->mnt_sb); 949 BUG_ON(!mnt->mnt_sb);
1005 WARN_ON(!mnt->mnt_sb->s_bdi); 950 WARN_ON(!mnt->mnt_sb->s_bdi);
1006 mnt->mnt_sb->s_flags |= MS_BORN; 951 mnt->mnt_sb->s_flags |= MS_BORN;
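With get_sb_bdev()/get_sb_nodev()/get_sb_single() removed and vfs_kern_mount() calling ->mount() unconditionally, a filesystem supplies a ->mount() method that returns the root dentry and lets the VFS fill in mnt_root and mnt_sb itself. A sketch of the resulting registration shape; the examplefs_* names, including examplefs_fill_super(), are purely illustrative:

	/* sketch: block-device filesystem after the ->get_sb() removal */
	static struct dentry *examplefs_mount(struct file_system_type *fs_type,
			int flags, const char *dev_name, void *data)
	{
		/* mount_bdev() returns the root dentry (or an ERR_PTR) directly */
		return mount_bdev(fs_type, flags, dev_name, data, examplefs_fill_super);
	}

	static struct file_system_type examplefs_type = {
		.owner		= THIS_MODULE,
		.name		= "examplefs",
		.mount		= examplefs_mount,
		.kill_sb	= kill_block_super,
		.fs_flags	= FS_REQUIRES_DEV,
	};
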
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
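The sysv rename path (and the matching ufs change further down) drops the speculative inode_inc_link_count()/inode_dec_link_count() pair around installing the new directory entry: a rename moves a link, so old_inode's count is unchanged and only needs to be marked dirty for the ctime update. In outline, per the hunk above:

	/* tail of ->rename() after the simplification */
	sysv_delete_entry(old_de, old_page);	/* remove the old name */
	mark_inode_dirty(old_inode);		/* ctime update only - i_nlink is unchanged */
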
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
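The comment and local i_nlink check removed here guarded ubifs_link() against racing with the final unlink; dropping them only makes sense if an equivalent guard now runs in the VFS before ->link() is called. The sketch below shows what such a generic check looks like; it is an assumption about where the check moved, not a quote of the actual vfs_link() code:

	/* hypothetical VFS-side guard, taken with the target inode's i_mutex held */
	mutex_lock(&inode->i_mutex);
	if (inode->i_nlink == 0)
		error = -ENOENT;	/* raced with the final unlink: refuse new links */
	else
		error = dir->i_op->link(old_dentry, dir, new_dentry);
	mutex_unlock(&inode->i_mutex);
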
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 306ee39ef2c3..8994dd041660 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -31,7 +31,7 @@
31#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) 31#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr)
32#define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) 32#define udf_test_bit(nr, addr) ext2_test_bit(nr, addr)
33#define udf_find_next_one_bit(addr, size, offset) \ 33#define udf_find_next_one_bit(addr, size, offset) \
34 ext2_find_next_bit(addr, size, offset) 34 ext2_find_next_bit((unsigned long *)(addr), size, offset)
35 35
36static int read_block_bitmap(struct super_block *sb, 36static int read_block_bitmap(struct super_block *sb,
37 struct udf_bitmap *bitmap, unsigned int block, 37 struct udf_bitmap *bitmap, unsigned int block,
@@ -297,7 +297,7 @@ repeat:
297 break; 297 break;
298 } 298 }
299 } else { 299 } else {
300 bit = udf_find_next_one_bit((char *)bh->b_data, 300 bit = udf_find_next_one_bit(bh->b_data,
301 sb->s_blocksize << 3, 301 sb->s_blocksize << 3,
302 group_start << 3); 302 group_start << 3);
303 if (bit < sb->s_blocksize << 3) 303 if (bit < sb->s_blocksize << 3)
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 89c78486cbbe..f391a2adc699 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -123,8 +123,8 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
123 if (inode->i_sb->s_blocksize < 123 if (inode->i_sb->s_blocksize <
124 (udf_file_entry_alloc_offset(inode) + 124 (udf_file_entry_alloc_offset(inode) +
125 pos + count)) { 125 pos + count)) {
126 udf_expand_file_adinicb(inode, pos + count, &err); 126 err = udf_expand_file_adinicb(inode);
127 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 127 if (err) {
128 udf_debug("udf_expand_adinicb: err=%d\n", err); 128 udf_debug("udf_expand_adinicb: err=%d\n", err);
129 up_write(&iinfo->i_data_sem); 129 up_write(&iinfo->i_data_sem);
130 return err; 130 return err;
@@ -237,7 +237,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
237 237
238 if ((attr->ia_valid & ATTR_SIZE) && 238 if ((attr->ia_valid & ATTR_SIZE) &&
239 attr->ia_size != i_size_read(inode)) { 239 attr->ia_size != i_size_read(inode)) {
240 error = vmtruncate(inode, attr->ia_size); 240 error = udf_setsize(inode, attr->ia_size);
241 if (error) 241 if (error)
242 return error; 242 return error;
243 } 243 }
@@ -249,5 +249,4 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
249 249
250const struct inode_operations udf_file_inode_operations = { 250const struct inode_operations udf_file_inode_operations = {
251 .setattr = udf_setattr, 251 .setattr = udf_setattr,
252 .truncate = udf_truncate,
253}; 252};
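With .truncate gone from udf_file_inode_operations, size changes are driven entirely from ->setattr() through udf_setsize() (added in the fs/udf/inode.c hunks below). The hunk only shows the ATTR_SIZE branch; the sketch fills in the surrounding lines with the usual inode_change_ok()/setattr_copy() pattern, which should be treated as an assumption:

	static int udf_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = dentry->d_inode;
		int error;

		error = inode_change_ok(inode, attr);
		if (error)
			return error;

		if ((attr->ia_valid & ATTR_SIZE) &&
		    attr->ia_size != i_size_read(inode)) {
			/* grow or shrink via udf_setsize(); no vmtruncate(), no ->truncate() */
			error = udf_setsize(inode, attr->ia_size);
			if (error)
				return error;
		}

		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
		return 0;
	}
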
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c6a2e782b97b..ccc814321414 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -73,14 +73,12 @@ void udf_evict_inode(struct inode *inode)
73 struct udf_inode_info *iinfo = UDF_I(inode); 73 struct udf_inode_info *iinfo = UDF_I(inode);
74 int want_delete = 0; 74 int want_delete = 0;
75 75
76 truncate_inode_pages(&inode->i_data, 0);
77
78 if (!inode->i_nlink && !is_bad_inode(inode)) { 76 if (!inode->i_nlink && !is_bad_inode(inode)) {
79 want_delete = 1; 77 want_delete = 1;
80 inode->i_size = 0; 78 udf_setsize(inode, 0);
81 udf_truncate(inode);
82 udf_update_inode(inode, IS_SYNC(inode)); 79 udf_update_inode(inode, IS_SYNC(inode));
83 } 80 } else
81 truncate_inode_pages(&inode->i_data, 0);
84 invalidate_inode_buffers(inode); 82 invalidate_inode_buffers(inode);
85 end_writeback(inode); 83 end_writeback(inode);
86 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
@@ -117,9 +115,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
117 115
118 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); 116 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
119 if (unlikely(ret)) { 117 if (unlikely(ret)) {
120 loff_t isize = mapping->host->i_size; 118 struct inode *inode = mapping->host;
121 if (pos + len > isize) 119 struct udf_inode_info *iinfo = UDF_I(inode);
122 vmtruncate(mapping->host, isize); 120 loff_t isize = inode->i_size;
121
122 if (pos + len > isize) {
123 truncate_pagecache(inode, pos + len, isize);
124 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
125 down_write(&iinfo->i_data_sem);
126 udf_truncate_extents(inode);
127 up_write(&iinfo->i_data_sem);
128 }
129 }
123 } 130 }
124 131
125 return ret; 132 return ret;
@@ -139,30 +146,31 @@ const struct address_space_operations udf_aops = {
139 .bmap = udf_bmap, 146 .bmap = udf_bmap,
140}; 147};
141 148
142void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err) 149int udf_expand_file_adinicb(struct inode *inode)
143{ 150{
144 struct page *page; 151 struct page *page;
145 char *kaddr; 152 char *kaddr;
146 struct udf_inode_info *iinfo = UDF_I(inode); 153 struct udf_inode_info *iinfo = UDF_I(inode);
154 int err;
147 struct writeback_control udf_wbc = { 155 struct writeback_control udf_wbc = {
148 .sync_mode = WB_SYNC_NONE, 156 .sync_mode = WB_SYNC_NONE,
149 .nr_to_write = 1, 157 .nr_to_write = 1,
150 }; 158 };
151 159
152 /* from now on we have normal address_space methods */
153 inode->i_data.a_ops = &udf_aops;
154
155 if (!iinfo->i_lenAlloc) { 160 if (!iinfo->i_lenAlloc) {
156 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) 161 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
157 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; 162 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
158 else 163 else
159 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 164 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
165 /* from now on we have normal address_space methods */
166 inode->i_data.a_ops = &udf_aops;
160 mark_inode_dirty(inode); 167 mark_inode_dirty(inode);
161 return; 168 return 0;
162 } 169 }
163 170
164 page = grab_cache_page(inode->i_mapping, 0); 171 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
165 BUG_ON(!PageLocked(page)); 172 if (!page)
173 return -ENOMEM;
166 174
167 if (!PageUptodate(page)) { 175 if (!PageUptodate(page)) {
168 kaddr = kmap(page); 176 kaddr = kmap(page);
@@ -181,11 +189,24 @@ void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err)
181 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; 189 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
182 else 190 else
183 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 191 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
184 192 /* from now on we have normal address_space methods */
185 inode->i_data.a_ops->writepage(page, &udf_wbc); 193 inode->i_data.a_ops = &udf_aops;
194 err = inode->i_data.a_ops->writepage(page, &udf_wbc);
195 if (err) {
196 /* Restore everything back so that we don't lose data... */
197 lock_page(page);
198 kaddr = kmap(page);
199 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
200 inode->i_size);
201 kunmap(page);
202 unlock_page(page);
203 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
204 inode->i_data.a_ops = &udf_adinicb_aops;
205 }
186 page_cache_release(page); 206 page_cache_release(page);
187
188 mark_inode_dirty(inode); 207 mark_inode_dirty(inode);
208
209 return err;
189} 210}
190 211
191struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, 212struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
@@ -348,8 +369,10 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block,
348} 369}
349 370
350/* Extend the file by 'blocks' blocks, return the number of extents added */ 371/* Extend the file by 'blocks' blocks, return the number of extents added */
351int udf_extend_file(struct inode *inode, struct extent_position *last_pos, 372static int udf_do_extend_file(struct inode *inode,
352 struct kernel_long_ad *last_ext, sector_t blocks) 373 struct extent_position *last_pos,
374 struct kernel_long_ad *last_ext,
375 sector_t blocks)
353{ 376{
354 sector_t add; 377 sector_t add;
355 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); 378 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
@@ -357,6 +380,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
357 struct kernel_lb_addr prealloc_loc = {}; 380 struct kernel_lb_addr prealloc_loc = {};
358 int prealloc_len = 0; 381 int prealloc_len = 0;
359 struct udf_inode_info *iinfo; 382 struct udf_inode_info *iinfo;
383 int err;
360 384
361 /* The previous extent is fake and we should not extend by anything 385 /* The previous extent is fake and we should not extend by anything
362 * - there's nothing to do... */ 386 * - there's nothing to do... */
@@ -422,26 +446,29 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
422 /* Create enough extents to cover the whole hole */ 446 /* Create enough extents to cover the whole hole */
423 while (blocks > add) { 447 while (blocks > add) {
424 blocks -= add; 448 blocks -= add;
425 if (udf_add_aext(inode, last_pos, &last_ext->extLocation, 449 err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
426 last_ext->extLength, 1) == -1) 450 last_ext->extLength, 1);
427 return -1; 451 if (err)
452 return err;
428 count++; 453 count++;
429 } 454 }
430 if (blocks) { 455 if (blocks) {
431 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | 456 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
432 (blocks << sb->s_blocksize_bits); 457 (blocks << sb->s_blocksize_bits);
433 if (udf_add_aext(inode, last_pos, &last_ext->extLocation, 458 err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
434 last_ext->extLength, 1) == -1) 459 last_ext->extLength, 1);
435 return -1; 460 if (err)
461 return err;
436 count++; 462 count++;
437 } 463 }
438 464
439out: 465out:
440 /* Do we have some preallocated blocks saved? */ 466 /* Do we have some preallocated blocks saved? */
441 if (prealloc_len) { 467 if (prealloc_len) {
442 if (udf_add_aext(inode, last_pos, &prealloc_loc, 468 err = udf_add_aext(inode, last_pos, &prealloc_loc,
443 prealloc_len, 1) == -1) 469 prealloc_len, 1);
444 return -1; 470 if (err)
471 return err;
445 last_ext->extLocation = prealloc_loc; 472 last_ext->extLocation = prealloc_loc;
446 last_ext->extLength = prealloc_len; 473 last_ext->extLength = prealloc_len;
447 count++; 474 count++;
@@ -453,11 +480,68 @@ out:
453 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 480 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
454 last_pos->offset -= sizeof(struct long_ad); 481 last_pos->offset -= sizeof(struct long_ad);
455 else 482 else
456 return -1; 483 return -EIO;
457 484
458 return count; 485 return count;
459} 486}
460 487
488static int udf_extend_file(struct inode *inode, loff_t newsize)
489{
490
491 struct extent_position epos;
492 struct kernel_lb_addr eloc;
493 uint32_t elen;
494 int8_t etype;
495 struct super_block *sb = inode->i_sb;
496 sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
497 int adsize;
498 struct udf_inode_info *iinfo = UDF_I(inode);
499 struct kernel_long_ad extent;
500 int err;
501
502 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
503 adsize = sizeof(struct short_ad);
504 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
505 adsize = sizeof(struct long_ad);
506 else
507 BUG();
508
509 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
510
511 /* File has extent covering the new size (could happen when extending
512 * inside a block)? */
513 if (etype != -1)
514 return 0;
515 if (newsize & (sb->s_blocksize - 1))
516 offset++;
517 /* Extended file just to the boundary of the last file block? */
518 if (offset == 0)
519 return 0;
520
521 /* Truncate is extending the file by 'offset' blocks */
522 if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
523 (epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
524 /* File has no extents at all or has empty last
525 * indirect extent! Create a fake extent... */
526 extent.extLocation.logicalBlockNum = 0;
527 extent.extLocation.partitionReferenceNum = 0;
528 extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
529 } else {
530 epos.offset -= adsize;
531 etype = udf_next_aext(inode, &epos, &extent.extLocation,
532 &extent.extLength, 0);
533 extent.extLength |= etype << 30;
534 }
535 err = udf_do_extend_file(inode, &epos, &extent, offset);
536 if (err < 0)
537 goto out;
538 err = 0;
539 iinfo->i_lenExtents = newsize;
540out:
541 brelse(epos.bh);
542 return err;
543}
544
461static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, 545static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
462 int *err, sector_t *phys, int *new) 546 int *err, sector_t *phys, int *new)
463{ 547{
@@ -540,7 +624,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
540 elen = EXT_RECORDED_ALLOCATED | 624 elen = EXT_RECORDED_ALLOCATED |
541 ((elen + inode->i_sb->s_blocksize - 1) & 625 ((elen + inode->i_sb->s_blocksize - 1) &
542 ~(inode->i_sb->s_blocksize - 1)); 626 ~(inode->i_sb->s_blocksize - 1));
543 etype = udf_write_aext(inode, &cur_epos, &eloc, elen, 1); 627 udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
544 } 628 }
545 brelse(prev_epos.bh); 629 brelse(prev_epos.bh);
546 brelse(cur_epos.bh); 630 brelse(cur_epos.bh);
@@ -564,19 +648,17 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
564 memset(&laarr[0].extLocation, 0x00, 648 memset(&laarr[0].extLocation, 0x00,
565 sizeof(struct kernel_lb_addr)); 649 sizeof(struct kernel_lb_addr));
566 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; 650 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
567 /* Will udf_extend_file() create real extent from 651 /* Will udf_do_extend_file() create real extent from
568 a fake one? */ 652 a fake one? */
569 startnum = (offset > 0); 653 startnum = (offset > 0);
570 } 654 }
571 /* Create extents for the hole between EOF and offset */ 655 /* Create extents for the hole between EOF and offset */
572 ret = udf_extend_file(inode, &prev_epos, laarr, offset); 656 ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
573 if (ret == -1) { 657 if (ret < 0) {
574 brelse(prev_epos.bh); 658 brelse(prev_epos.bh);
575 brelse(cur_epos.bh); 659 brelse(cur_epos.bh);
576 brelse(next_epos.bh); 660 brelse(next_epos.bh);
577 /* We don't really know the error here so we just make 661 *err = ret;
578 * something up */
579 *err = -ENOSPC;
580 return NULL; 662 return NULL;
581 } 663 }
582 c = 0; 664 c = 0;
@@ -1005,52 +1087,66 @@ struct buffer_head *udf_bread(struct inode *inode, int block,
1005 return NULL; 1087 return NULL;
1006} 1088}
1007 1089
1008void udf_truncate(struct inode *inode) 1090int udf_setsize(struct inode *inode, loff_t newsize)
1009{ 1091{
1010 int offset;
1011 int err; 1092 int err;
1012 struct udf_inode_info *iinfo; 1093 struct udf_inode_info *iinfo;
1094 int bsize = 1 << inode->i_blkbits;
1013 1095
1014 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1096 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1015 S_ISLNK(inode->i_mode))) 1097 S_ISLNK(inode->i_mode)))
1016 return; 1098 return -EINVAL;
1017 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1099 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1018 return; 1100 return -EPERM;
1019 1101
1020 iinfo = UDF_I(inode); 1102 iinfo = UDF_I(inode);
1021 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1103 if (newsize > inode->i_size) {
1022 down_write(&iinfo->i_data_sem); 1104 down_write(&iinfo->i_data_sem);
1023 if (inode->i_sb->s_blocksize < 1105 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1024 (udf_file_entry_alloc_offset(inode) + 1106 if (bsize <
1025 inode->i_size)) { 1107 (udf_file_entry_alloc_offset(inode) + newsize)) {
1026 udf_expand_file_adinicb(inode, inode->i_size, &err); 1108 err = udf_expand_file_adinicb(inode);
1027 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1109 if (err) {
1028 inode->i_size = iinfo->i_lenAlloc; 1110 up_write(&iinfo->i_data_sem);
1029 up_write(&iinfo->i_data_sem); 1111 return err;
1030 return; 1112 }
1031 } else 1113 } else
1032 udf_truncate_extents(inode); 1114 iinfo->i_lenAlloc = newsize;
1033 } else { 1115 }
1034 offset = inode->i_size & (inode->i_sb->s_blocksize - 1); 1116 err = udf_extend_file(inode, newsize);
1035 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, 1117 if (err) {
1036 0x00, inode->i_sb->s_blocksize - 1118 up_write(&iinfo->i_data_sem);
1037 offset - udf_file_entry_alloc_offset(inode)); 1119 return err;
1038 iinfo->i_lenAlloc = inode->i_size;
1039 } 1120 }
1121 truncate_setsize(inode, newsize);
1040 up_write(&iinfo->i_data_sem); 1122 up_write(&iinfo->i_data_sem);
1041 } else { 1123 } else {
1042 block_truncate_page(inode->i_mapping, inode->i_size, 1124 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1043 udf_get_block); 1125 down_write(&iinfo->i_data_sem);
1126 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize,
1127 0x00, bsize - newsize -
1128 udf_file_entry_alloc_offset(inode));
1129 iinfo->i_lenAlloc = newsize;
1130 truncate_setsize(inode, newsize);
1131 up_write(&iinfo->i_data_sem);
1132 goto update_time;
1133 }
1134 err = block_truncate_page(inode->i_mapping, newsize,
1135 udf_get_block);
1136 if (err)
1137 return err;
1044 down_write(&iinfo->i_data_sem); 1138 down_write(&iinfo->i_data_sem);
1139 truncate_setsize(inode, newsize);
1045 udf_truncate_extents(inode); 1140 udf_truncate_extents(inode);
1046 up_write(&iinfo->i_data_sem); 1141 up_write(&iinfo->i_data_sem);
1047 } 1142 }
1048 1143update_time:
1049 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); 1144 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
1050 if (IS_SYNC(inode)) 1145 if (IS_SYNC(inode))
1051 udf_sync_inode(inode); 1146 udf_sync_inode(inode);
1052 else 1147 else
1053 mark_inode_dirty(inode); 1148 mark_inode_dirty(inode);
1149 return 0;
1054} 1150}
1055 1151
1056static void __udf_read_inode(struct inode *inode) 1152static void __udf_read_inode(struct inode *inode)
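A concrete example of the growth-side decision in udf_setsize() above: data can stay embedded in the ICB only while udf_file_entry_alloc_offset(inode) + newsize still fits inside one block. With a 2048-byte block size and an allocation offset of, say, 176 bytes (illustrative numbers, not taken from the hunk), a file can grow in place up to 2048 - 176 = 1872 bytes; the first truncate or write that pushes it past that calls udf_expand_file_adinicb() to move the data out into a real block, and only then does udf_extend_file() allocate extents for the remainder.
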
@@ -1637,14 +1733,13 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
1637 return NULL; 1733 return NULL;
1638} 1734}
1639 1735
1640int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, 1736int udf_add_aext(struct inode *inode, struct extent_position *epos,
1641 struct kernel_lb_addr *eloc, uint32_t elen, int inc) 1737 struct kernel_lb_addr *eloc, uint32_t elen, int inc)
1642{ 1738{
1643 int adsize; 1739 int adsize;
1644 struct short_ad *sad = NULL; 1740 struct short_ad *sad = NULL;
1645 struct long_ad *lad = NULL; 1741 struct long_ad *lad = NULL;
1646 struct allocExtDesc *aed; 1742 struct allocExtDesc *aed;
1647 int8_t etype;
1648 uint8_t *ptr; 1743 uint8_t *ptr;
1649 struct udf_inode_info *iinfo = UDF_I(inode); 1744 struct udf_inode_info *iinfo = UDF_I(inode);
1650 1745
@@ -1660,7 +1755,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1660 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 1755 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
1661 adsize = sizeof(struct long_ad); 1756 adsize = sizeof(struct long_ad);
1662 else 1757 else
1663 return -1; 1758 return -EIO;
1664 1759
1665 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) { 1760 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) {
1666 unsigned char *sptr, *dptr; 1761 unsigned char *sptr, *dptr;
@@ -1672,12 +1767,12 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1672 obloc.partitionReferenceNum, 1767 obloc.partitionReferenceNum,
1673 obloc.logicalBlockNum, &err); 1768 obloc.logicalBlockNum, &err);
1674 if (!epos->block.logicalBlockNum) 1769 if (!epos->block.logicalBlockNum)
1675 return -1; 1770 return -ENOSPC;
1676 nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, 1771 nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
1677 &epos->block, 1772 &epos->block,
1678 0)); 1773 0));
1679 if (!nbh) 1774 if (!nbh)
1680 return -1; 1775 return -EIO;
1681 lock_buffer(nbh); 1776 lock_buffer(nbh);
1682 memset(nbh->b_data, 0x00, inode->i_sb->s_blocksize); 1777 memset(nbh->b_data, 0x00, inode->i_sb->s_blocksize);
1683 set_buffer_uptodate(nbh); 1778 set_buffer_uptodate(nbh);
@@ -1746,7 +1841,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1746 epos->bh = nbh; 1841 epos->bh = nbh;
1747 } 1842 }
1748 1843
1749 etype = udf_write_aext(inode, epos, eloc, elen, inc); 1844 udf_write_aext(inode, epos, eloc, elen, inc);
1750 1845
1751 if (!epos->bh) { 1846 if (!epos->bh) {
1752 iinfo->i_lenAlloc += adsize; 1847 iinfo->i_lenAlloc += adsize;
@@ -1764,11 +1859,11 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1764 mark_buffer_dirty_inode(epos->bh, inode); 1859 mark_buffer_dirty_inode(epos->bh, inode);
1765 } 1860 }
1766 1861
1767 return etype; 1862 return 0;
1768} 1863}
1769 1864
1770int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, 1865void udf_write_aext(struct inode *inode, struct extent_position *epos,
1771 struct kernel_lb_addr *eloc, uint32_t elen, int inc) 1866 struct kernel_lb_addr *eloc, uint32_t elen, int inc)
1772{ 1867{
1773 int adsize; 1868 int adsize;
1774 uint8_t *ptr; 1869 uint8_t *ptr;
@@ -1798,7 +1893,7 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1798 adsize = sizeof(struct long_ad); 1893 adsize = sizeof(struct long_ad);
1799 break; 1894 break;
1800 default: 1895 default:
1801 return -1; 1896 return;
1802 } 1897 }
1803 1898
1804 if (epos->bh) { 1899 if (epos->bh) {
@@ -1817,8 +1912,6 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1817 1912
1818 if (inc) 1913 if (inc)
1819 epos->offset += adsize; 1914 epos->offset += adsize;
1820
1821 return (elen >> 30);
1822} 1915}
1823 1916
1824int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, 1917int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
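Two calling-convention changes ripple out from here through fs/udf: udf_add_aext() now returns 0 or a negative errno instead of an extent type or -1, and udf_write_aext() returns nothing. Callers that used to compare against -1 (as udf_do_extend_file() did above) now just propagate the error, along the lines of:

	/* sketch of the new convention */
	err = udf_add_aext(inode, &epos, &eloc, elen, 1);
	if (err)
		return err;				/* -EIO, -ENOSPC, ... passed through unchanged */

	udf_write_aext(inode, &epos, &eloc, elen, 1);	/* void: no extent type to check */
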
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1287 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1288 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1289 1288
1290 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255; 1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1294 return 255;
1295 }
1292 1296
1293 *lenp = 3; 1297 *lenp = 3;
1294 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
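The new UDF_MAX_LINKS constant also pins down the limit itself, not just the readability: the old expression (256 << sizeof(i_nlink)) - 1 depends on the size of the in-core link-count field (with a 4-byte i_nlink it works out to (256 << 4) - 1 = 4095), whereas UDF's on-disk file link count is a 16-bit field, so the intended ceiling is 0xffff = 65535, which is what the enum now states explicitly.
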
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 225527cdc885..8424308db4b4 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -197,6 +197,11 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
197 mark_buffer_dirty_inode(epos->bh, inode); 197 mark_buffer_dirty_inode(epos->bh, inode);
198} 198}
199 199
200/*
201 * Truncate extents of inode to inode->i_size. This function can be used only
202 * for making file shorter. For making file longer, udf_extend_file() has to
203 * be used.
204 */
200void udf_truncate_extents(struct inode *inode) 205void udf_truncate_extents(struct inode *inode)
201{ 206{
202 struct extent_position epos; 207 struct extent_position epos;
@@ -219,96 +224,65 @@ void udf_truncate_extents(struct inode *inode)
219 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); 224 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
220 byte_offset = (offset << sb->s_blocksize_bits) + 225 byte_offset = (offset << sb->s_blocksize_bits) +
221 (inode->i_size & (sb->s_blocksize - 1)); 226 (inode->i_size & (sb->s_blocksize - 1));
222 if (etype != -1) { 227 if (etype == -1) {
223 epos.offset -= adsize; 228 /* We should extend the file? */
224 extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset); 229 WARN_ON(byte_offset);
225 epos.offset += adsize; 230 return;
226 if (byte_offset) 231 }
227 lenalloc = epos.offset; 232 epos.offset -= adsize;
228 else 233 extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset);
229 lenalloc = epos.offset - adsize; 234 epos.offset += adsize;
230 235 if (byte_offset)
231 if (!epos.bh) 236 lenalloc = epos.offset;
232 lenalloc -= udf_file_entry_alloc_offset(inode); 237 else
233 else 238 lenalloc = epos.offset - adsize;
234 lenalloc -= sizeof(struct allocExtDesc);
235
236 while ((etype = udf_current_aext(inode, &epos, &eloc,
237 &elen, 0)) != -1) {
238 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
239 udf_write_aext(inode, &epos, &neloc, nelen, 0);
240 if (indirect_ext_len) {
241 /* We managed to free all extents in the
242 * indirect extent - free it too */
243 BUG_ON(!epos.bh);
244 udf_free_blocks(sb, inode, &epos.block,
245 0, indirect_ext_len);
246 } else if (!epos.bh) {
247 iinfo->i_lenAlloc = lenalloc;
248 mark_inode_dirty(inode);
249 } else
250 udf_update_alloc_ext_desc(inode,
251 &epos, lenalloc);
252 brelse(epos.bh);
253 epos.offset = sizeof(struct allocExtDesc);
254 epos.block = eloc;
255 epos.bh = udf_tread(sb,
256 udf_get_lb_pblock(sb, &eloc, 0));
257 if (elen)
258 indirect_ext_len =
259 (elen + sb->s_blocksize - 1) >>
260 sb->s_blocksize_bits;
261 else
262 indirect_ext_len = 1;
263 } else {
264 extent_trunc(inode, &epos, &eloc, etype,
265 elen, 0);
266 epos.offset += adsize;
267 }
268 }
269 239
270 if (indirect_ext_len) { 240 if (!epos.bh)
271 BUG_ON(!epos.bh); 241 lenalloc -= udf_file_entry_alloc_offset(inode);
272 udf_free_blocks(sb, inode, &epos.block, 0, 242 else
273 indirect_ext_len); 243 lenalloc -= sizeof(struct allocExtDesc);
274 } else if (!epos.bh) {
275 iinfo->i_lenAlloc = lenalloc;
276 mark_inode_dirty(inode);
277 } else
278 udf_update_alloc_ext_desc(inode, &epos, lenalloc);
279 } else if (inode->i_size) {
280 if (byte_offset) {
281 struct kernel_long_ad extent;
282 244
283 /* 245 while ((etype = udf_current_aext(inode, &epos, &eloc,
284 * OK, there is not extent covering inode->i_size and 246 &elen, 0)) != -1) {
285 * no extent above inode->i_size => truncate is 247 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
286 * extending the file by 'offset' blocks. 248 udf_write_aext(inode, &epos, &neloc, nelen, 0);
287 */ 249 if (indirect_ext_len) {
288 if ((!epos.bh && 250 /* We managed to free all extents in the
289 epos.offset == 251 * indirect extent - free it too */
290 udf_file_entry_alloc_offset(inode)) || 252 BUG_ON(!epos.bh);
291 (epos.bh && epos.offset == 253 udf_free_blocks(sb, inode, &epos.block,
292 sizeof(struct allocExtDesc))) { 254 0, indirect_ext_len);
293 /* File has no extents at all or has empty last 255 } else if (!epos.bh) {
294 * indirect extent! Create a fake extent... */ 256 iinfo->i_lenAlloc = lenalloc;
295 extent.extLocation.logicalBlockNum = 0; 257 mark_inode_dirty(inode);
296 extent.extLocation.partitionReferenceNum = 0; 258 } else
297 extent.extLength = 259 udf_update_alloc_ext_desc(inode,
298 EXT_NOT_RECORDED_NOT_ALLOCATED; 260 &epos, lenalloc);
299 } else { 261 brelse(epos.bh);
300 epos.offset -= adsize; 262 epos.offset = sizeof(struct allocExtDesc);
301 etype = udf_next_aext(inode, &epos, 263 epos.block = eloc;
302 &extent.extLocation, 264 epos.bh = udf_tread(sb,
303 &extent.extLength, 0); 265 udf_get_lb_pblock(sb, &eloc, 0));
304 extent.extLength |= etype << 30; 266 if (elen)
305 } 267 indirect_ext_len =
306 udf_extend_file(inode, &epos, &extent, 268 (elen + sb->s_blocksize - 1) >>
307 offset + 269 sb->s_blocksize_bits;
308 ((inode->i_size & 270 else
309 (sb->s_blocksize - 1)) != 0)); 271 indirect_ext_len = 1;
272 } else {
273 extent_trunc(inode, &epos, &eloc, etype, elen, 0);
274 epos.offset += adsize;
310 } 275 }
311 } 276 }
277
278 if (indirect_ext_len) {
279 BUG_ON(!epos.bh);
280 udf_free_blocks(sb, inode, &epos.block, 0, indirect_ext_len);
281 } else if (!epos.bh) {
282 iinfo->i_lenAlloc = lenalloc;
283 mark_inode_dirty(inode);
284 } else
285 udf_update_alloc_ext_desc(inode, &epos, lenalloc);
312 iinfo->i_lenExtents = inode->i_size; 286 iinfo->i_lenExtents = inode->i_size;
313 287
314 brelse(epos.bh); 288 brelse(epos.bh);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index eba48209f9f3..dbd52d4b5eed 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -136,22 +136,20 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
136extern long udf_ioctl(struct file *, unsigned int, unsigned long); 136extern long udf_ioctl(struct file *, unsigned int, unsigned long);
137/* inode.c */ 137/* inode.c */
138extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); 138extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
139extern void udf_expand_file_adinicb(struct inode *, int, int *); 139extern int udf_expand_file_adinicb(struct inode *);
140extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); 140extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
141extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 141extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
142extern void udf_truncate(struct inode *); 142extern int udf_setsize(struct inode *, loff_t);
143extern void udf_read_inode(struct inode *); 143extern void udf_read_inode(struct inode *);
144extern void udf_evict_inode(struct inode *); 144extern void udf_evict_inode(struct inode *);
145extern int udf_write_inode(struct inode *, struct writeback_control *wbc); 145extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
146extern long udf_block_map(struct inode *, sector_t); 146extern long udf_block_map(struct inode *, sector_t);
147extern int udf_extend_file(struct inode *, struct extent_position *,
148 struct kernel_long_ad *, sector_t);
149extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, 147extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
150 struct kernel_lb_addr *, uint32_t *, sector_t *); 148 struct kernel_lb_addr *, uint32_t *, sector_t *);
151extern int8_t udf_add_aext(struct inode *, struct extent_position *, 149extern int udf_add_aext(struct inode *, struct extent_position *,
150 struct kernel_lb_addr *, uint32_t, int);
151extern void udf_write_aext(struct inode *, struct extent_position *,
152 struct kernel_lb_addr *, uint32_t, int); 152 struct kernel_lb_addr *, uint32_t, int);
153extern int8_t udf_write_aext(struct inode *, struct extent_position *,
154 struct kernel_lb_addr *, uint32_t, int);
155extern int8_t udf_delete_aext(struct inode *, struct extent_position, 153extern int8_t udf_delete_aext(struct inode *, struct extent_position,
156 struct kernel_lb_addr, uint32_t); 154 struct kernel_lb_addr, uint32_t);
157extern int8_t udf_next_aext(struct inode *, struct extent_position *, 155extern int8_t udf_next_aext(struct inode *, struct extent_position *,
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index 30c8f223253d..e4f10a40768a 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -1,7 +1,6 @@
1config UFS_FS 1config UFS_FS
2 tristate "UFS file system support (read only)" 2 tristate "UFS file system support (read only)"
3 depends on BLOCK 3 depends on BLOCK
4 depends on BKL # probably fixable
5 help 4 help
6 BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, 5 BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
7 OpenBSD and NeXTstep) use a file system called UFS. Some System V 6 OpenBSD and NeXTstep) use a file system called UFS. Some System V
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 2b251f2093af..03c255f12df5 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -34,7 +34,6 @@
34#include <linux/stat.h> 34#include <linux/stat.h>
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/smp_lock.h>
38#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
39#include <linux/writeback.h> 38#include <linux/writeback.h>
40 39
@@ -43,7 +42,7 @@
43#include "swab.h" 42#include "swab.h"
44#include "util.h" 43#include "util.h"
45 44
46static u64 ufs_frag_map(struct inode *inode, sector_t frag); 45static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
47 46
48static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) 47static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
49{ 48{
@@ -82,7 +81,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
82 * the begining of the filesystem. 81 * the begining of the filesystem.
83 */ 82 */
84 83
85static u64 ufs_frag_map(struct inode *inode, sector_t frag) 84static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
86{ 85{
87 struct ufs_inode_info *ufsi = UFS_I(inode); 86 struct ufs_inode_info *ufsi = UFS_I(inode);
88 struct super_block *sb = inode->i_sb; 87 struct super_block *sb = inode->i_sb;
@@ -107,7 +106,8 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag)
107 106
108 p = offsets; 107 p = offsets;
109 108
110 lock_kernel(); 109 if (needs_lock)
110 lock_ufs(sb);
111 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 111 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
112 goto ufs2; 112 goto ufs2;
113 113
@@ -152,7 +152,8 @@ ufs2:
152 ret = temp + (u64) (frag & uspi->s_fpbmask); 152 ret = temp + (u64) (frag & uspi->s_fpbmask);
153 153
154out: 154out:
155 unlock_kernel(); 155 if (needs_lock)
156 unlock_ufs(sb);
156 return ret; 157 return ret;
157} 158}
158 159
@@ -415,14 +416,16 @@ out:
415int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) 416int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
416{ 417{
417 struct super_block * sb = inode->i_sb; 418 struct super_block * sb = inode->i_sb;
418 struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi; 419 struct ufs_sb_info * sbi = UFS_SB(sb);
420 struct ufs_sb_private_info * uspi = sbi->s_uspi;
419 struct buffer_head * bh; 421 struct buffer_head * bh;
420 int ret, err, new; 422 int ret, err, new;
421 unsigned long ptr,phys; 423 unsigned long ptr,phys;
422 u64 phys64 = 0; 424 u64 phys64 = 0;
425 bool needs_lock = (sbi->mutex_owner != current);
423 426
424 if (!create) { 427 if (!create) {
425 phys64 = ufs_frag_map(inode, fragment); 428 phys64 = ufs_frag_map(inode, fragment, needs_lock);
426 UFSD("phys64 = %llu\n", (unsigned long long)phys64); 429 UFSD("phys64 = %llu\n", (unsigned long long)phys64);
427 if (phys64) 430 if (phys64)
428 map_bh(bh_result, sb, phys64); 431 map_bh(bh_result, sb, phys64);
@@ -436,7 +439,8 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
436 ret = 0; 439 ret = 0;
437 bh = NULL; 440 bh = NULL;
438 441
439 lock_kernel(); 442 if (needs_lock)
443 lock_ufs(sb);
440 444
441 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); 445 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
442 if (fragment > 446 if (fragment >
@@ -498,7 +502,9 @@ out:
498 set_buffer_new(bh_result); 502 set_buffer_new(bh_result);
499 map_bh(bh_result, sb, phys); 503 map_bh(bh_result, sb, phys);
500abort: 504abort:
501 unlock_kernel(); 505 if (needs_lock)
506 unlock_ufs(sb);
507
502 return err; 508 return err;
503 509
504abort_too_big: 510abort_too_big:
@@ -506,48 +512,6 @@ abort_too_big:
506 goto abort; 512 goto abort;
507} 513}
508 514
509static struct buffer_head *ufs_getfrag(struct inode *inode,
510 unsigned int fragment,
511 int create, int *err)
512{
513 struct buffer_head dummy;
514 int error;
515
516 dummy.b_state = 0;
517 dummy.b_blocknr = -1000;
518 error = ufs_getfrag_block(inode, fragment, &dummy, create);
519 *err = error;
520 if (!error && buffer_mapped(&dummy)) {
521 struct buffer_head *bh;
522 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
523 if (buffer_new(&dummy)) {
524 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
525 set_buffer_uptodate(bh);
526 mark_buffer_dirty(bh);
527 }
528 return bh;
529 }
530 return NULL;
531}
532
533struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment,
534 int create, int * err)
535{
536 struct buffer_head * bh;
537
538 UFSD("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment);
539 bh = ufs_getfrag (inode, fragment, create, err);
540 if (!bh || buffer_uptodate(bh))
541 return bh;
542 ll_rw_block (READ, 1, &bh);
543 wait_on_buffer (bh);
544 if (buffer_uptodate(bh))
545 return bh;
546 brelse (bh);
547 *err = -EIO;
548 return NULL;
549}
550
551static int ufs_writepage(struct page *page, struct writeback_control *wbc) 515static int ufs_writepage(struct page *page, struct writeback_control *wbc)
552{ 516{
553 return block_write_full_page(page,ufs_getfrag_block,wbc); 517 return block_write_full_page(page,ufs_getfrag_block,wbc);
@@ -900,9 +864,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
900int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) 864int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
901{ 865{
902 int ret; 866 int ret;
903 lock_kernel(); 867 lock_ufs(inode->i_sb);
904 ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 868 ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
905 unlock_kernel(); 869 unlock_ufs(inode->i_sb);
906 return ret; 870 return ret;
907} 871}
908 872
@@ -922,22 +886,22 @@ void ufs_evict_inode(struct inode * inode)
922 if (want_delete) { 886 if (want_delete) {
923 loff_t old_i_size; 887 loff_t old_i_size;
924 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 888 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
925 lock_kernel(); 889 lock_ufs(inode->i_sb);
926 mark_inode_dirty(inode); 890 mark_inode_dirty(inode);
927 ufs_update_inode(inode, IS_SYNC(inode)); 891 ufs_update_inode(inode, IS_SYNC(inode));
928 old_i_size = inode->i_size; 892 old_i_size = inode->i_size;
929 inode->i_size = 0; 893 inode->i_size = 0;
930 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 894 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
931 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); 895 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
932 unlock_kernel(); 896 unlock_ufs(inode->i_sb);
933 } 897 }
934 898
935 invalidate_inode_buffers(inode); 899 invalidate_inode_buffers(inode);
936 end_writeback(inode); 900 end_writeback(inode);
937 901
938 if (want_delete) { 902 if (want_delete) {
939 lock_kernel(); 903 lock_ufs(inode->i_sb);
940 ufs_free_inode (inode); 904 ufs_free_inode (inode);
941 unlock_kernel(); 905 unlock_ufs(inode->i_sb);
942 } 906 }
943} 907}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..29309e25417f 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -29,7 +29,6 @@
29 29
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/smp_lock.h>
33 32
34#include "ufs_fs.h" 33#include "ufs_fs.h"
35#include "ufs.h" 34#include "ufs.h"
@@ -55,16 +54,16 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
55 if (dentry->d_name.len > UFS_MAXNAMLEN) 54 if (dentry->d_name.len > UFS_MAXNAMLEN)
56 return ERR_PTR(-ENAMETOOLONG); 55 return ERR_PTR(-ENAMETOOLONG);
57 56
58 lock_kernel(); 57 lock_ufs(dir->i_sb);
59 ino = ufs_inode_by_name(dir, &dentry->d_name); 58 ino = ufs_inode_by_name(dir, &dentry->d_name);
60 if (ino) { 59 if (ino) {
61 inode = ufs_iget(dir->i_sb, ino); 60 inode = ufs_iget(dir->i_sb, ino);
62 if (IS_ERR(inode)) { 61 if (IS_ERR(inode)) {
63 unlock_kernel(); 62 unlock_ufs(dir->i_sb);
64 return ERR_CAST(inode); 63 return ERR_CAST(inode);
65 } 64 }
66 } 65 }
67 unlock_kernel(); 66 unlock_ufs(dir->i_sb);
68 d_add(dentry, inode); 67 d_add(dentry, inode);
69 return NULL; 68 return NULL;
70} 69}
@@ -93,9 +92,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
93 inode->i_fop = &ufs_file_operations; 92 inode->i_fop = &ufs_file_operations;
94 inode->i_mapping->a_ops = &ufs_aops; 93 inode->i_mapping->a_ops = &ufs_aops;
95 mark_inode_dirty(inode); 94 mark_inode_dirty(inode);
96 lock_kernel(); 95 lock_ufs(dir->i_sb);
97 err = ufs_add_nondir(dentry, inode); 96 err = ufs_add_nondir(dentry, inode);
98 unlock_kernel(); 97 unlock_ufs(dir->i_sb);
99 } 98 }
100 UFSD("END: err=%d\n", err); 99 UFSD("END: err=%d\n", err);
101 return err; 100 return err;
@@ -115,9 +114,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
115 init_special_inode(inode, mode, rdev); 114 init_special_inode(inode, mode, rdev);
116 ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev); 115 ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev);
117 mark_inode_dirty(inode); 116 mark_inode_dirty(inode);
118 lock_kernel(); 117 lock_ufs(dir->i_sb);
119 err = ufs_add_nondir(dentry, inode); 118 err = ufs_add_nondir(dentry, inode);
120 unlock_kernel(); 119 unlock_ufs(dir->i_sb);
121 } 120 }
122 return err; 121 return err;
123} 122}
@@ -133,7 +132,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
133 if (l > sb->s_blocksize) 132 if (l > sb->s_blocksize)
134 goto out_notlocked; 133 goto out_notlocked;
135 134
136 lock_kernel(); 135 lock_ufs(dir->i_sb);
137 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); 136 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
138 err = PTR_ERR(inode); 137 err = PTR_ERR(inode);
139 if (IS_ERR(inode)) 138 if (IS_ERR(inode))
@@ -156,7 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
156 155
157 err = ufs_add_nondir(dentry, inode); 156 err = ufs_add_nondir(dentry, inode);
158out: 157out:
159 unlock_kernel(); 158 unlock_ufs(dir->i_sb);
160out_notlocked: 159out_notlocked:
161 return err; 160 return err;
162 161
@@ -172,9 +171,9 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
172 struct inode *inode = old_dentry->d_inode; 171 struct inode *inode = old_dentry->d_inode;
173 int error; 172 int error;
174 173
175 lock_kernel(); 174 lock_ufs(dir->i_sb);
176 if (inode->i_nlink >= UFS_LINK_MAX) { 175 if (inode->i_nlink >= UFS_LINK_MAX) {
177 unlock_kernel(); 176 unlock_ufs(dir->i_sb);
178 return -EMLINK; 177 return -EMLINK;
179 } 178 }
180 179
@@ -183,7 +182,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
183 ihold(inode); 182 ihold(inode);
184 183
185 error = ufs_add_nondir(dentry, inode); 184 error = ufs_add_nondir(dentry, inode);
186 unlock_kernel(); 185 unlock_ufs(dir->i_sb);
187 return error; 186 return error;
188} 187}
189 188
@@ -195,7 +194,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
195 if (dir->i_nlink >= UFS_LINK_MAX) 194 if (dir->i_nlink >= UFS_LINK_MAX)
196 goto out; 195 goto out;
197 196
198 lock_kernel(); 197 lock_ufs(dir->i_sb);
199 inode_inc_link_count(dir); 198 inode_inc_link_count(dir);
200 199
201 inode = ufs_new_inode(dir, S_IFDIR|mode); 200 inode = ufs_new_inode(dir, S_IFDIR|mode);
@@ -216,7 +215,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
216 err = ufs_add_link(dentry, inode); 215 err = ufs_add_link(dentry, inode);
217 if (err) 216 if (err)
218 goto out_fail; 217 goto out_fail;
219 unlock_kernel(); 218 unlock_ufs(dir->i_sb);
220 219
221 d_instantiate(dentry, inode); 220 d_instantiate(dentry, inode);
222out: 221out:
@@ -228,7 +227,7 @@ out_fail:
228 iput (inode); 227 iput (inode);
229out_dir: 228out_dir:
230 inode_dec_link_count(dir); 229 inode_dec_link_count(dir);
231 unlock_kernel(); 230 unlock_ufs(dir->i_sb);
232 goto out; 231 goto out;
233} 232}
234 233
@@ -259,7 +258,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
259 struct inode * inode = dentry->d_inode; 258 struct inode * inode = dentry->d_inode;
260 int err= -ENOTEMPTY; 259 int err= -ENOTEMPTY;
261 260
262 lock_kernel(); 261 lock_ufs(dir->i_sb);
263 if (ufs_empty_dir (inode)) { 262 if (ufs_empty_dir (inode)) {
264 err = ufs_unlink(dir, dentry); 263 err = ufs_unlink(dir, dentry);
265 if (!err) { 264 if (!err) {
@@ -268,7 +267,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
268 inode_dec_link_count(dir); 267 inode_dec_link_count(dir);
269 } 268 }
270 } 269 }
271 unlock_kernel(); 270 unlock_ufs(dir->i_sb);
272 return err; 271 return err;
273} 272}
274 273
@@ -306,7 +305,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 305 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 306 if (!new_de)
308 goto out_dir; 307 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 308 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 309 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 310 if (dir_de)
@@ -318,12 +316,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 316 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 317 goto out_dir;
320 } 318 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 319 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 320 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 321 goto out_dir;
326 }
327 if (dir_de) 322 if (dir_de)
328 inode_inc_link_count(new_dir); 323 inode_inc_link_count(new_dir);
329 } 324 }
@@ -331,12 +326,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 326 /*
332 * Like most other Unix systems, set the ctime for inodes on a 327 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 328 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 329 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 330 old_inode->i_ctime = CURRENT_TIME_SEC;
337 331
338 ufs_delete_entry(old_dir, old_de, old_page); 332 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 333 mark_inode_dirty(old_inode);
340 334
341 if (dir_de) { 335 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 336 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c61ac5d4e48..7693d6293404 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -84,7 +84,6 @@
84#include <linux/blkdev.h> 84#include <linux/blkdev.h>
85#include <linux/init.h> 85#include <linux/init.h>
86#include <linux/parser.h> 86#include <linux/parser.h>
87#include <linux/smp_lock.h>
88#include <linux/buffer_head.h> 87#include <linux/buffer_head.h>
89#include <linux/vfs.h> 88#include <linux/vfs.h>
90#include <linux/log2.h> 89#include <linux/log2.h>
@@ -96,6 +95,26 @@
96#include "swab.h" 95#include "swab.h"
97#include "util.h" 96#include "util.h"
98 97
98void lock_ufs(struct super_block *sb)
99{
100#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT)
101 struct ufs_sb_info *sbi = UFS_SB(sb);
102
103 mutex_lock(&sbi->mutex);
104 sbi->mutex_owner = current;
105#endif
106}
107
108void unlock_ufs(struct super_block *sb)
109{
110#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT)
111 struct ufs_sb_info *sbi = UFS_SB(sb);
112
113 sbi->mutex_owner = NULL;
114 mutex_unlock(&sbi->mutex);
115#endif
116}
117
99static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) 118static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
100{ 119{
101 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 120 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
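lock_ufs()/unlock_ufs() replace the BKL with a per-superblock mutex (compiled out on !SMP, !PREEMPT builds), and because a mutex is not recursive the callers track ownership through sbi->mutex_owner. The pattern from ufs_getfrag_block() in the fs/ufs/inode.c hunks above, shown in isolation:

	/* sketch: only take the lock if this task does not already hold it */
	struct ufs_sb_info *sbi = UFS_SB(sb);
	bool needs_lock = (sbi->mutex_owner != current);

	if (needs_lock)
		lock_ufs(sb);		/* sets sbi->mutex_owner = current */
	/* ... block mapping / allocation work ... */
	if (needs_lock)
		unlock_ufs(sb);		/* clears mutex_owner before dropping the mutex */
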
@@ -313,7 +332,6 @@ void ufs_panic (struct super_block * sb, const char * function,
313 struct ufs_super_block_first * usb1; 332 struct ufs_super_block_first * usb1;
314 va_list args; 333 va_list args;
315 334
316 lock_kernel();
317 uspi = UFS_SB(sb)->s_uspi; 335 uspi = UFS_SB(sb)->s_uspi;
318 usb1 = ubh_get_usb_first(uspi); 336 usb1 = ubh_get_usb_first(uspi);
319 337
@@ -521,7 +539,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
521 */ 539 */
522 size = uspi->s_cssize; 540 size = uspi->s_cssize;
523 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; 541 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
524 base = space = kmalloc(size, GFP_KERNEL); 542 base = space = kmalloc(size, GFP_NOFS);
525 if (!base) 543 if (!base)
526 goto failed; 544 goto failed;
527 sbi->s_csp = (struct ufs_csum *)space; 545 sbi->s_csp = (struct ufs_csum *)space;
@@ -546,7 +564,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
 	 * Read cylinder group (we read only first fragment from block
 	 * at this time) and prepare internal data structures for cg caching.
 	 */
-	if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_KERNEL)))
+	if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_NOFS)))
 		goto failed;
 	for (i = 0; i < uspi->s_ncg; i++)
 		sbi->s_ucg[i] = NULL;
@@ -564,7 +582,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
 		ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data);
 	}
 	for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) {
-		if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL)))
+		if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_NOFS)))
 			goto failed;
 		sbi->s_cgno[i] = UFS_CGNO_EMPTY;
 	}
@@ -646,8 +664,6 @@ static void ufs_put_super_internal(struct super_block *sb)
 
 	UFSD("ENTER\n");
 
-	lock_kernel();
-
 	ufs_put_cstotal(sb);
 	size = uspi->s_cssize;
 	blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -676,8 +692,6 @@ static void ufs_put_super_internal(struct super_block *sb)
 	kfree (sbi->s_ucg);
 	kfree (base);
 
-	unlock_kernel();
-
 	UFSD("EXIT\n");
 }
 
@@ -696,8 +710,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	unsigned maxsymlen;
 	int ret = -EINVAL;
 
-	lock_kernel();
-
 	uspi = NULL;
 	ubh = NULL;
 	flags = 0;
@@ -718,6 +730,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed;
 	}
 #endif
+	mutex_init(&sbi->mutex);
 	/*
 	 * Set default mount options
 	 * Parse mount options
@@ -1165,7 +1178,6 @@ magic_found:
 			goto failed;
 
 	UFSD("EXIT\n");
-	unlock_kernel();
 	return 0;
 
 dalloc_failed:
@@ -1177,12 +1189,10 @@ failed:
 	kfree(sbi);
 	sb->s_fs_info = NULL;
 	UFSD("EXIT (FAILED)\n");
-	unlock_kernel();
 	return ret;
 
 failed_nomem:
 	UFSD("EXIT (NOMEM)\n");
-	unlock_kernel();
 	return -ENOMEM;
 }
 
@@ -1193,8 +1203,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
 	struct ufs_super_block_third * usb3;
 	unsigned flags;
 
+	lock_ufs(sb);
 	lock_super(sb);
-	lock_kernel();
 
 	UFSD("ENTER\n");
 
@@ -1213,8 +1223,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
 	sb->s_dirt = 0;
 
 	UFSD("EXIT\n");
-	unlock_kernel();
 	unlock_super(sb);
+	unlock_ufs(sb);
 
 	return 0;
 }
@@ -1256,7 +1266,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	unsigned new_mount_opt, ufstype;
 	unsigned flags;
 
-	lock_kernel();
+	lock_ufs(sb);
 	lock_super(sb);
 	uspi = UFS_SB(sb)->s_uspi;
 	flags = UFS_SB(sb)->s_flags;
@@ -1272,7 +1282,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	ufs_set_opt (new_mount_opt, ONERROR_LOCK);
 	if (!ufs_parse_options (data, &new_mount_opt)) {
 		unlock_super(sb);
-		unlock_kernel();
+		unlock_ufs(sb);
 		return -EINVAL;
 	}
 	if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
@@ -1280,14 +1290,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
 		printk("ufstype can't be changed during remount\n");
 		unlock_super(sb);
-		unlock_kernel();
+		unlock_ufs(sb);
 		return -EINVAL;
 	}
 
 	if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
 		UFS_SB(sb)->s_mount_opt = new_mount_opt;
 		unlock_super(sb);
-		unlock_kernel();
+		unlock_ufs(sb);
 		return 0;
 	}
 
@@ -1313,7 +1323,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 		printk("ufs was compiled with read-only support, "
 		"can't be mounted as read-write\n");
 		unlock_super(sb);
-		unlock_kernel();
+		unlock_ufs(sb);
 		return -EINVAL;
 #else
 	if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
@@ -1323,13 +1333,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
 		printk("this ufstype is read-only supported\n");
 		unlock_super(sb);
-		unlock_kernel();
+		unlock_ufs(sb);
 		return -EINVAL;
 	}
 	if (!ufs_read_cylinder_structures(sb)) {
 		printk("failed during remounting\n");
 		unlock_super(sb);
-		unlock_kernel();
+		unlock_ufs(sb);
 		return -EPERM;
 	}
 	sb->s_flags &= ~MS_RDONLY;
@@ -1337,7 +1347,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	}
 	UFS_SB(sb)->s_mount_opt = new_mount_opt;
 	unlock_super(sb);
-	unlock_kernel();
+	unlock_ufs(sb);
 	return 0;
 }
 
@@ -1371,7 +1381,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct ufs_super_block_third *usb3;
 	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 
-	lock_kernel();
+	lock_ufs(sb);
 
 	usb1 = ubh_get_usb_first(uspi);
 	usb2 = ubh_get_usb_second(uspi);
@@ -1395,7 +1405,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_fsid.val[0] = (u32)id;
 	buf->f_fsid.val[1] = (u32)(id >> 32);
 
-	unlock_kernel();
+	unlock_ufs(sb);
 
 	return 0;
 }
@@ -1405,7 +1415,7 @@ static struct kmem_cache * ufs_inode_cachep;
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
 	struct ufs_inode_info *ei;
-	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL);
+	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index a58f9155fc9a..e56a4f567212 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -40,7 +40,6 @@
 #include <linux/time.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/sched.h>
@@ -467,7 +466,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 
 	block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
 
-	lock_kernel();
 	while (1) {
 		retry = ufs_trunc_direct(inode);
 		retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
@@ -487,7 +485,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	ufsi->i_lastfrag = DIRECT_FRAGMENT;
-	unlock_kernel();
 	mark_inode_dirty(inode);
 out:
 	UFSD("EXIT: err %d\n", err);
@@ -510,7 +507,9 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 		/* XXX(truncate): truncate_setsize should be called last */
 		truncate_setsize(inode, attr->ia_size);
 
+		lock_ufs(inode->i_sb);
 		error = ufs_truncate(inode, old_i_size);
+		unlock_ufs(inode->i_sb);
 		if (error)
 			return error;
 	}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index c08782e1b48a..5be2755dd715 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -18,6 +18,8 @@ struct ufs_sb_info {
 	unsigned s_cgno[UFS_MAX_GROUP_LOADED];
 	unsigned short s_cg_loaded;
 	unsigned s_mount_opt;
+	struct mutex mutex;
+	struct task_struct *mutex_owner;
 };
 
 struct ufs_inode_info {
@@ -109,7 +111,6 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
 extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_evict_inode (struct inode *);
-extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
 extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
 
 /* namei.c */
@@ -154,4 +155,7 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b)
 	return do_div(b, uspi->s_fpg);
 }
 
+extern void lock_ufs(struct super_block *sb);
+extern void unlock_ufs(struct super_block *sb);
+
 #endif /* _UFS_UFS_H */
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index d2c36d53fe66..95425b59ce0a 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -27,7 +27,7 @@ struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi,
 	if (count > UFS_MAXFRAG)
 		return NULL;
 	ubh = (struct ufs_buffer_head *)
-		kmalloc (sizeof (struct ufs_buffer_head), GFP_KERNEL);
+		kmalloc (sizeof (struct ufs_buffer_head), GFP_NOFS);
 	if (!ubh)
 		return NULL;
 	ubh->fragment = fragment;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378dd..f83a4c830a65 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -2022,11 +2022,12 @@ xfs_buf_init(void)
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = create_workqueue("xfsdatad");
+	xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-	xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+	xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+						WQ_MEM_RECLAIM, 1);
 	if (!xfsconvertd_workqueue)
 		goto out_destroy_xfsdatad_workqueue;
 
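These workqueues sit in the XFS writeback path, so the patch moves them from create_workqueue() to alloc_workqueue() with WQ_MEM_RECLAIM, which pre-allocates a rescuer thread and therefore guarantees forward progress when new workers cannot be created under memory pressure; max_active is capped at 1. A sketch of the conversion pattern only (the queue name and init function below are made up for illustration):

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* hypothetical queue */

static int __init example_wq_init(void)
{
	/* was: example_wq = create_workqueue("example"); */
	example_wq = alloc_workqueue("example", WQ_MEM_RECLAIM, 1);
	if (!example_wq)
		return -ENOMEM;
	return 0;
}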
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -XFS_ERROR(EPERM);
+	if (!blk_queue_discard(q))
+		return -XFS_ERROR(EOPNOTSUPP);
 	if (copy_from_user(&range, urange, sizeof(range)))
 		return -XFS_ERROR(EFAULT);
 
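The FITRIM handler now rejects devices whose request queue does not advertise discard support before it even copies the range from userspace. A minimal guard sketch along the same lines (the helper name below is hypothetical, not part of the patch):

#include <linux/blkdev.h>

/* Illustrative guard only: fail a trim request early when the backing
 * queue cannot discard, mirroring the check added in the hunk above. */
static int example_can_trim(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;
	return 0;
}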
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
 	 * seven combinations work. The real answer is "don't use v2".
 	 */
 	len = xfs_fileid_length(fileid_type);
-	if (*max_len < len)
+	if (*max_len < len) {
+		*max_len = len;
 		return 255;
+	}
 	*max_len = len;
 
 	switch (fileid_type) {
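The encode_fh fix follows the exportfs buffer contract: when the caller's buffer is too small, the method should still report the required length through *max_len before returning 255, so the caller can learn how large a buffer to pass on a retry. A simplified sketch of that contract (not the XFS code itself; 'needed' and the function name are invented):

/* Simplified sketch of the buffer-size handshake shown above. */
static int example_encode_fh(int needed, int *max_len)
{
	if (*max_len < needed) {
		*max_len = needed;	/* tell the caller what is required */
		return 255;		/* "buffer too small" */
	}
	*max_len = needed;
	/* ... fill in the file handle here ... */
	return 1;			/* hypothetical fileid type */
}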
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
 	xfs_mount_t *mp,
 	void __user *arg)
 {
-	xfs_fsop_geom_v1_t fsgeo;
+	xfs_fsop_geom_t fsgeo;
 	int error;
 
-	error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+	error = xfs_fs_geometry(mp, &fsgeo, 3);
 	if (error)
 		return -error;
 
-	if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+	/*
+	 * Caller should have passed an argument of type
+	 * xfs_fsop_geom_v1_t. This is a proper subset of the
+	 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+	 */
+	if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
 		return -XFS_ERROR(EFAULT);
 	return 0;
 }
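The v1 geometry ioctl now fills in the full xfs_fsop_geom_t and copies back only sizeof(xfs_fsop_geom_v1_t) bytes, relying on the v1 layout being a prefix of the full structure; combined with the memset() added to xfs_fs_geometry() further down, userspace never sees uninitialized kernel stack. The same "fill the superset, copy the prefix" pattern in isolation (standalone C, struct and field names invented for illustration):

#include <string.h>

/* Invented structures that mimic the v1-is-a-prefix relationship. */
struct geom_v1 { int blocksize; int agcount; };
struct geom    { int blocksize; int agcount; int logsectsize; };

static void fill_geom(struct geom *g)
{
	memset(g, 0, sizeof(*g));	/* nothing uninitialized escapes */
	g->blocksize = 4096;
	g->agcount = 4;
	g->logsectsize = 512;
}

/* Copy only the v1 prefix to an old-ABI consumer. */
static void copy_v1(void *dst)
{
	struct geom g;

	fill_geom(&g);
	memcpy(dst, &g, sizeof(struct geom_v1));
}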
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index bd5727852fd6..9ff7fc603d2f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -102,7 +102,8 @@ xfs_mark_inode_dirty(
 STATIC int
 xfs_init_security(
 	struct inode *inode,
-	struct inode *dir)
+	struct inode *dir,
+	const struct qstr *qstr)
 {
 	struct xfs_inode *ip = XFS_I(inode);
 	size_t length;
@@ -110,7 +111,7 @@ xfs_init_security(
 	unsigned char *name;
 	int error;
 
-	error = security_inode_init_security(inode, dir, (char **)&name,
+	error = security_inode_init_security(inode, dir, qstr, (char **)&name,
 					     &value, &length);
 	if (error) {
 		if (error == -EOPNOTSUPP)
@@ -194,7 +195,7 @@ xfs_vn_mknod(
 
 	inode = VFS_I(ip);
 
-	error = xfs_init_security(inode, dir);
+	error = xfs_init_security(inode, dir, &dentry->d_name);
 	if (unlikely(error))
 		goto out_cleanup_inode;
 
@@ -367,7 +368,7 @@ xfs_vn_symlink(
 
 	inode = VFS_I(cip);
 
-	error = xfs_init_security(inode, dir);
+	error = xfs_init_security(inode, dir, &dentry->d_name);
 	if (unlikely(error))
 		goto out_cleanup_inode;
 
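security_inode_init_security() gained a const struct qstr * parameter in this series so an LSM can take the last path component into account when labeling a newly created inode; XFS simply forwards &dentry->d_name at each creation site. A hedged sketch of how a filesystem consumes the updated call (not the XFS code; example_setxattr is a made-up stand-in for the fs-specific xattr helper):

#include <linux/security.h>
#include <linux/slab.h>

static int example_init_security(struct inode *inode, struct inode *dir,
				 const struct qstr *qstr)
{
	char *name;
	void *value;
	size_t len;
	int err;

	err = security_inode_init_security(inode, dir, qstr,
					   &name, &value, &len);
	if (err) {
		if (err == -EOPNOTSUPP)
			return 0;	/* no LSM label to apply */
		return err;
	}

	err = example_setxattr(inode, name, value, len);	/* hypothetical */
	kfree(name);
	kfree(value);
	return err;
}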
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
 	xfs_fsop_geom_t *geo,
 	int new_version)
 {
+
+	memset(geo, 0, sizeof(*geo));
+
 	geo->blocksize = mp->m_sb.sb_blocksize;
 	geo->rtextsize = mp->m_sb.sb_rextsize;
 	geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178bafb6..4aff56395732 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
 	if (!xfs_mru_elem_zone)
 		goto out;
 
-	xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
 	if (!xfs_mru_reap_wq)
 		goto out_destroy_mru_elem_zone;
 