aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/dcache.c11
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/inode.c39
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jfs/jfs_dmap.c5
-rw-r--r--fs/jfs/jfs_txnmgr.c6
-rw-r--r--fs/jfs/namei.c3
-rw-r--r--fs/lockd/clntproc.c9
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/callback_proc.c57
-rw-r--r--fs/nfs/client.c7
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/internal.h13
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4filelayout.c80
-rw-r--r--fs/nfs/nfs4filelayout.h17
-rw-r--r--fs/nfs/nfs4filelayoutdev.c452
-rw-r--r--fs/nfs/nfs4proc.c215
-rw-r--r--fs/nfs/nfs4state.c9
-rw-r--r--fs/nfs/nfs4xdr.c247
-rw-r--r--fs/nfs/objlayout/objio_osd.c20
-rw-r--r--fs/nfs/pagelist.c69
-rw-r--r--fs/nfs/pnfs.c221
-rw-r--r--fs/nfs/pnfs.h74
-rw-r--r--fs/nfs/pnfs_dev.c64
-rw-r--r--fs/nfs/read.c166
-rw-r--r--fs/nfs/unlink.c37
-rw-r--r--fs/nfs/write.c156
-rw-r--r--fs/omfs/dir.c2
-rw-r--r--fs/open.c78
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/proc/generic.c3
-rw-r--r--fs/proc/proc_net.c4
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/read_write.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c6
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_dir2.c16
-rw-r--r--fs/xfs/xfs_filestream.c14
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_log_recover.c4
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_rename.c4
-rw-r--r--fs/xfs/xfs_vnodeops.c10
48 files changed, 1629 insertions, 568 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4d433d34736f..f11e43ed907d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
187 */ 187 */
188static struct inode *anon_inode_mkinode(void) 188static struct inode *anon_inode_mkinode(void)
189{ 189{
190 struct inode *inode = new_inode(anon_inode_mnt->mnt_sb); 190 struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
191 191
192 if (!inode) 192 if (!inode)
193 return ERR_PTR(-ENOMEM); 193 return ERR_PTR(-ENOMEM);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e91b097e7252..caa26ab5ed68 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4467,7 +4467,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4467 inode->i_generation = BTRFS_I(inode)->generation; 4467 inode->i_generation = BTRFS_I(inode)->generation;
4468 btrfs_set_inode_space_info(root, inode); 4468 btrfs_set_inode_space_info(root, inode);
4469 4469
4470 if (mode & S_IFDIR) 4470 if (S_ISDIR(mode))
4471 owner = 0; 4471 owner = 0;
4472 else 4472 else
4473 owner = 1; 4473 owner = 1;
@@ -4512,7 +4512,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4512 4512
4513 btrfs_inherit_iflags(inode, dir); 4513 btrfs_inherit_iflags(inode, dir);
4514 4514
4515 if ((mode & S_IFREG)) { 4515 if (S_ISREG(mode)) {
4516 if (btrfs_test_opt(root, NODATASUM)) 4516 if (btrfs_test_opt(root, NODATASUM))
4517 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 4517 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
4518 if (btrfs_test_opt(root, NODATACOW) || 4518 if (btrfs_test_opt(root, NODATACOW) ||
diff --git a/fs/dcache.c b/fs/dcache.c
index be18598c7fd7..b05aac3a8cfc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2138,8 +2138,9 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
2138 * @target: new dentry 2138 * @target: new dentry
2139 * 2139 *
2140 * Update the dcache to reflect the move of a file name. Negative 2140 * Update the dcache to reflect the move of a file name. Negative
2141 * dcache entries should not be moved in this way. Caller hold 2141 * dcache entries should not be moved in this way. Caller must hold
2142 * rename_lock. 2142 * rename_lock, the i_mutex of the source and target directories,
2143 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2143 */ 2144 */
2144static void __d_move(struct dentry * dentry, struct dentry * target) 2145static void __d_move(struct dentry * dentry, struct dentry * target)
2145{ 2146{
@@ -2202,7 +2203,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
2202 * @target: new dentry 2203 * @target: new dentry
2203 * 2204 *
2204 * Update the dcache to reflect the move of a file name. Negative 2205 * Update the dcache to reflect the move of a file name. Negative
2205 * dcache entries should not be moved in this way. 2206 * dcache entries should not be moved in this way. See the locking
2207 * requirements for __d_move.
2206 */ 2208 */
2207void d_move(struct dentry *dentry, struct dentry *target) 2209void d_move(struct dentry *dentry, struct dentry *target)
2208{ 2210{
@@ -2320,7 +2322,8 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2320 * @inode: inode to bind to the dentry, to which aliases may be attached 2322 * @inode: inode to bind to the dentry, to which aliases may be attached
2321 * 2323 *
2322 * Introduces an dentry into the tree, substituting an extant disconnected 2324 * Introduces an dentry into the tree, substituting an extant disconnected
2323 * root directory alias in its place if there is one 2325 * root directory alias in its place if there is one. Caller must hold the
2326 * i_mutex of the parent directory.
2324 */ 2327 */
2325struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) 2328struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2326{ 2329{
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 516516e0c2a2..3bc073a4cf82 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1018,13 +1018,13 @@ hostdata_error:
1018 fsname++; 1018 fsname++;
1019 if (lm->lm_mount == NULL) { 1019 if (lm->lm_mount == NULL) {
1020 fs_info(sdp, "Now mounting FS...\n"); 1020 fs_info(sdp, "Now mounting FS...\n");
1021 complete(&sdp->sd_locking_init); 1021 complete_all(&sdp->sd_locking_init);
1022 return 0; 1022 return 0;
1023 } 1023 }
1024 ret = lm->lm_mount(sdp, fsname); 1024 ret = lm->lm_mount(sdp, fsname);
1025 if (ret == 0) 1025 if (ret == 0)
1026 fs_info(sdp, "Joined cluster. Now mounting FS...\n"); 1026 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
1027 complete(&sdp->sd_locking_init); 1027 complete_all(&sdp->sd_locking_init);
1028 return ret; 1028 return ret;
1029} 1029}
1030 1030
diff --git a/fs/inode.c b/fs/inode.c
index a48fa5355fb4..d0c72ff6b30e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -361,9 +361,11 @@ EXPORT_SYMBOL_GPL(inode_sb_list_add);
361 361
362static inline void inode_sb_list_del(struct inode *inode) 362static inline void inode_sb_list_del(struct inode *inode)
363{ 363{
364 spin_lock(&inode_sb_list_lock); 364 if (!list_empty(&inode->i_sb_list)) {
365 list_del_init(&inode->i_sb_list); 365 spin_lock(&inode_sb_list_lock);
366 spin_unlock(&inode_sb_list_lock); 366 list_del_init(&inode->i_sb_list);
367 spin_unlock(&inode_sb_list_lock);
368 }
367} 369}
368 370
369static unsigned long hash(struct super_block *sb, unsigned long hashval) 371static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -796,6 +798,29 @@ unsigned int get_next_ino(void)
796EXPORT_SYMBOL(get_next_ino); 798EXPORT_SYMBOL(get_next_ino);
797 799
798/** 800/**
801 * new_inode_pseudo - obtain an inode
802 * @sb: superblock
803 *
804 * Allocates a new inode for given superblock.
805 * Inode wont be chained in superblock s_inodes list
806 * This means :
807 * - fs can't be unmount
808 * - quotas, fsnotify, writeback can't work
809 */
810struct inode *new_inode_pseudo(struct super_block *sb)
811{
812 struct inode *inode = alloc_inode(sb);
813
814 if (inode) {
815 spin_lock(&inode->i_lock);
816 inode->i_state = 0;
817 spin_unlock(&inode->i_lock);
818 INIT_LIST_HEAD(&inode->i_sb_list);
819 }
820 return inode;
821}
822
823/**
799 * new_inode - obtain an inode 824 * new_inode - obtain an inode
800 * @sb: superblock 825 * @sb: superblock
801 * 826 *
@@ -813,13 +838,9 @@ struct inode *new_inode(struct super_block *sb)
813 838
814 spin_lock_prefetch(&inode_sb_list_lock); 839 spin_lock_prefetch(&inode_sb_list_lock);
815 840
816 inode = alloc_inode(sb); 841 inode = new_inode_pseudo(sb);
817 if (inode) { 842 if (inode)
818 spin_lock(&inode->i_lock);
819 inode->i_state = 0;
820 spin_unlock(&inode->i_lock);
821 inode_sb_list_add(inode); 843 inode_sb_list_add(inode);
822 }
823 return inode; 844 return inode;
824} 845}
825EXPORT_SYMBOL(new_inode); 846EXPORT_SYMBOL(new_inode);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index eeead33d8ef0..b81b35ddf4e4 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -80,7 +80,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
80 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); 80 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
81 if (ret) { 81 if (ret) {
82 jffs2_free_raw_inode(ri); 82 jffs2_free_raw_inode(ri);
83 if (S_ISLNK(inode->i_mode & S_IFMT)) 83 if (S_ISLNK(inode->i_mode))
84 kfree(mdata); 84 kfree(mdata);
85 return ret; 85 return ret;
86 } 86 }
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 4496872cf4e7..9cbd11a3f804 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -3161,7 +3161,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3161{ 3161{
3162 int rc; 3162 int rc;
3163 int dbitno, word, rembits, nb, nwords, wbitno, agno; 3163 int dbitno, word, rembits, nb, nwords, wbitno, agno;
3164 s8 oldroot, *leaf; 3164 s8 oldroot;
3165 struct dmaptree *tp = (struct dmaptree *) & dp->tree; 3165 struct dmaptree *tp = (struct dmaptree *) & dp->tree;
3166 3166
3167 /* save the current value of the root (i.e. maximum free string) 3167 /* save the current value of the root (i.e. maximum free string)
@@ -3169,9 +3169,6 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3169 */ 3169 */
3170 oldroot = tp->stree[ROOT]; 3170 oldroot = tp->stree[ROOT];
3171 3171
3172 /* pick up a pointer to the leaves of the dmap tree */
3173 leaf = tp->stree + LEAFIND;
3174
3175 /* determine the bit number and word within the dmap of the 3172 /* determine the bit number and word within the dmap of the
3176 * starting block. 3173 * starting block.
3177 */ 3174 */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f6cc0c09ec63..af9606057dde 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1143,7 +1143,6 @@ int txCommit(tid_t tid, /* transaction identifier */
1143 struct jfs_log *log; 1143 struct jfs_log *log;
1144 struct tblock *tblk; 1144 struct tblock *tblk;
1145 struct lrd *lrd; 1145 struct lrd *lrd;
1146 int lsn;
1147 struct inode *ip; 1146 struct inode *ip;
1148 struct jfs_inode_info *jfs_ip; 1147 struct jfs_inode_info *jfs_ip;
1149 int k, n; 1148 int k, n;
@@ -1310,7 +1309,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1310 */ 1309 */
1311 lrd->type = cpu_to_le16(LOG_COMMIT); 1310 lrd->type = cpu_to_le16(LOG_COMMIT);
1312 lrd->length = 0; 1311 lrd->length = 0;
1313 lsn = lmLog(log, tblk, lrd, NULL); 1312 lmLog(log, tblk, lrd, NULL);
1314 1313
1315 lmGroupCommit(log, tblk); 1314 lmGroupCommit(log, tblk);
1316 1315
@@ -2935,7 +2934,6 @@ int jfs_sync(void *arg)
2935{ 2934{
2936 struct inode *ip; 2935 struct inode *ip;
2937 struct jfs_inode_info *jfs_ip; 2936 struct jfs_inode_info *jfs_ip;
2938 int rc;
2939 tid_t tid; 2937 tid_t tid;
2940 2938
2941 do { 2939 do {
@@ -2961,7 +2959,7 @@ int jfs_sync(void *arg)
2961 */ 2959 */
2962 TXN_UNLOCK(); 2960 TXN_UNLOCK();
2963 tid = txBegin(ip->i_sb, COMMIT_INODE); 2961 tid = txBegin(ip->i_sb, COMMIT_INODE);
2964 rc = txCommit(tid, 1, &ip, 0); 2962 txCommit(tid, 1, &ip, 0);
2965 txEnd(tid); 2963 txEnd(tid);
2966 mutex_unlock(&jfs_ip->commit_mutex); 2964 mutex_unlock(&jfs_ip->commit_mutex);
2967 2965
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 29b1f1a21142..e17545e15664 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -893,7 +893,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
893 unchar *i_fastsymlink; 893 unchar *i_fastsymlink;
894 s64 xlen = 0; 894 s64 xlen = 0;
895 int bmask = 0, xsize; 895 int bmask = 0, xsize;
896 s64 extent = 0, xaddr; 896 s64 xaddr;
897 struct metapage *mp; 897 struct metapage *mp;
898 struct super_block *sb; 898 struct super_block *sb;
899 struct tblock *tblk; 899 struct tblock *tblk;
@@ -993,7 +993,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
993 txAbort(tid, 0); 993 txAbort(tid, 0);
994 goto out3; 994 goto out3;
995 } 995 }
996 extent = xaddr;
997 ip->i_size = ssize - 1; 996 ip->i_size = ssize - 1;
998 while (ssize) { 997 while (ssize) {
999 /* This is kind of silly since PATH_MAX == 4K */ 998 /* This is kind of silly since PATH_MAX == 4K */
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e374050a911c..8392cb85bd54 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
302 /* We appear to be out of the grace period */ 302 /* We appear to be out of the grace period */
303 wake_up_all(&host->h_gracewait); 303 wake_up_all(&host->h_gracewait);
304 } 304 }
305 dprintk("lockd: server returns status %d\n", resp->status); 305 dprintk("lockd: server returns status %d\n",
306 ntohl(resp->status));
306 return 0; /* Okay, call complete */ 307 return 0; /* Okay, call complete */
307 } 308 }
308 309
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
690 goto out; 691 goto out;
691 692
692 if (resp->status != nlm_lck_denied_nolocks) 693 if (resp->status != nlm_lck_denied_nolocks)
693 printk("lockd: unexpected unlock status: %d\n", resp->status); 694 printk("lockd: unexpected unlock status: %d\n",
695 ntohl(resp->status));
694 /* What to do now? I'm out of my depth... */ 696 /* What to do now? I'm out of my depth... */
695 status = -ENOLCK; 697 status = -ENOLCK;
696out: 698out:
@@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
843 return -ENOLCK; 845 return -ENOLCK;
844#endif 846#endif
845 } 847 }
846 printk(KERN_NOTICE "lockd: unexpected server status %d\n", status); 848 printk(KERN_NOTICE "lockd: unexpected server status %d\n",
849 ntohl(status));
847 return -ENOLCK; 850 return -ENOLCK;
848} 851}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 81515545ba75..2cde5d954750 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,6 +77,7 @@ config NFS_V4
77config NFS_V4_1 77config NFS_V4_1
78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL 79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL
80 select SUNRPC_BACKCHANNEL
80 select PNFS_FILE_LAYOUT 81 select PNFS_FILE_LAYOUT
81 help 82 help
82 This option enables support for minor version 1 of the NFSv4 protocol 83 This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb9..74780f9f852c 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
111static u32 initiate_file_draining(struct nfs_client *clp, 111static u32 initiate_file_draining(struct nfs_client *clp,
112 struct cb_layoutrecallargs *args) 112 struct cb_layoutrecallargs *args)
113{ 113{
114 struct nfs_server *server;
114 struct pnfs_layout_hdr *lo; 115 struct pnfs_layout_hdr *lo;
115 struct inode *ino; 116 struct inode *ino;
116 bool found = false; 117 bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
118 LIST_HEAD(free_me_list); 119 LIST_HEAD(free_me_list);
119 120
120 spin_lock(&clp->cl_lock); 121 spin_lock(&clp->cl_lock);
121 list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { 122 rcu_read_lock();
122 if (nfs_compare_fh(&args->cbl_fh, 123 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
123 &NFS_I(lo->plh_inode)->fh)) 124 list_for_each_entry(lo, &server->layouts, plh_layouts) {
124 continue; 125 if (nfs_compare_fh(&args->cbl_fh,
125 ino = igrab(lo->plh_inode); 126 &NFS_I(lo->plh_inode)->fh))
126 if (!ino) 127 continue;
127 continue; 128 ino = igrab(lo->plh_inode);
128 found = true; 129 if (!ino)
129 /* Without this, layout can be freed as soon 130 continue;
130 * as we release cl_lock. 131 found = true;
131 */ 132 /* Without this, layout can be freed as soon
132 get_layout_hdr(lo); 133 * as we release cl_lock.
133 break; 134 */
135 get_layout_hdr(lo);
136 break;
137 }
138 if (found)
139 break;
134 } 140 }
141 rcu_read_unlock();
135 spin_unlock(&clp->cl_lock); 142 spin_unlock(&clp->cl_lock);
143
136 if (!found) 144 if (!found)
137 return NFS4ERR_NOMATCHING_LAYOUT; 145 return NFS4ERR_NOMATCHING_LAYOUT;
138 146
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
154static u32 initiate_bulk_draining(struct nfs_client *clp, 162static u32 initiate_bulk_draining(struct nfs_client *clp,
155 struct cb_layoutrecallargs *args) 163 struct cb_layoutrecallargs *args)
156{ 164{
165 struct nfs_server *server;
157 struct pnfs_layout_hdr *lo; 166 struct pnfs_layout_hdr *lo;
158 struct inode *ino; 167 struct inode *ino;
159 u32 rv = NFS4ERR_NOMATCHING_LAYOUT; 168 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
167 }; 176 };
168 177
169 spin_lock(&clp->cl_lock); 178 spin_lock(&clp->cl_lock);
170 list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { 179 rcu_read_lock();
180 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
171 if ((args->cbl_recall_type == RETURN_FSID) && 181 if ((args->cbl_recall_type == RETURN_FSID) &&
172 memcmp(&NFS_SERVER(lo->plh_inode)->fsid, 182 memcmp(&server->fsid, &args->cbl_fsid,
173 &args->cbl_fsid, sizeof(struct nfs_fsid))) 183 sizeof(struct nfs_fsid)))
174 continue;
175 if (!igrab(lo->plh_inode))
176 continue; 184 continue;
177 get_layout_hdr(lo); 185
178 BUG_ON(!list_empty(&lo->plh_bulk_recall)); 186 list_for_each_entry(lo, &server->layouts, plh_layouts) {
179 list_add(&lo->plh_bulk_recall, &recall_list); 187 if (!igrab(lo->plh_inode))
188 continue;
189 get_layout_hdr(lo);
190 BUG_ON(!list_empty(&lo->plh_bulk_recall));
191 list_add(&lo->plh_bulk_recall, &recall_list);
192 }
180 } 193 }
194 rcu_read_unlock();
181 spin_unlock(&clp->cl_lock); 195 spin_unlock(&clp->cl_lock);
196
182 list_for_each_entry_safe(lo, tmp, 197 list_for_each_entry_safe(lo, tmp,
183 &recall_list, plh_bulk_recall) { 198 &recall_list, plh_bulk_recall) {
184 ino = lo->plh_inode; 199 ino = lo->plh_inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b88b65b..19ea7d9c75e6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
188 cred = rpc_lookup_machine_cred(); 188 cred = rpc_lookup_machine_cred();
189 if (!IS_ERR(cred)) 189 if (!IS_ERR(cred))
190 clp->cl_machine_cred = cred; 190 clp->cl_machine_cred = cred;
191#if defined(CONFIG_NFS_V4_1)
192 INIT_LIST_HEAD(&clp->cl_layouts);
193#endif
194 nfs_fscache_get_client_cookie(clp); 191 nfs_fscache_get_client_cookie(clp);
195 192
196 return clp; 193 return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
293 nfs4_deviceid_purge_client(clp); 290 nfs4_deviceid_purge_client(clp);
294 291
295 kfree(clp->cl_hostname); 292 kfree(clp->cl_hostname);
293 kfree(clp->server_scope);
296 kfree(clp); 294 kfree(clp);
297 295
298 dprintk("<-- nfs_free_client()\n"); 296 dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
1062 INIT_LIST_HEAD(&server->client_link); 1060 INIT_LIST_HEAD(&server->client_link);
1063 INIT_LIST_HEAD(&server->master_link); 1061 INIT_LIST_HEAD(&server->master_link);
1064 INIT_LIST_HEAD(&server->delegations); 1062 INIT_LIST_HEAD(&server->delegations);
1063 INIT_LIST_HEAD(&server->layouts);
1065 1064
1066 atomic_set(&server->active, 0); 1065 atomic_set(&server->active, 0);
1067 1066
@@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1464 dprintk("<-- %s %p\n", __func__, clp); 1463 dprintk("<-- %s %p\n", __func__, clp);
1465 return clp; 1464 return clp;
1466} 1465}
1467EXPORT_SYMBOL(nfs4_set_ds_client); 1466EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
1468 1467
1469/* 1468/*
1470 * Session has been established, and the client marked ready. 1469 * Session has been established, and the client marked ready.
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2aec375..321a66bc3846 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
398 return err; 398 return err;
399} 399}
400 400
401static void nfs_mark_return_delegation(struct nfs_delegation *delegation) 401static void nfs_mark_return_delegation(struct nfs_server *server,
402 struct nfs_delegation *delegation)
402{ 403{
403 struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
404
405 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 404 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
406 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 405 set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
407} 406}
408 407
409/** 408/**
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
441 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) 440 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
442 continue; 441 continue;
443 if (delegation->type & flags) 442 if (delegation->type & flags)
444 nfs_mark_return_delegation(delegation); 443 nfs_mark_return_delegation(server, delegation);
445 } 444 }
446} 445}
447 446
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
508 list_for_each_entry_rcu(delegation, &server->delegations, super_list) { 507 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
509 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 508 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
510 continue; 509 continue;
511 nfs_mark_return_delegation(delegation); 510 nfs_mark_return_delegation(server, delegation);
512 } 511 }
513} 512}
514 513
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
539int nfs_async_inode_return_delegation(struct inode *inode, 538int nfs_async_inode_return_delegation(struct inode *inode,
540 const nfs4_stateid *stateid) 539 const nfs4_stateid *stateid)
541{ 540{
542 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 541 struct nfs_server *server = NFS_SERVER(inode);
542 struct nfs_client *clp = server->nfs_client;
543 struct nfs_delegation *delegation; 543 struct nfs_delegation *delegation;
544 544
545 rcu_read_lock(); 545 rcu_read_lock();
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
549 rcu_read_unlock(); 549 rcu_read_unlock();
550 return -ENOENT; 550 return -ENOENT;
551 } 551 }
552 nfs_mark_return_delegation(delegation); 552 nfs_mark_return_delegation(server, delegation);
553 rcu_read_unlock(); 553 rcu_read_unlock();
554 554
555 nfs_delegation_run_state_manager(clp); 555 nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347a2daa..ab12913dd473 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
277extern char *nfs_path(char **p, struct dentry *dentry, 277extern char *nfs_path(char **p, struct dentry *dentry,
278 char *buffer, ssize_t buflen); 278 char *buffer, ssize_t buflen);
279extern struct vfsmount *nfs_d_automount(struct path *path); 279extern struct vfsmount *nfs_d_automount(struct path *path);
280#ifdef CONFIG_NFS_V4
281rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
282#endif
280 283
281/* getroot.c */ 284/* getroot.c */
282extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, 285extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
288extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 291extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
289#endif 292#endif
290 293
294struct nfs_pageio_descriptor;
291/* read.c */ 295/* read.c */
292extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 296extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
293 const struct rpc_call_ops *call_ops); 297 const struct rpc_call_ops *call_ops);
294extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 298extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
299extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
300 struct list_head *head);
301
302extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
303extern void nfs_readdata_release(struct nfs_read_data *rdata);
295 304
296/* write.c */ 305/* write.c */
306extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
307 struct list_head *head);
308extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
309extern void nfs_writedata_release(struct nfs_write_data *wdata);
297extern void nfs_commit_free(struct nfs_write_data *p); 310extern void nfs_commit_free(struct nfs_write_data *p);
298extern int nfs_initiate_write(struct nfs_write_data *data, 311extern int nfs_initiate_write(struct nfs_write_data *data,
299 struct rpc_clnt *clnt, 312 struct rpc_clnt *clnt,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1f063bacd285..8102391bb374 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
119} 119}
120 120
121#ifdef CONFIG_NFS_V4 121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) 122rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{ 123{
124 struct gss_api_mech *mech; 124 struct gss_api_mech *mech;
125 struct xdr_netobj oid; 125 struct xdr_netobj oid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b788f2eb1ba0..1909ee8be350 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@ enum nfs4_client_state {
48 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
49 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
50 NFS4CLNT_LEASE_CONFIRM, 50 NFS4CLNT_LEASE_CONFIRM,
51 NFS4CLNT_SERVER_SCOPE_MISMATCH,
51}; 52};
52 53
53enum nfs4_session_state { 54enum nfs4_session_state {
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
66 int cache_reply); 67 int cache_reply);
67 int (*validate_stateid)(struct nfs_delegation *, 68 int (*validate_stateid)(struct nfs_delegation *,
68 const nfs4_stateid *); 69 const nfs4_stateid *);
70 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
71 struct nfs_fsinfo *);
69 const struct nfs4_state_recovery_ops *reboot_recovery_ops; 72 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
70 const struct nfs4_state_recovery_ops *nograce_recovery_ops; 73 const struct nfs4_state_recovery_ops *nograce_recovery_ops;
71 const struct nfs4_state_maintenance_ops *state_renewal_ops; 74 const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
349extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); 352extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
350extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 353extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
351extern void nfs41_handle_recall_slot(struct nfs_client *clp); 354extern void nfs41_handle_recall_slot(struct nfs_client *clp);
355extern void nfs41_handle_server_scope(struct nfs_client *,
356 struct server_scope **);
352extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 357extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
353extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 358extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
354extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); 359extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03abcd04c..be93a622872c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
334 __func__, data->inode->i_ino, 334 __func__, data->inode->i_ino,
335 data->args.pgbase, (size_t)data->args.count, offset); 335 data->args.pgbase, (size_t)data->args.count, offset);
336 336
337 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
338 return PNFS_NOT_ATTEMPTED;
339
337 /* Retrieve the correct rpc_client for the byte range */ 340 /* Retrieve the correct rpc_client for the byte range */
338 j = nfs4_fl_calc_j_index(lseg, offset); 341 j = nfs4_fl_calc_j_index(lseg, offset);
339 idx = nfs4_fl_calc_ds_index(lseg, j); 342 idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
344 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 347 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
345 return PNFS_NOT_ATTEMPTED; 348 return PNFS_NOT_ATTEMPTED;
346 } 349 }
347 dprintk("%s USE DS:ip %x %hu\n", __func__, 350 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
348 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
349 351
350 /* No multipath support. Use first DS */ 352 /* No multipath support. Use first DS */
351 data->ds_clp = ds->ds_clp; 353 data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
374 struct nfs_fh *fh; 376 struct nfs_fh *fh;
375 int status; 377 int status;
376 378
379 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
380 return PNFS_NOT_ATTEMPTED;
381
377 /* Retrieve the correct rpc_client for the byte range */ 382 /* Retrieve the correct rpc_client for the byte range */
378 j = nfs4_fl_calc_j_index(lseg, offset); 383 j = nfs4_fl_calc_j_index(lseg, offset);
379 idx = nfs4_fl_calc_ds_index(lseg, j); 384 idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
384 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 389 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
385 return PNFS_NOT_ATTEMPTED; 390 return PNFS_NOT_ATTEMPTED;
386 } 391 }
387 dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, 392 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
388 data->inode->i_ino, sync, (size_t) data->args.count, offset, 393 data->inode->i_ino, sync, (size_t) data->args.count, offset,
389 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 394 ds->ds_remotestr);
390 395
391 data->write_done_cb = filelayout_write_done_cb; 396 data->write_done_cb = filelayout_write_done_cb;
392 data->ds_clp = ds->ds_clp; 397 data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
428 433
429 dprintk("--> %s\n", __func__); 434 dprintk("--> %s\n", __func__);
430 435
436 /* FIXME: remove this check when layout segment support is added */
437 if (lgr->range.offset != 0 ||
438 lgr->range.length != NFS4_MAX_UINT64) {
439 dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
440 __func__);
441 goto out;
442 }
443
431 if (fl->pattern_offset > lgr->range.offset) { 444 if (fl->pattern_offset > lgr->range.offset) {
432 dprintk("%s pattern_offset %lld too large\n", 445 dprintk("%s pattern_offset %lld too large\n",
433 __func__, fl->pattern_offset); 446 __func__, fl->pattern_offset);
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
449 goto out; 462 goto out;
450 } else 463 } else
451 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); 464 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
465 /* Found deviceid is being reaped */
466 if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
467 goto out_put;
468
452 fl->dsaddr = dsaddr; 469 fl->dsaddr = dsaddr;
453 470
454 if (fl->first_stripe_index < 0 || 471 if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 * return true : coalesce page 676 * return true : coalesce page
660 * return false : don't coalesce page 677 * return false : don't coalesce page
661 */ 678 */
662bool 679static bool
663filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 680filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
664 struct nfs_page *req) 681 struct nfs_page *req)
665{ 682{
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
670 !nfs_generic_pg_test(pgio, prev, req)) 687 !nfs_generic_pg_test(pgio, prev, req))
671 return false; 688 return false;
672 689
673 if (!pgio->pg_lseg)
674 return 1;
675 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 690 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
676 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 691 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
677 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 692 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
682 return (p_stripe == r_stripe); 697 return (p_stripe == r_stripe);
683} 698}
684 699
700void
701filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
702 struct nfs_page *req)
703{
704 BUG_ON(pgio->pg_lseg != NULL);
705
706 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
707 req->wb_context,
708 0,
709 NFS4_MAX_UINT64,
710 IOMODE_READ,
711 GFP_KERNEL);
712 /* If no lseg, fall back to read through mds */
713 if (pgio->pg_lseg == NULL)
714 nfs_pageio_reset_read_mds(pgio);
715}
716
717void
718filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
719 struct nfs_page *req)
720{
721 BUG_ON(pgio->pg_lseg != NULL);
722
723 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
724 req->wb_context,
725 0,
726 NFS4_MAX_UINT64,
727 IOMODE_RW,
728 GFP_NOFS);
729 /* If no lseg, fall back to write through mds */
730 if (pgio->pg_lseg == NULL)
731 nfs_pageio_reset_write_mds(pgio);
732}
733
734static const struct nfs_pageio_ops filelayout_pg_read_ops = {
735 .pg_init = filelayout_pg_init_read,
736 .pg_test = filelayout_pg_test,
737 .pg_doio = pnfs_generic_pg_readpages,
738};
739
740static const struct nfs_pageio_ops filelayout_pg_write_ops = {
741 .pg_init = filelayout_pg_init_write,
742 .pg_test = filelayout_pg_test,
743 .pg_doio = pnfs_generic_pg_writepages,
744};
745
685static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) 746static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
686{ 747{
687 return !FILELAYOUT_LSEG(lseg)->commit_through_mds; 748 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
879 .owner = THIS_MODULE, 940 .owner = THIS_MODULE,
880 .alloc_lseg = filelayout_alloc_lseg, 941 .alloc_lseg = filelayout_alloc_lseg,
881 .free_lseg = filelayout_free_lseg, 942 .free_lseg = filelayout_free_lseg,
882 .pg_test = filelayout_pg_test, 943 .pg_read_ops = &filelayout_pg_read_ops,
944 .pg_write_ops = &filelayout_pg_write_ops,
883 .mark_pnfs_commit = filelayout_mark_pnfs_commit, 945 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
884 .choose_commit_list = filelayout_choose_commit_list, 946 .choose_commit_list = filelayout_choose_commit_list,
885 .commit_pagelist = filelayout_commit_pagelist, 947 .commit_pagelist = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
902 pnfs_unregister_layoutdriver(&filelayout_type); 964 pnfs_unregister_layoutdriver(&filelayout_type);
903} 965}
904 966
967MODULE_ALIAS("nfs-layouttype4-1");
968
905module_init(nfs4filelayout_init); 969module_init(nfs4filelayout_init);
906module_exit(nfs4filelayout_exit); 970module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e3795e..2e42284253fa 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@ enum stripetype4 {
47}; 47};
48 48
49/* Individual ip address */ 49/* Individual ip address */
50struct nfs4_pnfs_ds_addr {
51 struct sockaddr_storage da_addr;
52 size_t da_addrlen;
53 struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
54 char *da_remotestr; /* human readable addr+port */
55};
56
50struct nfs4_pnfs_ds { 57struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 58 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr; 59 char *ds_remotestr; /* comma sep list of addrs */
53 u32 ds_port; 60 struct list_head ds_addrs;
54 struct nfs_client *ds_clp; 61 struct nfs_client *ds_clp;
55 atomic_t ds_count; 62 atomic_t ds_count;
56}; 63};
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
89 generic_hdr); 96 generic_hdr);
90} 97}
91 98
99static inline struct nfs4_deviceid_node *
100FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
101{
102 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
103}
104
92extern struct nfs_fh * 105extern struct nfs_fh *
93nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 106nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
94 107
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf1377264..ed388aae9689 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
56 printk("%s NULL device\n", __func__); 56 printk("%s NULL device\n", __func__);
57 return; 57 return;
58 } 58 }
59 printk(" ip_addr %x port %hu\n" 59 printk(" ds %s\n"
60 " ref count %d\n" 60 " ref count %d\n"
61 " client %p\n" 61 " client %p\n"
62 " cl_exchange_flags %x\n", 62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), 63 ds->ds_remotestr,
64 atomic_read(&ds->ds_count), ds->ds_clp, 64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); 65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66} 66}
67 67
68/* nfs4_ds_cache_lock is held */ 68static bool
69static struct nfs4_pnfs_ds * 69same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
70_data_server_lookup_locked(u32 ip_addr, u32 port)
71{ 70{
72 struct nfs4_pnfs_ds *ds; 71 struct sockaddr_in *a, *b;
72 struct sockaddr_in6 *a6, *b6;
73
74 if (addr1->sa_family != addr2->sa_family)
75 return false;
76
77 switch (addr1->sa_family) {
78 case AF_INET:
79 a = (struct sockaddr_in *)addr1;
80 b = (struct sockaddr_in *)addr2;
81
82 if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
83 a->sin_port == b->sin_port)
84 return true;
85 break;
86
87 case AF_INET6:
88 a6 = (struct sockaddr_in6 *)addr1;
89 b6 = (struct sockaddr_in6 *)addr2;
90
91 /* LINKLOCAL addresses must have matching scope_id */
92 if (ipv6_addr_scope(&a6->sin6_addr) ==
93 IPV6_ADDR_SCOPE_LINKLOCAL &&
94 a6->sin6_scope_id != b6->sin6_scope_id)
95 return false;
96
97 if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
98 a6->sin6_port == b6->sin6_port)
99 return true;
100 break;
101
102 default:
103 dprintk("%s: unhandled address family: %u\n",
104 __func__, addr1->sa_family);
105 return false;
106 }
73 107
74 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", 108 return false;
75 ntohl(ip_addr), ntohs(port)); 109}
76 110
77 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { 111/*
78 if (ds->ds_ip_addr == ip_addr && 112 * Lookup DS by addresses. The first matching address returns true.
79 ds->ds_port == port) { 113 * nfs4_ds_cache_lock is held
80 return ds; 114 */
115static struct nfs4_pnfs_ds *
116_data_server_lookup_locked(struct list_head *dsaddrs)
117{
118 struct nfs4_pnfs_ds *ds;
119 struct nfs4_pnfs_ds_addr *da1, *da2;
120
121 list_for_each_entry(da1, dsaddrs, da_node) {
122 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
123 list_for_each_entry(da2, &ds->ds_addrs, da_node) {
124 if (same_sockaddr(
125 (struct sockaddr *)&da1->da_addr,
126 (struct sockaddr *)&da2->da_addr))
127 return ds;
128 }
81 } 129 }
82 } 130 }
83 return NULL; 131 return NULL;
84} 132}
85 133
86/* 134/*
135 * Compare two lists of addresses.
136 */
137static bool
138_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
139 struct list_head *dsaddrs2)
140{
141 struct nfs4_pnfs_ds_addr *da1, *da2;
142 size_t count1 = 0,
143 count2 = 0;
144
145 list_for_each_entry(da1, dsaddrs1, da_node)
146 count1++;
147
148 list_for_each_entry(da2, dsaddrs2, da_node) {
149 bool found = false;
150 count2++;
151 list_for_each_entry(da1, dsaddrs1, da_node) {
152 if (same_sockaddr((struct sockaddr *)&da1->da_addr,
153 (struct sockaddr *)&da2->da_addr)) {
154 found = true;
155 break;
156 }
157 }
158 if (!found)
159 return false;
160 }
161
162 return (count1 == count2);
163}
164
165/*
87 * Create an rpc connection to the nfs4_pnfs_ds data server 166 * Create an rpc connection to the nfs4_pnfs_ds data server
88 * Currently only support IPv4 167 * Currently only supports IPv4 and IPv6 addresses
89 */ 168 */
90static int 169static int
91nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) 170nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
92{ 171{
93 struct nfs_client *clp; 172 struct nfs_client *clp = ERR_PTR(-EIO);
94 struct sockaddr_in sin; 173 struct nfs4_pnfs_ds_addr *da;
95 int status = 0; 174 int status = 0;
96 175
97 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, 176 dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
98 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
99 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); 177 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
100 178
101 sin.sin_family = AF_INET; 179 BUG_ON(list_empty(&ds->ds_addrs));
102 sin.sin_addr.s_addr = ds->ds_ip_addr; 180
103 sin.sin_port = ds->ds_port; 181 list_for_each_entry(da, &ds->ds_addrs, da_node) {
182 dprintk("%s: DS %s: trying address %s\n",
183 __func__, ds->ds_remotestr, da->da_remotestr);
184
185 clp = nfs4_set_ds_client(mds_srv->nfs_client,
186 (struct sockaddr *)&da->da_addr,
187 da->da_addrlen, IPPROTO_TCP);
188 if (!IS_ERR(clp))
189 break;
190 }
104 191
105 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
106 sizeof(sin), IPPROTO_TCP);
107 if (IS_ERR(clp)) { 192 if (IS_ERR(clp)) {
108 status = PTR_ERR(clp); 193 status = PTR_ERR(clp);
109 goto out; 194 goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
115 goto out_put; 200 goto out_put;
116 } 201 }
117 ds->ds_clp = clp; 202 ds->ds_clp = clp;
118 dprintk("%s [existing] ip=%x, port=%hu\n", __func__, 203 dprintk("%s [existing] server=%s\n", __func__,
119 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 204 ds->ds_remotestr);
120 goto out; 205 goto out;
121 } 206 }
122 207
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
135 goto out_put; 220 goto out_put;
136 221
137 ds->ds_clp = clp; 222 ds->ds_clp = clp;
138 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), 223 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
139 ntohs(ds->ds_port));
140out: 224out:
141 return status; 225 return status;
142out_put: 226out_put:
@@ -147,12 +231,25 @@ out_put:
147static void 231static void
148destroy_ds(struct nfs4_pnfs_ds *ds) 232destroy_ds(struct nfs4_pnfs_ds *ds)
149{ 233{
234 struct nfs4_pnfs_ds_addr *da;
235
150 dprintk("--> %s\n", __func__); 236 dprintk("--> %s\n", __func__);
151 ifdebug(FACILITY) 237 ifdebug(FACILITY)
152 print_ds(ds); 238 print_ds(ds);
153 239
154 if (ds->ds_clp) 240 if (ds->ds_clp)
155 nfs_put_client(ds->ds_clp); 241 nfs_put_client(ds->ds_clp);
242
243 while (!list_empty(&ds->ds_addrs)) {
244 da = list_first_entry(&ds->ds_addrs,
245 struct nfs4_pnfs_ds_addr,
246 da_node);
247 list_del_init(&da->da_node);
248 kfree(da->da_remotestr);
249 kfree(da);
250 }
251
252 kfree(ds->ds_remotestr);
156 kfree(ds); 253 kfree(ds);
157} 254}
158 255
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
179 kfree(dsaddr); 276 kfree(dsaddr);
180} 277}
181 278
279/*
280 * Create a string with a human readable address and port to avoid
281 * complicated setup around many dprinks.
282 */
283static char *
284nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
285{
286 struct nfs4_pnfs_ds_addr *da;
287 char *remotestr;
288 size_t len;
289 char *p;
290
291 len = 3; /* '{', '}' and eol */
292 list_for_each_entry(da, dsaddrs, da_node) {
293 len += strlen(da->da_remotestr) + 1; /* string plus comma */
294 }
295
296 remotestr = kzalloc(len, gfp_flags);
297 if (!remotestr)
298 return NULL;
299
300 p = remotestr;
301 *(p++) = '{';
302 len--;
303 list_for_each_entry(da, dsaddrs, da_node) {
304 size_t ll = strlen(da->da_remotestr);
305
306 if (ll > len)
307 goto out_err;
308
309 memcpy(p, da->da_remotestr, ll);
310 p += ll;
311 len -= ll;
312
313 if (len < 1)
314 goto out_err;
315 (*p++) = ',';
316 len--;
317 }
318 if (len < 2)
319 goto out_err;
320 *(p++) = '}';
321 *p = '\0';
322 return remotestr;
323out_err:
324 kfree(remotestr);
325 return NULL;
326}
327
182static struct nfs4_pnfs_ds * 328static struct nfs4_pnfs_ds *
183nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) 329nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
184{ 330{
185 struct nfs4_pnfs_ds *tmp_ds, *ds; 331 struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
332 char *remotestr;
186 333
187 ds = kzalloc(sizeof(*tmp_ds), gfp_flags); 334 if (list_empty(dsaddrs)) {
335 dprintk("%s: no addresses defined\n", __func__);
336 goto out;
337 }
338
339 ds = kzalloc(sizeof(*ds), gfp_flags);
188 if (!ds) 340 if (!ds)
189 goto out; 341 goto out;
190 342
343 /* this is only used for debugging, so it's ok if its NULL */
344 remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
345
191 spin_lock(&nfs4_ds_cache_lock); 346 spin_lock(&nfs4_ds_cache_lock);
192 tmp_ds = _data_server_lookup_locked(ip_addr, port); 347 tmp_ds = _data_server_lookup_locked(dsaddrs);
193 if (tmp_ds == NULL) { 348 if (tmp_ds == NULL) {
194 ds->ds_ip_addr = ip_addr; 349 INIT_LIST_HEAD(&ds->ds_addrs);
195 ds->ds_port = port; 350 list_splice_init(dsaddrs, &ds->ds_addrs);
351 ds->ds_remotestr = remotestr;
196 atomic_set(&ds->ds_count, 1); 352 atomic_set(&ds->ds_count, 1);
197 INIT_LIST_HEAD(&ds->ds_node); 353 INIT_LIST_HEAD(&ds->ds_node);
198 ds->ds_clp = NULL; 354 ds->ds_clp = NULL;
199 list_add(&ds->ds_node, &nfs4_data_server_cache); 355 list_add(&ds->ds_node, &nfs4_data_server_cache);
200 dprintk("%s add new data server ip 0x%x\n", __func__, 356 dprintk("%s add new data server %s\n", __func__,
201 ds->ds_ip_addr); 357 ds->ds_remotestr);
202 } else { 358 } else {
359 if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
360 dsaddrs)) {
361 dprintk("%s: multipath address mismatch: %s != %s",
362 __func__, tmp_ds->ds_remotestr, remotestr);
363 }
364 kfree(remotestr);
203 kfree(ds); 365 kfree(ds);
204 atomic_inc(&tmp_ds->ds_count); 366 atomic_inc(&tmp_ds->ds_count);
205 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", 367 dprintk("%s data server %s found, inc'ed ds_count to %d\n",
206 __func__, tmp_ds->ds_ip_addr, 368 __func__, tmp_ds->ds_remotestr,
207 atomic_read(&tmp_ds->ds_count)); 369 atomic_read(&tmp_ds->ds_count));
208 ds = tmp_ds; 370 ds = tmp_ds;
209 } 371 }
@@ -213,18 +375,22 @@ out:
213} 375}
214 376
215/* 377/*
216 * Currently only support ipv4, and one multi-path address. 378 * Currently only supports ipv4, ipv6 and one multi-path address.
217 */ 379 */
218static struct nfs4_pnfs_ds * 380static struct nfs4_pnfs_ds_addr *
219decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) 381decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
220{ 382{
221 struct nfs4_pnfs_ds *ds = NULL; 383 struct nfs4_pnfs_ds_addr *da = NULL;
222 char *buf; 384 char *buf, *portstr;
223 const char *ipend, *pstr; 385 u32 port;
224 u32 ip_addr, port; 386 int nlen, rlen;
225 int nlen, rlen, i;
226 int tmp[2]; 387 int tmp[2];
227 __be32 *p; 388 __be32 *p;
389 char *netid, *match_netid;
390 size_t len, match_netid_len;
391 char *startsep = "";
392 char *endsep = "";
393
228 394
229 /* r_netid */ 395 /* r_netid */
230 p = xdr_inline_decode(streamp, 4); 396 p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
236 if (unlikely(!p)) 402 if (unlikely(!p))
237 goto out_err; 403 goto out_err;
238 404
239 /* Check that netid is "tcp" */ 405 netid = kmalloc(nlen+1, gfp_flags);
240 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { 406 if (unlikely(!netid))
241 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
242 goto out_err; 407 goto out_err;
243 }
244 408
245 /* r_addr */ 409 netid[nlen] = '\0';
410 memcpy(netid, p, nlen);
411
412 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
246 p = xdr_inline_decode(streamp, 4); 413 p = xdr_inline_decode(streamp, 4);
247 if (unlikely(!p)) 414 if (unlikely(!p))
248 goto out_err; 415 goto out_free_netid;
249 rlen = be32_to_cpup(p); 416 rlen = be32_to_cpup(p);
250 417
251 p = xdr_inline_decode(streamp, rlen); 418 p = xdr_inline_decode(streamp, rlen);
252 if (unlikely(!p)) 419 if (unlikely(!p))
253 goto out_err; 420 goto out_free_netid;
254 421
255 /* ipv6 length plus port is legal */ 422 /* port is ".ABC.DEF", 8 chars max */
256 if (rlen > INET6_ADDRSTRLEN + 8) { 423 if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
257 dprintk("%s: Invalid address, length %d\n", __func__, 424 dprintk("%s: Invalid address, length %d\n", __func__,
258 rlen); 425 rlen);
259 goto out_err; 426 goto out_free_netid;
260 } 427 }
261 buf = kmalloc(rlen + 1, gfp_flags); 428 buf = kmalloc(rlen + 1, gfp_flags);
262 if (!buf) { 429 if (!buf) {
263 dprintk("%s: Not enough memory\n", __func__); 430 dprintk("%s: Not enough memory\n", __func__);
264 goto out_err; 431 goto out_free_netid;
265 } 432 }
266 buf[rlen] = '\0'; 433 buf[rlen] = '\0';
267 memcpy(buf, p, rlen); 434 memcpy(buf, p, rlen);
268 435
269 /* replace the port dots with dashes for the in4_pton() delimiter*/ 436 /* replace port '.' with '-' */
270 for (i = 0; i < 2; i++) { 437 portstr = strrchr(buf, '.');
271 char *res = strrchr(buf, '.'); 438 if (!portstr) {
272 if (!res) { 439 dprintk("%s: Failed finding expected dot in port\n",
273 dprintk("%s: Failed finding expected dots in port\n", 440 __func__);
274 __func__); 441 goto out_free_buf;
275 goto out_free; 442 }
276 } 443 *portstr = '-';
277 *res = '-'; 444
445 /* find '.' between address and port */
446 portstr = strrchr(buf, '.');
447 if (!portstr) {
448 dprintk("%s: Failed finding expected dot between address and "
449 "port\n", __func__);
450 goto out_free_buf;
278 } 451 }
452 *portstr = '\0';
279 453
280 /* Currently only support ipv4 address */ 454 da = kzalloc(sizeof(*da), gfp_flags);
281 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { 455 if (unlikely(!da))
282 dprintk("%s: Only ipv4 addresses supported\n", __func__); 456 goto out_free_buf;
283 goto out_free; 457
458 INIT_LIST_HEAD(&da->da_node);
459
460 if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
461 sizeof(da->da_addr))) {
462 dprintk("%s: error parsing address %s\n", __func__, buf);
463 goto out_free_da;
284 } 464 }
285 465
286 /* port */ 466 portstr++;
287 pstr = ipend; 467 sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
288 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
289 port = htons((tmp[0] << 8) | (tmp[1])); 468 port = htons((tmp[0] << 8) | (tmp[1]));
290 469
291 ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); 470 switch (da->da_addr.ss_family) {
292 dprintk("%s: Decoded address and port %s\n", __func__, buf); 471 case AF_INET:
293out_free: 472 ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
473 da->da_addrlen = sizeof(struct sockaddr_in);
474 match_netid = "tcp";
475 match_netid_len = 3;
476 break;
477
478 case AF_INET6:
479 ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
480 da->da_addrlen = sizeof(struct sockaddr_in6);
481 match_netid = "tcp6";
482 match_netid_len = 4;
483 startsep = "[";
484 endsep = "]";
485 break;
486
487 default:
488 dprintk("%s: unsupported address family: %u\n",
489 __func__, da->da_addr.ss_family);
490 goto out_free_da;
491 }
492
493 if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
494 dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
495 __func__, netid, match_netid);
496 goto out_free_da;
497 }
498
499 /* save human readable address */
500 len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
501 da->da_remotestr = kzalloc(len, gfp_flags);
502
503 /* NULL is ok, only used for dprintk */
504 if (da->da_remotestr)
505 snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
506 buf, endsep, ntohs(port));
507
508 dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
294 kfree(buf); 509 kfree(buf);
510 kfree(netid);
511 return da;
512
513out_free_da:
514 kfree(da);
515out_free_buf:
516 dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
517 kfree(buf);
518out_free_netid:
519 kfree(netid);
295out_err: 520out_err:
296 return ds; 521 return NULL;
297} 522}
298 523
299/* Decode opaque device data and return the result */ 524/* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
310 struct xdr_stream stream; 535 struct xdr_stream stream;
311 struct xdr_buf buf; 536 struct xdr_buf buf;
312 struct page *scratch; 537 struct page *scratch;
538 struct list_head dsaddrs;
539 struct nfs4_pnfs_ds_addr *da;
313 540
314 /* set up xdr stream */ 541 /* set up xdr stream */
315 scratch = alloc_page(gfp_flags); 542 scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
386 NFS_SERVER(ino)->nfs_client, 613 NFS_SERVER(ino)->nfs_client,
387 &pdev->dev_id); 614 &pdev->dev_id);
388 615
616 INIT_LIST_HEAD(&dsaddrs);
617
389 for (i = 0; i < dsaddr->ds_num; i++) { 618 for (i = 0; i < dsaddr->ds_num; i++) {
390 int j; 619 int j;
391 u32 mp_count; 620 u32 mp_count;
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
395 goto out_err_free_deviceid; 624 goto out_err_free_deviceid;
396 625
397 mp_count = be32_to_cpup(p); /* multipath count */ 626 mp_count = be32_to_cpup(p); /* multipath count */
398 if (mp_count > 1) {
399 printk(KERN_WARNING
400 "%s: Multipath count %d not supported, "
401 "skipping all greater than 1\n", __func__,
402 mp_count);
403 }
404 for (j = 0; j < mp_count; j++) { 627 for (j = 0; j < mp_count; j++) {
405 if (j == 0) { 628 da = decode_ds_addr(&stream, gfp_flags);
406 dsaddr->ds_list[i] = decode_and_add_ds(&stream, 629 if (da)
407 ino, gfp_flags); 630 list_add_tail(&da->da_node, &dsaddrs);
408 if (dsaddr->ds_list[i] == NULL) 631 }
409 goto out_err_free_deviceid; 632 if (list_empty(&dsaddrs)) {
410 } else { 633 dprintk("%s: no suitable DS addresses found\n",
411 u32 len; 634 __func__);
412 /* skip extra multipath */ 635 goto out_err_free_deviceid;
413 636 }
414 /* read len, skip */ 637
415 p = xdr_inline_decode(&stream, 4); 638 dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
416 if (unlikely(!p)) 639 if (!dsaddr->ds_list[i])
417 goto out_err_free_deviceid; 640 goto out_err_drain_dsaddrs;
418 len = be32_to_cpup(p); 641
419 642 /* If DS was already in cache, free ds addrs */
420 p = xdr_inline_decode(&stream, len); 643 while (!list_empty(&dsaddrs)) {
421 if (unlikely(!p)) 644 da = list_first_entry(&dsaddrs,
422 goto out_err_free_deviceid; 645 struct nfs4_pnfs_ds_addr,
423 646 da_node);
424 /* read len, skip */ 647 list_del_init(&da->da_node);
425 p = xdr_inline_decode(&stream, 4); 648 kfree(da->da_remotestr);
426 if (unlikely(!p)) 649 kfree(da);
427 goto out_err_free_deviceid;
428 len = be32_to_cpup(p);
429
430 p = xdr_inline_decode(&stream, len);
431 if (unlikely(!p))
432 goto out_err_free_deviceid;
433 }
434 } 650 }
435 } 651 }
436 652
437 __free_page(scratch); 653 __free_page(scratch);
438 return dsaddr; 654 return dsaddr;
439 655
656out_err_drain_dsaddrs:
657 while (!list_empty(&dsaddrs)) {
658 da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
659 da_node);
660 list_del_init(&da->da_node);
661 kfree(da->da_remotestr);
662 kfree(da);
663 }
440out_err_free_deviceid: 664out_err_free_deviceid:
441 nfs4_fl_free_deviceid(dsaddr); 665 nfs4_fl_free_deviceid(dsaddr);
442 /* stripe_indicies was part of dsaddr */ 666 /* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
591 815
592static void 816static void
593filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, 817filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
594 int err, u32 ds_addr) 818 int err, const char *ds_remotestr)
595{ 819{
596 u32 *p = (u32 *)&dsaddr->id_node.deviceid; 820 u32 *p = (u32 *)&dsaddr->id_node.deviceid;
597 821
598 printk(KERN_ERR "NFS: data server %x connection error %d." 822 printk(KERN_ERR "NFS: data server %s connection error %d."
599 " Deviceid [%x%x%x%x] marked out of use.\n", 823 " Deviceid [%x%x%x%x] marked out of use.\n",
600 ds_addr, err, p[0], p[1], p[2], p[3]); 824 ds_remotestr, err, p[0], p[1], p[2], p[3]);
601 825
602 spin_lock(&nfs4_ds_cache_lock); 826 spin_lock(&nfs4_ds_cache_lock);
603 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; 827 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
628 err = nfs4_ds_connect(s, ds); 852 err = nfs4_ds_connect(s, ds);
629 if (err) { 853 if (err) {
630 filelayout_mark_devid_negative(dsaddr, err, 854 filelayout_mark_devid_negative(dsaddr, err,
631 ntohl(ds->ds_ip_addr)); 855 ds->ds_remotestr);
632 return NULL; 856 return NULL;
633 } 857 }
634 } 858 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 26bece8f3083..079614deca3f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
80static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 80static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
81 struct nfs_fattr *fattr, struct iattr *sattr, 81 struct nfs_fattr *fattr, struct iattr *sattr,
82 struct nfs4_state *state); 82 struct nfs4_state *state);
83 83#ifdef CONFIG_NFS_V4_1
84static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
85static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
86#endif
84/* Prevent leaks of NFSv4 errors into userland */ 87/* Prevent leaks of NFSv4 errors into userland */
85static int nfs4_map_errors(int err) 88static int nfs4_map_errors(int err)
86{ 89{
@@ -1689,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
1689 return ret; 1692 return ret;
1690} 1693}
1691 1694
1695#if defined(CONFIG_NFS_V4_1)
1696static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
1697{
1698 int status;
1699 struct nfs_server *server = NFS_SERVER(state->inode);
1700
1701 status = nfs41_test_stateid(server, state);
1702 if (status == NFS_OK)
1703 return 0;
1704 nfs41_free_stateid(server, state);
1705 return nfs4_open_expired(sp, state);
1706}
1707#endif
1708
1692/* 1709/*
1693 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* 1710 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
1694 * fields corresponding to attributes that were used to store the verifier. 1711 * fields corresponding to attributes that were used to store the verifier.
@@ -2252,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2252static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2269static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2253 struct nfs_fsinfo *info) 2270 struct nfs_fsinfo *info)
2254{ 2271{
2272 int minor_version = server->nfs_client->cl_minorversion;
2255 int status = nfs4_lookup_root(server, fhandle, info); 2273 int status = nfs4_lookup_root(server, fhandle, info);
2256 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) 2274 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
2257 /* 2275 /*
2258 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM 2276 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
2259 * by nfs4_map_errors() as this function exits. 2277 * by nfs4_map_errors() as this function exits.
2260 */ 2278 */
2261 status = nfs4_find_root_sec(server, fhandle, info); 2279 status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
2262 if (status == 0) 2280 if (status == 0)
2263 status = nfs4_server_capabilities(server, fhandle); 2281 status = nfs4_server_capabilities(server, fhandle);
2264 if (status == 0) 2282 if (status == 0)
@@ -4441,6 +4459,20 @@ out:
4441 return err; 4459 return err;
4442} 4460}
4443 4461
4462#if defined(CONFIG_NFS_V4_1)
4463static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
4464{
4465 int status;
4466 struct nfs_server *server = NFS_SERVER(state->inode);
4467
4468 status = nfs41_test_stateid(server, state);
4469 if (status == NFS_OK)
4470 return 0;
4471 nfs41_free_stateid(server, state);
4472 return nfs4_lock_expired(state, request);
4473}
4474#endif
4475
4444static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4476static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4445{ 4477{
4446 struct nfs_inode *nfsi = NFS_I(state->inode); 4478 struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -4779,6 +4811,16 @@ out_inval:
4779 return -NFS4ERR_INVAL; 4811 return -NFS4ERR_INVAL;
4780} 4812}
4781 4813
4814static bool
4815nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
4816{
4817 if (a->server_scope_sz == b->server_scope_sz &&
4818 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
4819 return true;
4820
4821 return false;
4822}
4823
4782/* 4824/*
4783 * nfs4_proc_exchange_id() 4825 * nfs4_proc_exchange_id()
4784 * 4826 *
@@ -4821,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4821 init_utsname()->domainname, 4863 init_utsname()->domainname,
4822 clp->cl_rpcclient->cl_auth->au_flavor); 4864 clp->cl_rpcclient->cl_auth->au_flavor);
4823 4865
4866 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
4867 if (unlikely(!res.server_scope))
4868 return -ENOMEM;
4869
4824 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 4870 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4825 if (!status) 4871 if (!status)
4826 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4872 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4873
4874 if (!status) {
4875 if (clp->server_scope &&
4876 !nfs41_same_server_scope(clp->server_scope,
4877 res.server_scope)) {
4878 dprintk("%s: server_scope mismatch detected\n",
4879 __func__);
4880 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
4881 kfree(clp->server_scope);
4882 clp->server_scope = NULL;
4883 }
4884
4885 if (!clp->server_scope)
4886 clp->server_scope = res.server_scope;
4887 else
4888 kfree(res.server_scope);
4889 }
4890
4827 dprintk("<-- %s status= %d\n", __func__, status); 4891 dprintk("<-- %s status= %d\n", __func__, status);
4828 return status; 4892 return status;
4829} 4893}
@@ -5704,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
5704{ 5768{
5705 struct nfs4_layoutreturn *lrp = calldata; 5769 struct nfs4_layoutreturn *lrp = calldata;
5706 struct nfs_server *server; 5770 struct nfs_server *server;
5707 struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; 5771 struct pnfs_layout_hdr *lo = lrp->args.layout;
5708 5772
5709 dprintk("--> %s\n", __func__); 5773 dprintk("--> %s\n", __func__);
5710 5774
@@ -5733,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata)
5733 struct nfs4_layoutreturn *lrp = calldata; 5797 struct nfs4_layoutreturn *lrp = calldata;
5734 5798
5735 dprintk("--> %s\n", __func__); 5799 dprintk("--> %s\n", __func__);
5736 put_layout_hdr(NFS_I(lrp->args.inode)->layout); 5800 put_layout_hdr(lrp->args.layout);
5737 kfree(calldata); 5801 kfree(calldata);
5738 dprintk("<-- %s\n", __func__); 5802 dprintk("<-- %s\n", __func__);
5739} 5803}
@@ -5901,6 +5965,143 @@ out:
5901 rpc_put_task(task); 5965 rpc_put_task(task);
5902 return status; 5966 return status;
5903} 5967}
5968
5969static int
5970_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
5971 struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
5972{
5973 struct nfs41_secinfo_no_name_args args = {
5974 .style = SECINFO_STYLE_CURRENT_FH,
5975 };
5976 struct nfs4_secinfo_res res = {
5977 .flavors = flavors,
5978 };
5979 struct rpc_message msg = {
5980 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
5981 .rpc_argp = &args,
5982 .rpc_resp = &res,
5983 };
5984 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
5985}
5986
5987static int
5988nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
5989 struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
5990{
5991 struct nfs4_exception exception = { };
5992 int err;
5993 do {
5994 err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
5995 switch (err) {
5996 case 0:
5997 case -NFS4ERR_WRONGSEC:
5998 case -NFS4ERR_NOTSUPP:
5999 break;
6000 default:
6001 err = nfs4_handle_exception(server, err, &exception);
6002 }
6003 } while (exception.retry);
6004 return err;
6005}
6006
6007static int
6008nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
6009 struct nfs_fsinfo *info)
6010{
6011 int err;
6012 struct page *page;
6013 rpc_authflavor_t flavor;
6014 struct nfs4_secinfo_flavors *flavors;
6015
6016 page = alloc_page(GFP_KERNEL);
6017 if (!page) {
6018 err = -ENOMEM;
6019 goto out;
6020 }
6021
6022 flavors = page_address(page);
6023 err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
6024
6025 /*
6026 * Fall back on "guess and check" method if
6027 * the server doesn't support SECINFO_NO_NAME
6028 */
6029 if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
6030 err = nfs4_find_root_sec(server, fhandle, info);
6031 goto out_freepage;
6032 }
6033 if (err)
6034 goto out_freepage;
6035
6036 flavor = nfs_find_best_sec(flavors);
6037 if (err == 0)
6038 err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
6039
6040out_freepage:
6041 put_page(page);
6042 if (err == -EACCES)
6043 return -EPERM;
6044out:
6045 return err;
6046}
6047static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
6048{
6049 int status;
6050 struct nfs41_test_stateid_args args = {
6051 .stateid = &state->stateid,
6052 };
6053 struct nfs41_test_stateid_res res;
6054 struct rpc_message msg = {
6055 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
6056 .rpc_argp = &args,
6057 .rpc_resp = &res,
6058 };
6059 args.seq_args.sa_session = res.seq_res.sr_session = NULL;
6060 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
6061 return status;
6062}
6063
6064static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
6065{
6066 struct nfs4_exception exception = { };
6067 int err;
6068 do {
6069 err = nfs4_handle_exception(server,
6070 _nfs41_test_stateid(server, state),
6071 &exception);
6072 } while (exception.retry);
6073 return err;
6074}
6075
6076static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
6077{
6078 int status;
6079 struct nfs41_free_stateid_args args = {
6080 .stateid = &state->stateid,
6081 };
6082 struct nfs41_free_stateid_res res;
6083 struct rpc_message msg = {
6084 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
6085 .rpc_argp = &args,
6086 .rpc_resp = &res,
6087 };
6088
6089 args.seq_args.sa_session = res.seq_res.sr_session = NULL;
6090 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
6091 return status;
6092}
6093
6094static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
6095{
6096 struct nfs4_exception exception = { };
6097 int err;
6098 do {
6099 err = nfs4_handle_exception(server,
6100 _nfs4_free_stateid(server, state),
6101 &exception);
6102 } while (exception.retry);
6103 return err;
6104}
5904#endif /* CONFIG_NFS_V4_1 */ 6105#endif /* CONFIG_NFS_V4_1 */
5905 6106
5906struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 6107struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5937,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
5937struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { 6138struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
5938 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6139 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
5939 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6140 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
5940 .recover_open = nfs4_open_expired, 6141 .recover_open = nfs41_open_expired,
5941 .recover_lock = nfs4_lock_expired, 6142 .recover_lock = nfs41_lock_expired,
5942 .establish_clid = nfs41_init_clientid, 6143 .establish_clid = nfs41_init_clientid,
5943 .get_clid_cred = nfs4_get_exchange_id_cred, 6144 .get_clid_cred = nfs4_get_exchange_id_cred,
5944}; 6145};
@@ -5962,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
5962 .minor_version = 0, 6163 .minor_version = 0,
5963 .call_sync = _nfs4_call_sync, 6164 .call_sync = _nfs4_call_sync,
5964 .validate_stateid = nfs4_validate_delegation_stateid, 6165 .validate_stateid = nfs4_validate_delegation_stateid,
6166 .find_root_sec = nfs4_find_root_sec,
5965 .reboot_recovery_ops = &nfs40_reboot_recovery_ops, 6167 .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
5966 .nograce_recovery_ops = &nfs40_nograce_recovery_ops, 6168 .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
5967 .state_renewal_ops = &nfs40_state_renewal_ops, 6169 .state_renewal_ops = &nfs40_state_renewal_ops,
@@ -5972,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
5972 .minor_version = 1, 6174 .minor_version = 1,
5973 .call_sync = _nfs4_call_sync_session, 6175 .call_sync = _nfs4_call_sync_session,
5974 .validate_stateid = nfs41_validate_delegation_stateid, 6176 .validate_stateid = nfs41_validate_delegation_stateid,
6177 .find_root_sec = nfs41_find_root_sec,
5975 .reboot_recovery_ops = &nfs41_reboot_recovery_ops, 6178 .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
5976 .nograce_recovery_ops = &nfs41_nograce_recovery_ops, 6179 .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
5977 .state_renewal_ops = &nfs41_state_renewal_ops, 6180 .state_renewal_ops = &nfs41_state_renewal_ops,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7acfe8843626..72ab97ef3d61 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp)
1643 goto out_error; 1643 goto out_error;
1644 } 1644 }
1645 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1645 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1646 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1646
1647 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
1648 &clp->cl_state))
1649 nfs4_state_start_reclaim_nograce(clp);
1650 else
1651 set_bit(NFS4CLNT_RECLAIM_REBOOT,
1652 &clp->cl_state);
1653
1647 pnfs_destroy_all_layouts(clp); 1654 pnfs_destroy_all_layouts(clp);
1648 } 1655 }
1649 1656
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6e8f3b9a1de..c191a9baa422 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int);
343 1 /* FIXME: opaque lrf_body always empty at the moment */) 343 1 /* FIXME: opaque lrf_body always empty at the moment */)
344#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 344#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
345 1 + decode_stateid_maxsz) 345 1 + decode_stateid_maxsz)
346#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
347#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
348#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \
349 XDR_QUADLEN(NFS4_STATEID_SIZE))
350#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1)
351#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
352 XDR_QUADLEN(NFS4_STATEID_SIZE))
353#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1)
346#else /* CONFIG_NFS_V4_1 */ 354#else /* CONFIG_NFS_V4_1 */
347#define encode_sequence_maxsz 0 355#define encode_sequence_maxsz 0
348#define decode_sequence_maxsz 0 356#define decode_sequence_maxsz 0
@@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int);
772 decode_sequence_maxsz + \ 780 decode_sequence_maxsz + \
773 decode_putfh_maxsz + \ 781 decode_putfh_maxsz + \
774 decode_layoutreturn_maxsz) 782 decode_layoutreturn_maxsz)
783#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \
784 encode_sequence_maxsz + \
785 encode_putrootfh_maxsz +\
786 encode_secinfo_no_name_maxsz)
787#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \
788 decode_sequence_maxsz + \
789 decode_putrootfh_maxsz + \
790 decode_secinfo_no_name_maxsz)
791#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \
792 encode_sequence_maxsz + \
793 encode_test_stateid_maxsz)
794#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \
795 decode_sequence_maxsz + \
796 decode_test_stateid_maxsz)
797#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \
798 encode_sequence_maxsz + \
799 encode_free_stateid_maxsz)
800#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \
801 decode_sequence_maxsz + \
802 decode_free_stateid_maxsz)
775 803
776const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 804const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
777 compound_encode_hdr_maxsz + 805 compound_encode_hdr_maxsz +
@@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr,
1938 hdr->nops++; 1966 hdr->nops++;
1939 hdr->replen += decode_layoutreturn_maxsz; 1967 hdr->replen += decode_layoutreturn_maxsz;
1940} 1968}
1969
1970static int
1971encode_secinfo_no_name(struct xdr_stream *xdr,
1972 const struct nfs41_secinfo_no_name_args *args,
1973 struct compound_hdr *hdr)
1974{
1975 __be32 *p;
1976 p = reserve_space(xdr, 8);
1977 *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
1978 *p++ = cpu_to_be32(args->style);
1979 hdr->nops++;
1980 hdr->replen += decode_secinfo_no_name_maxsz;
1981 return 0;
1982}
1983
1984static void encode_test_stateid(struct xdr_stream *xdr,
1985 struct nfs41_test_stateid_args *args,
1986 struct compound_hdr *hdr)
1987{
1988 __be32 *p;
1989
1990 p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
1991 *p++ = cpu_to_be32(OP_TEST_STATEID);
1992 *p++ = cpu_to_be32(1);
1993 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
1994 hdr->nops++;
1995 hdr->replen += decode_test_stateid_maxsz;
1996}
1997
1998static void encode_free_stateid(struct xdr_stream *xdr,
1999 struct nfs41_free_stateid_args *args,
2000 struct compound_hdr *hdr)
2001{
2002 __be32 *p;
2003 p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
2004 *p++ = cpu_to_be32(OP_FREE_STATEID);
2005 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
2006 hdr->nops++;
2007 hdr->replen += decode_free_stateid_maxsz;
2008}
1941#endif /* CONFIG_NFS_V4_1 */ 2009#endif /* CONFIG_NFS_V4_1 */
1942 2010
1943/* 2011/*
@@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
2790 encode_layoutreturn(xdr, args, &hdr); 2858 encode_layoutreturn(xdr, args, &hdr);
2791 encode_nops(&hdr); 2859 encode_nops(&hdr);
2792} 2860}
2861
2862/*
2863 * Encode SECINFO_NO_NAME request
2864 */
2865static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
2866 struct xdr_stream *xdr,
2867 struct nfs41_secinfo_no_name_args *args)
2868{
2869 struct compound_hdr hdr = {
2870 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2871 };
2872
2873 encode_compound_hdr(xdr, req, &hdr);
2874 encode_sequence(xdr, &args->seq_args, &hdr);
2875 encode_putrootfh(xdr, &hdr);
2876 encode_secinfo_no_name(xdr, args, &hdr);
2877 encode_nops(&hdr);
2878 return 0;
2879}
2880
2881/*
2882 * Encode TEST_STATEID request
2883 */
2884static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
2885 struct xdr_stream *xdr,
2886 struct nfs41_test_stateid_args *args)
2887{
2888 struct compound_hdr hdr = {
2889 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2890 };
2891
2892 encode_compound_hdr(xdr, req, &hdr);
2893 encode_sequence(xdr, &args->seq_args, &hdr);
2894 encode_test_stateid(xdr, args, &hdr);
2895 encode_nops(&hdr);
2896}
2897
2898/*
2899 * Encode FREE_STATEID request
2900 */
2901static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
2902 struct xdr_stream *xdr,
2903 struct nfs41_free_stateid_args *args)
2904{
2905 struct compound_hdr hdr = {
2906 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2907 };
2908
2909 encode_compound_hdr(xdr, req, &hdr);
2910 encode_sequence(xdr, &args->seq_args, &hdr);
2911 encode_free_stateid(xdr, args, &hdr);
2912 encode_nops(&hdr);
2913}
2793#endif /* CONFIG_NFS_V4_1 */ 2914#endif /* CONFIG_NFS_V4_1 */
2794 2915
2795static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2916static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4977 if (unlikely(status)) 5098 if (unlikely(status))
4978 return status; 5099 return status;
4979 5100
4980 /* Throw away server_scope */ 5101 /* Save server_scope */
4981 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5102 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4982 if (unlikely(status)) 5103 if (unlikely(status))
4983 return status; 5104 return status;
4984 5105
5106 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5107 return -EIO;
5108
5109 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5110 res->server_scope->server_scope_sz = dummy;
5111
4985 /* Throw away Implementation id array */ 5112 /* Throw away Implementation id array */
4986 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5113 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4987 if (unlikely(status)) 5114 if (unlikely(status))
@@ -5322,6 +5449,55 @@ out_overflow:
5322 print_overflow_msg(__func__, xdr); 5449 print_overflow_msg(__func__, xdr);
5323 return -EIO; 5450 return -EIO;
5324} 5451}
5452
5453static int decode_test_stateid(struct xdr_stream *xdr,
5454 struct nfs41_test_stateid_res *res)
5455{
5456 __be32 *p;
5457 int status;
5458 int num_res;
5459
5460 status = decode_op_hdr(xdr, OP_TEST_STATEID);
5461 if (status)
5462 return status;
5463
5464 p = xdr_inline_decode(xdr, 4);
5465 if (unlikely(!p))
5466 goto out_overflow;
5467 num_res = be32_to_cpup(p++);
5468 if (num_res != 1)
5469 goto out;
5470
5471 p = xdr_inline_decode(xdr, 4);
5472 if (unlikely(!p))
5473 goto out_overflow;
5474 res->status = be32_to_cpup(p++);
5475 return res->status;
5476out_overflow:
5477 print_overflow_msg(__func__, xdr);
5478out:
5479 return -EIO;
5480}
5481
5482static int decode_free_stateid(struct xdr_stream *xdr,
5483 struct nfs41_free_stateid_res *res)
5484{
5485 __be32 *p;
5486 int status;
5487
5488 status = decode_op_hdr(xdr, OP_FREE_STATEID);
5489 if (status)
5490 return status;
5491
5492 p = xdr_inline_decode(xdr, 4);
5493 if (unlikely(!p))
5494 goto out_overflow;
5495 res->status = be32_to_cpup(p++);
5496 return res->status;
5497out_overflow:
5498 print_overflow_msg(__func__, xdr);
5499 return -EIO;
5500}
5325#endif /* CONFIG_NFS_V4_1 */ 5501#endif /* CONFIG_NFS_V4_1 */
5326 5502
5327/* 5503/*
@@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6461out: 6637out:
6462 return status; 6638 return status;
6463} 6639}
6640
6641/*
6642 * Decode SECINFO_NO_NAME response
6643 */
6644static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
6645 struct xdr_stream *xdr,
6646 struct nfs4_secinfo_res *res)
6647{
6648 struct compound_hdr hdr;
6649 int status;
6650
6651 status = decode_compound_hdr(xdr, &hdr);
6652 if (status)
6653 goto out;
6654 status = decode_sequence(xdr, &res->seq_res, rqstp);
6655 if (status)
6656 goto out;
6657 status = decode_putrootfh(xdr);
6658 if (status)
6659 goto out;
6660 status = decode_secinfo(xdr, res);
6661out:
6662 return status;
6663}
6664
6665/*
6666 * Decode TEST_STATEID response
6667 */
6668static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
6669 struct xdr_stream *xdr,
6670 struct nfs41_test_stateid_res *res)
6671{
6672 struct compound_hdr hdr;
6673 int status;
6674
6675 status = decode_compound_hdr(xdr, &hdr);
6676 if (status)
6677 goto out;
6678 status = decode_sequence(xdr, &res->seq_res, rqstp);
6679 if (status)
6680 goto out;
6681 status = decode_test_stateid(xdr, res);
6682out:
6683 return status;
6684}
6685
6686/*
6687 * Decode FREE_STATEID response
6688 */
6689static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
6690 struct xdr_stream *xdr,
6691 struct nfs41_free_stateid_res *res)
6692{
6693 struct compound_hdr hdr;
6694 int status;
6695
6696 status = decode_compound_hdr(xdr, &hdr);
6697 if (status)
6698 goto out;
6699 status = decode_sequence(xdr, &res->seq_res, rqstp);
6700 if (status)
6701 goto out;
6702 status = decode_free_stateid(xdr, res);
6703out:
6704 return status;
6705}
6464#endif /* CONFIG_NFS_V4_1 */ 6706#endif /* CONFIG_NFS_V4_1 */
6465 6707
6466/** 6708/**
@@ -6663,6 +6905,9 @@ struct rpc_procinfo nfs4_procedures[] = {
6663 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6905 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6664 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), 6906 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6665 PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), 6907 PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
6908 PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
6909 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
6910 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
6666#endif /* CONFIG_NFS_V4_1 */ 6911#endif /* CONFIG_NFS_V4_1 */
6667}; 6912};
6668 6913
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3f10ef..9383ca7245bc 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
1000 if (!pnfs_generic_pg_test(pgio, prev, req)) 1000 if (!pnfs_generic_pg_test(pgio, prev, req))
1001 return false; 1001 return false;
1002 1002
1003 if (pgio->pg_lseg == NULL)
1004 return true;
1005
1006 return pgio->pg_count + req->wb_bytes <= 1003 return pgio->pg_count + req->wb_bytes <=
1007 OBJIO_LSEG(pgio->pg_lseg)->max_io_size; 1004 OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
1008} 1005}
1009 1006
1007static const struct nfs_pageio_ops objio_pg_read_ops = {
1008 .pg_init = pnfs_generic_pg_init_read,
1009 .pg_test = objio_pg_test,
1010 .pg_doio = pnfs_generic_pg_readpages,
1011};
1012
1013static const struct nfs_pageio_ops objio_pg_write_ops = {
1014 .pg_init = pnfs_generic_pg_init_write,
1015 .pg_test = objio_pg_test,
1016 .pg_doio = pnfs_generic_pg_writepages,
1017};
1018
1010static struct pnfs_layoutdriver_type objlayout_type = { 1019static struct pnfs_layoutdriver_type objlayout_type = {
1011 .id = LAYOUT_OSD2_OBJECTS, 1020 .id = LAYOUT_OSD2_OBJECTS,
1012 .name = "LAYOUT_OSD2_OBJECTS", 1021 .name = "LAYOUT_OSD2_OBJECTS",
@@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
1020 1029
1021 .read_pagelist = objlayout_read_pagelist, 1030 .read_pagelist = objlayout_read_pagelist,
1022 .write_pagelist = objlayout_write_pagelist, 1031 .write_pagelist = objlayout_write_pagelist,
1023 .pg_test = objio_pg_test, 1032 .pg_read_ops = &objio_pg_read_ops,
1033 .pg_write_ops = &objio_pg_write_ops,
1024 1034
1025 .free_deviceid_node = objio_free_deviceid_node, 1035 .free_deviceid_node = objio_free_deviceid_node,
1026 1036
@@ -1055,5 +1065,7 @@ objlayout_exit(void)
1055 __func__); 1065 __func__);
1056} 1066}
1057 1067
1068MODULE_ALIAS("nfs-layouttype4-2");
1069
1058module_init(objlayout_init); 1070module_init(objlayout_init);
1059module_exit(objlayout_exit); 1071module_exit(objlayout_exit);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18449f43c568..b60970cc7f1f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
230 */ 230 */
231void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 231void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
232 struct inode *inode, 232 struct inode *inode,
233 int (*doio)(struct nfs_pageio_descriptor *), 233 const struct nfs_pageio_ops *pg_ops,
234 size_t bsize, 234 size_t bsize,
235 int io_flags) 235 int io_flags)
236{ 236{
@@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
240 desc->pg_bsize = bsize; 240 desc->pg_bsize = bsize;
241 desc->pg_base = 0; 241 desc->pg_base = 0;
242 desc->pg_moreio = 0; 242 desc->pg_moreio = 0;
243 desc->pg_recoalesce = 0;
243 desc->pg_inode = inode; 244 desc->pg_inode = inode;
244 desc->pg_doio = doio; 245 desc->pg_ops = pg_ops;
245 desc->pg_ioflags = io_flags; 246 desc->pg_ioflags = io_flags;
246 desc->pg_error = 0; 247 desc->pg_error = 0;
247 desc->pg_lseg = NULL; 248 desc->pg_lseg = NULL;
248 desc->pg_test = nfs_generic_pg_test;
249 pnfs_pageio_init(desc, inode);
250} 249}
251 250
252/** 251/**
@@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
276 return false; 275 return false;
277 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 276 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
278 return false; 277 return false;
279 return pgio->pg_test(pgio, prev, req); 278 return pgio->pg_ops->pg_test(pgio, prev, req);
280} 279}
281 280
282/** 281/**
@@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
297 if (!nfs_can_coalesce_requests(prev, req, desc)) 296 if (!nfs_can_coalesce_requests(prev, req, desc))
298 return 0; 297 return 0;
299 } else { 298 } else {
299 if (desc->pg_ops->pg_init)
300 desc->pg_ops->pg_init(desc, req);
300 desc->pg_base = req->wb_pgbase; 301 desc->pg_base = req->wb_pgbase;
301 } 302 }
302 nfs_list_remove_request(req); 303 nfs_list_remove_request(req);
@@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
311static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 312static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
312{ 313{
313 if (!list_empty(&desc->pg_list)) { 314 if (!list_empty(&desc->pg_list)) {
314 int error = desc->pg_doio(desc); 315 int error = desc->pg_ops->pg_doio(desc);
315 if (error < 0) 316 if (error < 0)
316 desc->pg_error = error; 317 desc->pg_error = error;
317 else 318 else
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
331 * Returns true if the request 'req' was successfully coalesced into the 332 * Returns true if the request 'req' was successfully coalesced into the
332 * existing list of pages 'desc'. 333 * existing list of pages 'desc'.
333 */ 334 */
334int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 335static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
335 struct nfs_page *req) 336 struct nfs_page *req)
336{ 337{
337 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
340 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
341 return 0; 342 return 0;
342 desc->pg_moreio = 0; 343 desc->pg_moreio = 0;
344 if (desc->pg_recoalesce)
345 return 0;
343 } 346 }
344 return 1; 347 return 1;
345} 348}
346 349
350static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
351{
352 LIST_HEAD(head);
353
354 do {
355 list_splice_init(&desc->pg_list, &head);
356 desc->pg_bytes_written -= desc->pg_count;
357 desc->pg_count = 0;
358 desc->pg_base = 0;
359 desc->pg_recoalesce = 0;
360
361 while (!list_empty(&head)) {
362 struct nfs_page *req;
363
364 req = list_first_entry(&head, struct nfs_page, wb_list);
365 nfs_list_remove_request(req);
366 if (__nfs_pageio_add_request(desc, req))
367 continue;
368 if (desc->pg_error < 0)
369 return 0;
370 break;
371 }
372 } while (desc->pg_recoalesce);
373 return 1;
374}
375
376int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
377 struct nfs_page *req)
378{
379 int ret;
380
381 do {
382 ret = __nfs_pageio_add_request(desc, req);
383 if (ret)
384 break;
385 if (desc->pg_error < 0)
386 break;
387 ret = nfs_do_recoalesce(desc);
388 } while (ret);
389 return ret;
390}
391
347/** 392/**
348 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor 393 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
349 * @desc: pointer to io descriptor 394 * @desc: pointer to io descriptor
350 */ 395 */
351void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) 396void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
352{ 397{
353 nfs_pageio_doio(desc); 398 for (;;) {
399 nfs_pageio_doio(desc);
400 if (!desc->pg_recoalesce)
401 break;
402 if (!nfs_do_recoalesce(desc))
403 break;
404 }
354} 405}
355 406
356/** 407/**
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
369 if (!list_empty(&desc->pg_list)) { 420 if (!list_empty(&desc->pg_list)) {
370 struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); 421 struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
371 if (index != prev->wb_index + 1) 422 if (index != prev->wb_index + 1)
372 nfs_pageio_doio(desc); 423 nfs_pageio_complete(desc);
373 } 424 }
374} 425}
375 426
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 29c0ca7fc347..38e5508555c6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@
28 */ 28 */
29 29
30#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
31#include <linux/nfs_page.h>
31#include "internal.h" 32#include "internal.h"
32#include "pnfs.h" 33#include "pnfs.h"
33#include "iostat.h" 34#include "iostat.h"
@@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
448void 449void
449pnfs_destroy_all_layouts(struct nfs_client *clp) 450pnfs_destroy_all_layouts(struct nfs_client *clp)
450{ 451{
452 struct nfs_server *server;
451 struct pnfs_layout_hdr *lo; 453 struct pnfs_layout_hdr *lo;
452 LIST_HEAD(tmp_list); 454 LIST_HEAD(tmp_list);
453 455
456 nfs4_deviceid_mark_client_invalid(clp);
457 nfs4_deviceid_purge_client(clp);
458
454 spin_lock(&clp->cl_lock); 459 spin_lock(&clp->cl_lock);
455 list_splice_init(&clp->cl_layouts, &tmp_list); 460 rcu_read_lock();
461 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
462 if (!list_empty(&server->layouts))
463 list_splice_init(&server->layouts, &tmp_list);
464 }
465 rcu_read_unlock();
456 spin_unlock(&clp->cl_lock); 466 spin_unlock(&clp->cl_lock);
457 467
458 while (!list_empty(&tmp_list)) { 468 while (!list_empty(&tmp_list)) {
@@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino)
661 lrp->args.stateid = stateid; 671 lrp->args.stateid = stateid;
662 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 672 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
663 lrp->args.inode = ino; 673 lrp->args.inode = ino;
674 lrp->args.layout = lo;
664 lrp->clp = NFS_SERVER(ino)->nfs_client; 675 lrp->clp = NFS_SERVER(ino)->nfs_client;
665 676
666 status = nfs4_proc_layoutreturn(lrp); 677 status = nfs4_proc_layoutreturn(lrp);
@@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino,
920 }; 931 };
921 unsigned pg_offset; 932 unsigned pg_offset;
922 struct nfs_inode *nfsi = NFS_I(ino); 933 struct nfs_inode *nfsi = NFS_I(ino);
923 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 934 struct nfs_server *server = NFS_SERVER(ino);
935 struct nfs_client *clp = server->nfs_client;
924 struct pnfs_layout_hdr *lo; 936 struct pnfs_layout_hdr *lo;
925 struct pnfs_layout_segment *lseg = NULL; 937 struct pnfs_layout_segment *lseg = NULL;
926 bool first = false; 938 bool first = false;
@@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino,
964 */ 976 */
965 spin_lock(&clp->cl_lock); 977 spin_lock(&clp->cl_lock);
966 BUG_ON(!list_empty(&lo->plh_layouts)); 978 BUG_ON(!list_empty(&lo->plh_layouts));
967 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 979 list_add_tail(&lo->plh_layouts, &server->layouts);
968 spin_unlock(&clp->cl_lock); 980 spin_unlock(&clp->cl_lock);
969 } 981 }
970 982
@@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino,
973 arg.offset -= pg_offset; 985 arg.offset -= pg_offset;
974 arg.length += pg_offset; 986 arg.length += pg_offset;
975 } 987 }
976 arg.length = PAGE_CACHE_ALIGN(arg.length); 988 if (arg.length != NFS4_MAX_UINT64)
989 arg.length = PAGE_CACHE_ALIGN(arg.length);
977 990
978 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 991 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
979 if (!lseg && first) { 992 if (!lseg && first) {
@@ -991,6 +1004,7 @@ out_unlock:
991 spin_unlock(&ino->i_lock); 1004 spin_unlock(&ino->i_lock);
992 goto out; 1005 goto out;
993} 1006}
1007EXPORT_SYMBOL_GPL(pnfs_update_layout);
994 1008
995int 1009int
996pnfs_layout_process(struct nfs4_layoutget *lgp) 1010pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1062,71 @@ out_forget_reply:
1048 goto out; 1062 goto out;
1049} 1063}
1050 1064
1065void
1066pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1067{
1068 BUG_ON(pgio->pg_lseg != NULL);
1069
1070 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1071 req->wb_context,
1072 req_offset(req),
1073 req->wb_bytes,
1074 IOMODE_READ,
1075 GFP_KERNEL);
1076 /* If no lseg, fall back to read through mds */
1077 if (pgio->pg_lseg == NULL)
1078 nfs_pageio_reset_read_mds(pgio);
1079
1080}
1081EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
1082
1083void
1084pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1085{
1086 BUG_ON(pgio->pg_lseg != NULL);
1087
1088 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1089 req->wb_context,
1090 req_offset(req),
1091 req->wb_bytes,
1092 IOMODE_RW,
1093 GFP_NOFS);
1094 /* If no lseg, fall back to write through mds */
1095 if (pgio->pg_lseg == NULL)
1096 nfs_pageio_reset_write_mds(pgio);
1097}
1098EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1099
1051bool 1100bool
1052pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1101pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
1053 struct nfs_page *req)
1054{ 1102{
1055 enum pnfs_iomode access_type; 1103 struct nfs_server *server = NFS_SERVER(inode);
1056 gfp_t gfp_flags; 1104 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1057 1105
1058 /* We assume that pg_ioflags == 0 iff we're reading a page */ 1106 if (ld == NULL)
1059 if (pgio->pg_ioflags == 0) { 1107 return false;
1060 access_type = IOMODE_READ; 1108 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
1061 gfp_flags = GFP_KERNEL; 1109 return true;
1062 } else { 1110}
1063 access_type = IOMODE_RW;
1064 gfp_flags = GFP_NOFS;
1065 }
1066 1111
1067 if (pgio->pg_lseg == NULL) { 1112bool
1068 if (pgio->pg_count != prev->wb_bytes) 1113pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
1069 return true; 1114{
1070 /* This is first coelesce call for a series of nfs_pages */ 1115 struct nfs_server *server = NFS_SERVER(inode);
1071 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1116 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1072 prev->wb_context, 1117
1073 req_offset(prev), 1118 if (ld == NULL)
1074 pgio->pg_count, 1119 return false;
1075 access_type, 1120 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
1076 gfp_flags); 1121 return true;
1077 if (pgio->pg_lseg == NULL) 1122}
1078 return true; 1123
1079 } 1124bool
1125pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1126 struct nfs_page *req)
1127{
1128 if (pgio->pg_lseg == NULL)
1129 return nfs_generic_pg_test(pgio, prev, req);
1080 1130
1081 /* 1131 /*
1082 * Test if a nfs_page is fully contained in the pnfs_layout_range. 1132 * Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
1120} 1170}
1121EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1171EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1122 1172
1123enum pnfs_try_status 1173static void
1174pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1175 struct nfs_write_data *data)
1176{
1177 list_splice_tail_init(&data->pages, &desc->pg_list);
1178 if (data->req && list_empty(&data->req->wb_list))
1179 nfs_list_add_request(data->req, &desc->pg_list);
1180 nfs_pageio_reset_write_mds(desc);
1181 desc->pg_recoalesce = 1;
1182 nfs_writedata_release(data);
1183}
1184
1185static enum pnfs_try_status
1124pnfs_try_to_write_data(struct nfs_write_data *wdata, 1186pnfs_try_to_write_data(struct nfs_write_data *wdata,
1125 const struct rpc_call_ops *call_ops, int how) 1187 const struct rpc_call_ops *call_ops,
1188 struct pnfs_layout_segment *lseg,
1189 int how)
1126{ 1190{
1127 struct inode *inode = wdata->inode; 1191 struct inode *inode = wdata->inode;
1128 enum pnfs_try_status trypnfs; 1192 enum pnfs_try_status trypnfs;
1129 struct nfs_server *nfss = NFS_SERVER(inode); 1193 struct nfs_server *nfss = NFS_SERVER(inode);
1130 1194
1131 wdata->mds_ops = call_ops; 1195 wdata->mds_ops = call_ops;
1196 wdata->lseg = get_lseg(lseg);
1132 1197
1133 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1198 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1134 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1199 inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1144 return trypnfs; 1209 return trypnfs;
1145} 1210}
1146 1211
1212static void
1213pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
1214{
1215 struct nfs_write_data *data;
1216 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1217 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1218
1219 desc->pg_lseg = NULL;
1220 while (!list_empty(head)) {
1221 enum pnfs_try_status trypnfs;
1222
1223 data = list_entry(head->next, struct nfs_write_data, list);
1224 list_del_init(&data->list);
1225
1226 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1227 if (trypnfs == PNFS_NOT_ATTEMPTED)
1228 pnfs_write_through_mds(desc, data);
1229 }
1230 put_lseg(lseg);
1231}
1232
1233int
1234pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1235{
1236 LIST_HEAD(head);
1237 int ret;
1238
1239 ret = nfs_generic_flush(desc, &head);
1240 if (ret != 0) {
1241 put_lseg(desc->pg_lseg);
1242 desc->pg_lseg = NULL;
1243 return ret;
1244 }
1245 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
1246 return 0;
1247}
1248EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1249
1147/* 1250/*
1148 * Called by non rpc-based layout drivers 1251 * Called by non rpc-based layout drivers
1149 */ 1252 */
@@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
1167} 1270}
1168EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1271EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1169 1272
1273static void
1274pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1275 struct nfs_read_data *data)
1276{
1277 list_splice_tail_init(&data->pages, &desc->pg_list);
1278 if (data->req && list_empty(&data->req->wb_list))
1279 nfs_list_add_request(data->req, &desc->pg_list);
1280 nfs_pageio_reset_read_mds(desc);
1281 desc->pg_recoalesce = 1;
1282 nfs_readdata_release(data);
1283}
1284
1170/* 1285/*
1171 * Call the appropriate parallel I/O subsystem read function. 1286 * Call the appropriate parallel I/O subsystem read function.
1172 */ 1287 */
1173enum pnfs_try_status 1288static enum pnfs_try_status
1174pnfs_try_to_read_data(struct nfs_read_data *rdata, 1289pnfs_try_to_read_data(struct nfs_read_data *rdata,
1175 const struct rpc_call_ops *call_ops) 1290 const struct rpc_call_ops *call_ops,
1291 struct pnfs_layout_segment *lseg)
1176{ 1292{
1177 struct inode *inode = rdata->inode; 1293 struct inode *inode = rdata->inode;
1178 struct nfs_server *nfss = NFS_SERVER(inode); 1294 struct nfs_server *nfss = NFS_SERVER(inode);
1179 enum pnfs_try_status trypnfs; 1295 enum pnfs_try_status trypnfs;
1180 1296
1181 rdata->mds_ops = call_ops; 1297 rdata->mds_ops = call_ops;
1298 rdata->lseg = get_lseg(lseg);
1182 1299
1183 dprintk("%s: Reading ino:%lu %u@%llu\n", 1300 dprintk("%s: Reading ino:%lu %u@%llu\n",
1184 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1301 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1194 return trypnfs; 1311 return trypnfs;
1195} 1312}
1196 1313
1314static void
1315pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
1316{
1317 struct nfs_read_data *data;
1318 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1319 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1320
1321 desc->pg_lseg = NULL;
1322 while (!list_empty(head)) {
1323 enum pnfs_try_status trypnfs;
1324
1325 data = list_entry(head->next, struct nfs_read_data, list);
1326 list_del_init(&data->list);
1327
1328 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1329 if (trypnfs == PNFS_NOT_ATTEMPTED)
1330 pnfs_read_through_mds(desc, data);
1331 }
1332 put_lseg(lseg);
1333}
1334
1335int
1336pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1337{
1338 LIST_HEAD(head);
1339 int ret;
1340
1341 ret = nfs_generic_pagein(desc, &head);
1342 if (ret != 0) {
1343 put_lseg(desc->pg_lseg);
1344 desc->pg_lseg = NULL;
1345 return ret;
1346 }
1347 pnfs_do_multiple_reads(desc, &head);
1348 return 0;
1349}
1350EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1351
1197/* 1352/*
1198 * Currently there is only one (whole file) write lseg. 1353 * Currently there is only one (whole file) write lseg.
1199 */ 1354 */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96bf4e6f45be..078670dfbe04 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
87 void (*free_lseg) (struct pnfs_layout_segment *lseg); 87 void (*free_lseg) (struct pnfs_layout_segment *lseg);
88 88
89 /* test for nfs page cache coalescing */ 89 /* test for nfs page cache coalescing */
90 bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 90 const struct nfs_pageio_ops *pg_read_ops;
91 const struct nfs_pageio_ops *pg_write_ops;
91 92
92 /* Returns true if layoutdriver wants to divert this request to 93 /* Returns true if layoutdriver wants to divert this request to
93 * driver's commit routine. 94 * driver's commit routine.
@@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
148/* pnfs.c */ 149/* pnfs.c */
149void get_layout_hdr(struct pnfs_layout_hdr *lo); 150void get_layout_hdr(struct pnfs_layout_hdr *lo);
150void put_lseg(struct pnfs_layout_segment *lseg); 151void put_lseg(struct pnfs_layout_segment *lseg);
151struct pnfs_layout_segment * 152
152pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 153bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
153 loff_t pos, u64 count, enum pnfs_iomode access_type, 154bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
154 gfp_t gfp_flags); 155
155void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 156void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
156void unset_pnfs_layoutdriver(struct nfs_server *); 157void unset_pnfs_layoutdriver(struct nfs_server *);
157enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 158void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
158 const struct rpc_call_ops *, int); 159int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
159enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, 160void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
160 const struct rpc_call_ops *); 161int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
161bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 162bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
162int pnfs_layout_process(struct nfs4_layoutget *lgp); 163int pnfs_layout_process(struct nfs4_layoutget *lgp);
163void pnfs_free_lseg_list(struct list_head *tmp_list); 164void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
182int _pnfs_return_layout(struct inode *); 183int _pnfs_return_layout(struct inode *);
183int pnfs_ld_write_done(struct nfs_write_data *); 184int pnfs_ld_write_done(struct nfs_write_data *);
184int pnfs_ld_read_done(struct nfs_read_data *); 185int pnfs_ld_read_done(struct nfs_read_data *);
186struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
187 struct nfs_open_context *ctx,
188 loff_t pos,
189 u64 count,
190 enum pnfs_iomode iomode,
191 gfp_t gfp_flags);
192
193void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
194
195/* nfs4_deviceid_flags */
196enum {
197 NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
198};
185 199
186/* pnfs_dev.c */ 200/* pnfs_dev.c */
187struct nfs4_deviceid_node { 201struct nfs4_deviceid_node {
@@ -189,13 +203,13 @@ struct nfs4_deviceid_node {
189 struct hlist_node tmpnode; 203 struct hlist_node tmpnode;
190 const struct pnfs_layoutdriver_type *ld; 204 const struct pnfs_layoutdriver_type *ld;
191 const struct nfs_client *nfs_client; 205 const struct nfs_client *nfs_client;
206 unsigned long flags;
192 struct nfs4_deviceid deviceid; 207 struct nfs4_deviceid deviceid;
193 atomic_t ref; 208 atomic_t ref;
194}; 209};
195 210
196void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 211void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
197struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 212struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
198struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
199void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 213void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
200void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, 214void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
201 const struct pnfs_layoutdriver_type *, 215 const struct pnfs_layoutdriver_type *,
@@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino)
293 return 0; 307 return 0;
294} 308}
295 309
296static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
297 struct inode *inode)
298{
299 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
300
301 if (ld)
302 pgio->pg_test = ld->pg_test;
303}
304
305#else /* CONFIG_NFS_V4_1 */ 310#else /* CONFIG_NFS_V4_1 */
306 311
307static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 312static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
322{ 327{
323} 328}
324 329
325static inline struct pnfs_layout_segment *
326pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
327 loff_t pos, u64 count, enum pnfs_iomode access_type,
328 gfp_t gfp_flags)
329{
330 return NULL;
331}
332
333static inline enum pnfs_try_status
334pnfs_try_to_read_data(struct nfs_read_data *data,
335 const struct rpc_call_ops *call_ops)
336{
337 return PNFS_NOT_ATTEMPTED;
338}
339
340static inline enum pnfs_try_status
341pnfs_try_to_write_data(struct nfs_write_data *data,
342 const struct rpc_call_ops *call_ops, int how)
343{
344 return PNFS_NOT_ATTEMPTED;
345}
346
347static inline int pnfs_return_layout(struct inode *ino) 330static inline int pnfs_return_layout(struct inode *ino)
348{ 331{
349 return 0; 332 return 0;
@@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
385{ 368{
386} 369}
387 370
388static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, 371static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
389 struct inode *inode)
390{ 372{
373 return false;
374}
375
376static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
377{
378 return false;
391} 379}
392 380
393static inline void 381static inline void
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index f0f8e1e22f6c..6fda5228ef56 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
100 100
101 rcu_read_lock(); 101 rcu_read_lock();
102 d = _lookup_deviceid(ld, clp, id, hash); 102 d = _lookup_deviceid(ld, clp, id, hash);
103 if (d && !atomic_inc_not_zero(&d->ref)) 103 if (d != NULL)
104 d = NULL; 104 atomic_inc(&d->ref);
105 rcu_read_unlock(); 105 rcu_read_unlock();
106 return d; 106 return d;
107} 107}
@@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
115EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); 115EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
116 116
117/* 117/*
118 * Unhash and put deviceid 118 * Remove a deviceid from cache
119 * 119 *
120 * @clp nfs_client associated with deviceid 120 * @clp nfs_client associated with deviceid
121 * @id the deviceid to unhash 121 * @id the deviceid to unhash
122 * 122 *
123 * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. 123 * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
124 */ 124 */
125struct nfs4_deviceid_node * 125void
126nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, 126nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
127 const struct nfs_client *clp, const struct nfs4_deviceid *id) 127 const struct nfs_client *clp, const struct nfs4_deviceid *id)
128{ 128{
129 struct nfs4_deviceid_node *d; 129 struct nfs4_deviceid_node *d;
@@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
134 rcu_read_unlock(); 134 rcu_read_unlock();
135 if (!d) { 135 if (!d) {
136 spin_unlock(&nfs4_deviceid_lock); 136 spin_unlock(&nfs4_deviceid_lock);
137 return NULL; 137 return;
138 } 138 }
139 hlist_del_init_rcu(&d->node); 139 hlist_del_init_rcu(&d->node);
140 spin_unlock(&nfs4_deviceid_lock); 140 spin_unlock(&nfs4_deviceid_lock);
@@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
142 142
143 /* balance the initial ref set in pnfs_insert_deviceid */ 143 /* balance the initial ref set in pnfs_insert_deviceid */
144 if (atomic_dec_and_test(&d->ref)) 144 if (atomic_dec_and_test(&d->ref))
145 return d; 145 d->ld->free_deviceid_node(d);
146
147 return NULL;
148}
149EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
150
151/*
152 * Delete a deviceid from cache
153 *
154 * @clp struct nfs_client qualifying the deviceid
155 * @id deviceid to delete
156 */
157void
158nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
159 const struct nfs_client *clp, const struct nfs4_deviceid *id)
160{
161 struct nfs4_deviceid_node *d;
162
163 d = nfs4_unhash_put_deviceid(ld, clp, id);
164 if (!d)
165 return;
166 d->ld->free_deviceid_node(d);
167} 146}
168EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); 147EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
169 148
@@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
177 INIT_HLIST_NODE(&d->tmpnode); 156 INIT_HLIST_NODE(&d->tmpnode);
178 d->ld = ld; 157 d->ld = ld;
179 d->nfs_client = nfs_client; 158 d->nfs_client = nfs_client;
159 d->flags = 0;
180 d->deviceid = *id; 160 d->deviceid = *id;
181 atomic_set(&d->ref, 1); 161 atomic_set(&d->ref, 1);
182} 162}
@@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
221 * 201 *
222 * @d deviceid node to put 202 * @d deviceid node to put
223 * 203 *
224 * @ret true iff the node was deleted 204 * return true iff the node was deleted
205 * Note that since the test for d->ref == 0 is sufficient to establish
206 * that the node is no longer hashed in the global device id cache.
225 */ 207 */
226bool 208bool
227nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) 209nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
228{ 210{
229 if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) 211 if (!atomic_dec_and_test(&d->ref))
230 return false; 212 return false;
231 hlist_del_init_rcu(&d->node);
232 spin_unlock(&nfs4_deviceid_lock);
233 synchronize_rcu();
234 d->ld->free_deviceid_node(d); 213 d->ld->free_deviceid_node(d);
235 return true; 214 return true;
236} 215}
@@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
275 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) 254 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
276 _deviceid_purge_client(clp, h); 255 _deviceid_purge_client(clp, h);
277} 256}
257
258/*
259 * Stop use of all deviceids associated with an nfs_client
260 */
261void
262nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
263{
264 struct nfs4_deviceid_node *d;
265 struct hlist_node *n;
266 int i;
267
268 rcu_read_lock();
269 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
270 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
271 if (d->nfs_client == clp)
272 set_bit(NFS_DEVICEID_INVALID, &d->flags);
273 }
274 rcu_read_unlock();
275}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a68679f538fc..2171c043ab08 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,7 @@
30 30
31#define NFSDBG_FACILITY NFSDBG_PAGECACHE 31#define NFSDBG_FACILITY NFSDBG_PAGECACHE
32 32
33static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); 33static const struct nfs_pageio_ops nfs_pageio_read_ops;
34static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
35static const struct rpc_call_ops nfs_read_partial_ops; 34static const struct rpc_call_ops nfs_read_partial_ops;
36static const struct rpc_call_ops nfs_read_full_ops; 35static const struct rpc_call_ops nfs_read_full_ops;
37 36
@@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
68 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
69} 68}
70 69
71static void nfs_readdata_release(struct nfs_read_data *rdata) 70void nfs_readdata_release(struct nfs_read_data *rdata)
72{ 71{
73 put_lseg(rdata->lseg); 72 put_lseg(rdata->lseg);
74 put_nfs_open_context(rdata->args.context); 73 put_nfs_open_context(rdata->args.context);
@@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
113 } 112 }
114} 113}
115 114
115static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
116 struct inode *inode)
117{
118 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
119 NFS_SERVER(inode)->rsize, 0);
120}
121
122void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
123{
124 pgio->pg_ops = &nfs_pageio_read_ops;
125 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
126}
127EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
128
129static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
130 struct inode *inode)
131{
132 if (!pnfs_pageio_init_read(pgio, inode))
133 nfs_pageio_init_read_mds(pgio, inode);
134}
135
116int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 136int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
117 struct page *page) 137 struct page *page)
118{ 138{
@@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
131 if (len < PAGE_CACHE_SIZE) 151 if (len < PAGE_CACHE_SIZE)
132 zero_user_segment(page, len, PAGE_CACHE_SIZE); 152 zero_user_segment(page, len, PAGE_CACHE_SIZE);
133 153
134 nfs_pageio_init(&pgio, inode, NULL, 0, 0); 154 nfs_pageio_init_read(&pgio, inode);
135 nfs_list_add_request(new, &pgio.pg_list); 155 nfs_pageio_add_request(&pgio, new);
136 pgio.pg_count = len; 156 nfs_pageio_complete(&pgio);
137
138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
139 nfs_pagein_multi(&pgio);
140 else
141 nfs_pagein_one(&pgio);
142 return 0; 157 return 0;
143} 158}
144 159
@@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
202/* 217/*
203 * Set up the NFS read request struct 218 * Set up the NFS read request struct
204 */ 219 */
205static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 220static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
206 const struct rpc_call_ops *call_ops, 221 unsigned int count, unsigned int offset)
207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg)
209{ 222{
210 struct inode *inode = req->wb_context->dentry->d_inode; 223 struct inode *inode = req->wb_context->dentry->d_inode;
211 224
212 data->req = req; 225 data->req = req;
213 data->inode = inode; 226 data->inode = inode;
214 data->cred = req->wb_context->cred; 227 data->cred = req->wb_context->cred;
215 data->lseg = get_lseg(lseg);
216 228
217 data->args.fh = NFS_FH(inode); 229 data->args.fh = NFS_FH(inode);
218 data->args.offset = req_offset(req) + offset; 230 data->args.offset = req_offset(req) + offset;
@@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
226 data->res.count = count; 238 data->res.count = count;
227 data->res.eof = 0; 239 data->res.eof = 0;
228 nfs_fattr_init(&data->fattr); 240 nfs_fattr_init(&data->fattr);
241}
229 242
230 if (data->lseg && 243static int nfs_do_read(struct nfs_read_data *data,
231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) 244 const struct rpc_call_ops *call_ops)
232 return 0; 245{
246 struct inode *inode = data->args.context->dentry->d_inode;
233 247
234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 248 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
235} 249}
236 250
251static int
252nfs_do_multiple_reads(struct list_head *head,
253 const struct rpc_call_ops *call_ops)
254{
255 struct nfs_read_data *data;
256 int ret = 0;
257
258 while (!list_empty(head)) {
259 int ret2;
260
261 data = list_entry(head->next, struct nfs_read_data, list);
262 list_del_init(&data->list);
263
264 ret2 = nfs_do_read(data, call_ops);
265 if (ret == 0)
266 ret = ret2;
267 }
268 return ret;
269}
270
237static void 271static void
238nfs_async_read_error(struct list_head *head) 272nfs_async_read_error(struct list_head *head)
239{ 273{
@@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head)
260 * won't see the new data until our attribute cache is updated. This is more 294 * won't see the new data until our attribute cache is updated. This is more
261 * or less conventional NFS client behavior. 295 * or less conventional NFS client behavior.
262 */ 296 */
263static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) 297static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
264{ 298{
265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 299 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
266 struct page *page = req->wb_page; 300 struct page *page = req->wb_page;
267 struct nfs_read_data *data; 301 struct nfs_read_data *data;
268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; 302 size_t rsize = desc->pg_bsize, nbytes;
269 unsigned int offset; 303 unsigned int offset;
270 int requests = 0; 304 int requests = 0;
271 int ret = 0; 305 int ret = 0;
272 struct pnfs_layout_segment *lseg;
273 LIST_HEAD(list);
274 306
275 nfs_list_remove_request(req); 307 nfs_list_remove_request(req);
276 308
309 offset = 0;
277 nbytes = desc->pg_count; 310 nbytes = desc->pg_count;
278 do { 311 do {
279 size_t len = min(nbytes,rsize); 312 size_t len = min(nbytes,rsize);
@@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
281 data = nfs_readdata_alloc(1); 314 data = nfs_readdata_alloc(1);
282 if (!data) 315 if (!data)
283 goto out_bad; 316 goto out_bad;
284 list_add(&data->pages, &list); 317 data->pagevec[0] = page;
318 nfs_read_rpcsetup(req, data, len, offset);
319 list_add(&data->list, res);
285 requests++; 320 requests++;
286 nbytes -= len; 321 nbytes -= len;
322 offset += len;
287 } while(nbytes != 0); 323 } while(nbytes != 0);
288 atomic_set(&req->wb_complete, requests); 324 atomic_set(&req->wb_complete, requests);
289
290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
292 req_offset(req), desc->pg_count,
293 IOMODE_READ, GFP_KERNEL);
294 ClearPageError(page); 325 ClearPageError(page);
295 offset = 0; 326 desc->pg_rpc_callops = &nfs_read_partial_ops;
296 nbytes = desc->pg_count;
297 do {
298 int ret2;
299
300 data = list_entry(list.next, struct nfs_read_data, pages);
301 list_del_init(&data->pages);
302
303 data->pagevec[0] = page;
304
305 if (nbytes < rsize)
306 rsize = nbytes;
307 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
308 rsize, offset, lseg);
309 if (ret == 0)
310 ret = ret2;
311 offset += rsize;
312 nbytes -= rsize;
313 } while (nbytes != 0);
314 put_lseg(lseg);
315 desc->pg_lseg = NULL;
316
317 return ret; 327 return ret;
318
319out_bad: 328out_bad:
320 while (!list_empty(&list)) { 329 while (!list_empty(res)) {
321 data = list_entry(list.next, struct nfs_read_data, pages); 330 data = list_entry(res->next, struct nfs_read_data, list);
322 list_del(&data->pages); 331 list_del(&data->list);
323 nfs_readdata_free(data); 332 nfs_readdata_free(data);
324 } 333 }
325 SetPageError(page); 334 SetPageError(page);
@@ -327,19 +336,19 @@ out_bad:
327 return -ENOMEM; 336 return -ENOMEM;
328} 337}
329 338
330static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) 339static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
331{ 340{
332 struct nfs_page *req; 341 struct nfs_page *req;
333 struct page **pages; 342 struct page **pages;
334 struct nfs_read_data *data; 343 struct nfs_read_data *data;
335 struct list_head *head = &desc->pg_list; 344 struct list_head *head = &desc->pg_list;
336 struct pnfs_layout_segment *lseg = desc->pg_lseg; 345 int ret = 0;
337 int ret = -ENOMEM;
338 346
339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 347 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
340 desc->pg_count)); 348 desc->pg_count));
341 if (!data) { 349 if (!data) {
342 nfs_async_read_error(head); 350 nfs_async_read_error(head);
351 ret = -ENOMEM;
343 goto out; 352 goto out;
344 } 353 }
345 354
@@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
352 *pages++ = req->wb_page; 361 *pages++ = req->wb_page;
353 } 362 }
354 req = nfs_list_entry(data->pages.next); 363 req = nfs_list_entry(data->pages.next);
355 if ((!lseg) && list_is_singular(&data->pages))
356 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
357 req_offset(req), desc->pg_count,
358 IOMODE_READ, GFP_KERNEL);
359 364
360 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 365 nfs_read_rpcsetup(req, data, desc->pg_count, 0);
361 0, lseg); 366 list_add(&data->list, res);
367 desc->pg_rpc_callops = &nfs_read_full_ops;
362out: 368out:
363 put_lseg(lseg);
364 desc->pg_lseg = NULL;
365 return ret; 369 return ret;
366} 370}
367 371
372int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
373{
374 if (desc->pg_bsize < PAGE_CACHE_SIZE)
375 return nfs_pagein_multi(desc, head);
376 return nfs_pagein_one(desc, head);
377}
378
379static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
380{
381 LIST_HEAD(head);
382 int ret;
383
384 ret = nfs_generic_pagein(desc, &head);
385 if (ret == 0)
386 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
387 return ret;
388}
389
390static const struct nfs_pageio_ops nfs_pageio_read_ops = {
391 .pg_test = nfs_generic_pg_test,
392 .pg_doio = nfs_generic_pg_readpages,
393};
394
368/* 395/*
369 * This is the callback from RPC telling us whether a reply was 396 * This is the callback from RPC telling us whether a reply was
370 * received or some error occurred (timeout or socket shutdown). 397 * received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
635 .pgio = &pgio, 662 .pgio = &pgio,
636 }; 663 };
637 struct inode *inode = mapping->host; 664 struct inode *inode = mapping->host;
638 struct nfs_server *server = NFS_SERVER(inode);
639 size_t rsize = server->rsize;
640 unsigned long npages; 665 unsigned long npages;
641 int ret = -ESTALE; 666 int ret = -ESTALE;
642 667
@@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
664 if (ret == 0) 689 if (ret == 0)
665 goto read_complete; /* all pages were read */ 690 goto read_complete; /* all pages were read */
666 691
667 if (rsize < PAGE_CACHE_SIZE) 692 nfs_pageio_init_read(&pgio, inode);
668 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
669 else
670 nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
671 693
672 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 694 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
673 695
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8d6864c2a5fa..b2fbbde58e44 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
147 147
148 alias = d_lookup(parent, &data->args.name); 148 alias = d_lookup(parent, &data->args.name);
149 if (alias != NULL) { 149 if (alias != NULL) {
150 int ret = 0; 150 int ret;
151 void *devname_garbage = NULL; 151 void *devname_garbage = NULL;
152 152
153 /* 153 /*
@@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
155 * the sillyrename information to the aliased dentry. 155 * the sillyrename information to the aliased dentry.
156 */ 156 */
157 nfs_free_dname(data); 157 nfs_free_dname(data);
158 ret = nfs_copy_dname(alias, data);
158 spin_lock(&alias->d_lock); 159 spin_lock(&alias->d_lock);
159 if (alias->d_inode != NULL && 160 if (ret == 0 && alias->d_inode != NULL &&
160 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 161 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
161 devname_garbage = alias->d_fsdata; 162 devname_garbage = alias->d_fsdata;
162 alias->d_fsdata = data; 163 alias->d_fsdata = data;
163 alias->d_flags |= DCACHE_NFSFS_RENAMED; 164 alias->d_flags |= DCACHE_NFSFS_RENAMED;
164 ret = 1; 165 ret = 1;
165 } 166 } else
167 ret = 0;
166 spin_unlock(&alias->d_lock); 168 spin_unlock(&alias->d_lock);
167 nfs_dec_sillycount(dir); 169 nfs_dec_sillycount(dir);
168 dput(alias); 170 dput(alias);
@@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
171 * point dentry is definitely not a root, so we won't need 173 * point dentry is definitely not a root, so we won't need
172 * that anymore. 174 * that anymore.
173 */ 175 */
174 if (devname_garbage) 176 kfree(devname_garbage);
175 kfree(devname_garbage);
176 return ret; 177 return ret;
177 } 178 }
178 data->dir = igrab(dir); 179 data->dir = igrab(dir);
@@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
204 if (parent == NULL) 205 if (parent == NULL)
205 goto out_free; 206 goto out_free;
206 dir = parent->d_inode; 207 dir = parent->d_inode;
207 if (nfs_copy_dname(dentry, data) != 0)
208 goto out_dput;
209 /* Non-exclusive lock protects against concurrent lookup() calls */ 208 /* Non-exclusive lock protects against concurrent lookup() calls */
210 spin_lock(&dir->i_lock); 209 spin_lock(&dir->i_lock);
211 if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { 210 if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
366 struct nfs_renamedata *data = calldata; 365 struct nfs_renamedata *data = calldata;
367 struct inode *old_dir = data->old_dir; 366 struct inode *old_dir = data->old_dir;
368 struct inode *new_dir = data->new_dir; 367 struct inode *new_dir = data->new_dir;
368 struct dentry *old_dentry = data->old_dentry;
369 struct dentry *new_dentry = data->new_dentry;
369 370
370 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { 371 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
371 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); 372 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
@@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
373 } 374 }
374 375
375 if (task->tk_status != 0) { 376 if (task->tk_status != 0) {
376 nfs_cancel_async_unlink(data->old_dentry); 377 nfs_cancel_async_unlink(old_dentry);
377 return; 378 return;
378 } 379 }
379 380
380 nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); 381 d_drop(old_dentry);
381 d_move(data->old_dentry, data->new_dentry); 382 d_drop(new_dentry);
382} 383}
383 384
384/** 385/**
@@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
501 * and only performs the unlink once the last reference to it is put. 502 * and only performs the unlink once the last reference to it is put.
502 * 503 *
503 * The final cleanup is done during dentry_iput. 504 * The final cleanup is done during dentry_iput.
505 *
506 * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
507 * could take responsibility for keeping open files referenced. The server
508 * would also need to ensure that opened-but-deleted files were kept over
509 * reboots. However, we may not assume a server does so. (RFC 5661
510 * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
511 * use to advertise that it does this; some day we may take advantage of
512 * it.))
504 */ 513 */
505int 514int
506nfs_sillyrename(struct inode *dir, struct dentry *dentry) 515nfs_sillyrename(struct inode *dir, struct dentry *dentry)
@@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
560 if (error) 569 if (error)
561 goto out_dput; 570 goto out_dput;
562 571
572 /* populate unlinkdata with the right dname */
573 error = nfs_copy_dname(sdentry,
574 (struct nfs_unlinkdata *)dentry->d_fsdata);
575 if (error) {
576 nfs_cancel_async_unlink(dentry);
577 goto out_dput;
578 }
579
563 /* run the rename task, undo unlink if it fails */ 580 /* run the rename task, undo unlink if it fails */
564 task = nfs_async_rename(dir, dir, dentry, sdentry); 581 task = nfs_async_rename(dir, dir, dentry, sdentry);
565 if (IS_ERR(task)) { 582 if (IS_ERR(task)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 00e37501fa3b..b39b37f80913 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
97 mempool_free(p, nfs_wdata_mempool); 97 mempool_free(p, nfs_wdata_mempool);
98} 98}
99 99
100static void nfs_writedata_release(struct nfs_write_data *wdata) 100void nfs_writedata_release(struct nfs_write_data *wdata)
101{ 101{
102 put_lseg(wdata->lseg); 102 put_lseg(wdata->lseg);
103 put_nfs_open_context(wdata->args.context); 103 put_nfs_open_context(wdata->args.context);
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
845/* 845/*
846 * Set up the argument/result storage required for the RPC call. 846 * Set up the argument/result storage required for the RPC call.
847 */ 847 */
848static int nfs_write_rpcsetup(struct nfs_page *req, 848static void nfs_write_rpcsetup(struct nfs_page *req,
849 struct nfs_write_data *data, 849 struct nfs_write_data *data,
850 const struct rpc_call_ops *call_ops,
851 unsigned int count, unsigned int offset, 850 unsigned int count, unsigned int offset,
852 struct pnfs_layout_segment *lseg,
853 int how) 851 int how)
854{ 852{
855 struct inode *inode = req->wb_context->dentry->d_inode; 853 struct inode *inode = req->wb_context->dentry->d_inode;
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
860 data->req = req; 858 data->req = req;
861 data->inode = inode = req->wb_context->dentry->d_inode; 859 data->inode = inode = req->wb_context->dentry->d_inode;
862 data->cred = req->wb_context->cred; 860 data->cred = req->wb_context->cred;
863 data->lseg = get_lseg(lseg);
864 861
865 data->args.fh = NFS_FH(inode); 862 data->args.fh = NFS_FH(inode);
866 data->args.offset = req_offset(req) + offset; 863 data->args.offset = req_offset(req) + offset;
@@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
872 data->args.context = get_nfs_open_context(req->wb_context); 869 data->args.context = get_nfs_open_context(req->wb_context);
873 data->args.lock_context = req->wb_lock_context; 870 data->args.lock_context = req->wb_lock_context;
874 data->args.stable = NFS_UNSTABLE; 871 data->args.stable = NFS_UNSTABLE;
875 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 872 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
876 data->args.stable = NFS_DATA_SYNC; 873 case 0:
877 if (!nfs_need_commit(NFS_I(inode))) 874 break;
878 data->args.stable = NFS_FILE_SYNC; 875 case FLUSH_COND_STABLE:
876 if (nfs_need_commit(NFS_I(inode)))
877 break;
878 default:
879 data->args.stable = NFS_FILE_SYNC;
879 } 880 }
880 881
881 data->res.fattr = &data->fattr; 882 data->res.fattr = &data->fattr;
882 data->res.count = count; 883 data->res.count = count;
883 data->res.verf = &data->verf; 884 data->res.verf = &data->verf;
884 nfs_fattr_init(&data->fattr); 885 nfs_fattr_init(&data->fattr);
886}
885 887
886 if (data->lseg && 888static int nfs_do_write(struct nfs_write_data *data,
887 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) 889 const struct rpc_call_ops *call_ops,
888 return 0; 890 int how)
891{
892 struct inode *inode = data->args.context->dentry->d_inode;
889 893
890 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 894 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
891} 895}
892 896
897static int nfs_do_multiple_writes(struct list_head *head,
898 const struct rpc_call_ops *call_ops,
899 int how)
900{
901 struct nfs_write_data *data;
902 int ret = 0;
903
904 while (!list_empty(head)) {
905 int ret2;
906
907 data = list_entry(head->next, struct nfs_write_data, list);
908 list_del_init(&data->list);
909
910 ret2 = nfs_do_write(data, call_ops, how);
911 if (ret == 0)
912 ret = ret2;
913 }
914 return ret;
915}
916
893/* If a nfs_flush_* function fails, it should remove reqs from @head and 917/* If a nfs_flush_* function fails, it should remove reqs from @head and
894 * call this on each, which will prepare them to be retried on next 918 * call this on each, which will prepare them to be retried on next
895 * writeback using standard nfs. 919 * writeback using standard nfs.
@@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req)
907 * Generate multiple small requests to write out a single 931 * Generate multiple small requests to write out a single
908 * contiguous dirty area on one page. 932 * contiguous dirty area on one page.
909 */ 933 */
910static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) 934static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
911{ 935{
912 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 936 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
913 struct page *page = req->wb_page; 937 struct page *page = req->wb_page;
914 struct nfs_write_data *data; 938 struct nfs_write_data *data;
915 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; 939 size_t wsize = desc->pg_bsize, nbytes;
916 unsigned int offset; 940 unsigned int offset;
917 int requests = 0; 941 int requests = 0;
918 int ret = 0; 942 int ret = 0;
919 struct pnfs_layout_segment *lseg;
920 LIST_HEAD(list);
921 943
922 nfs_list_remove_request(req); 944 nfs_list_remove_request(req);
923 945
@@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
927 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 949 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
928 950
929 951
952 offset = 0;
930 nbytes = desc->pg_count; 953 nbytes = desc->pg_count;
931 do { 954 do {
932 size_t len = min(nbytes, wsize); 955 size_t len = min(nbytes, wsize);
@@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
934 data = nfs_writedata_alloc(1); 957 data = nfs_writedata_alloc(1);
935 if (!data) 958 if (!data)
936 goto out_bad; 959 goto out_bad;
937 list_add(&data->pages, &list); 960 data->pagevec[0] = page;
961 nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
962 list_add(&data->list, res);
938 requests++; 963 requests++;
939 nbytes -= len; 964 nbytes -= len;
965 offset += len;
940 } while (nbytes != 0); 966 } while (nbytes != 0);
941 atomic_set(&req->wb_complete, requests); 967 atomic_set(&req->wb_complete, requests);
942 968 desc->pg_rpc_callops = &nfs_write_partial_ops;
943 BUG_ON(desc->pg_lseg);
944 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
945 req_offset(req), desc->pg_count,
946 IOMODE_RW, GFP_NOFS);
947 ClearPageError(page);
948 offset = 0;
949 nbytes = desc->pg_count;
950 do {
951 int ret2;
952
953 data = list_entry(list.next, struct nfs_write_data, pages);
954 list_del_init(&data->pages);
955
956 data->pagevec[0] = page;
957
958 if (nbytes < wsize)
959 wsize = nbytes;
960 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
961 wsize, offset, lseg, desc->pg_ioflags);
962 if (ret == 0)
963 ret = ret2;
964 offset += wsize;
965 nbytes -= wsize;
966 } while (nbytes != 0);
967
968 put_lseg(lseg);
969 desc->pg_lseg = NULL;
970 return ret; 969 return ret;
971 970
972out_bad: 971out_bad:
973 while (!list_empty(&list)) { 972 while (!list_empty(res)) {
974 data = list_entry(list.next, struct nfs_write_data, pages); 973 data = list_entry(res->next, struct nfs_write_data, list);
975 list_del(&data->pages); 974 list_del(&data->list);
976 nfs_writedata_free(data); 975 nfs_writedata_free(data);
977 } 976 }
978 nfs_redirty_request(req); 977 nfs_redirty_request(req);
@@ -987,14 +986,13 @@ out_bad:
987 * This is the case if nfs_updatepage detects a conflicting request 986 * This is the case if nfs_updatepage detects a conflicting request
988 * that has been written but not committed. 987 * that has been written but not committed.
989 */ 988 */
990static int nfs_flush_one(struct nfs_pageio_descriptor *desc) 989static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
991{ 990{
992 struct nfs_page *req; 991 struct nfs_page *req;
993 struct page **pages; 992 struct page **pages;
994 struct nfs_write_data *data; 993 struct nfs_write_data *data;
995 struct list_head *head = &desc->pg_list; 994 struct list_head *head = &desc->pg_list;
996 struct pnfs_layout_segment *lseg = desc->pg_lseg; 995 int ret = 0;
997 int ret;
998 996
999 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 997 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
1000 desc->pg_count)); 998 desc->pg_count));
@@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1016 *pages++ = req->wb_page; 1014 *pages++ = req->wb_page;
1017 } 1015 }
1018 req = nfs_list_entry(data->pages.next); 1016 req = nfs_list_entry(data->pages.next);
1019 if ((!lseg) && list_is_singular(&data->pages))
1020 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
1021 req_offset(req), desc->pg_count,
1022 IOMODE_RW, GFP_NOFS);
1023 1017
1024 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1025 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1026 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1020 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1027 1021
1028 /* Set up the argument struct */ 1022 /* Set up the argument struct */
1029 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1023 nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
1024 list_add(&data->list, res);
1025 desc->pg_rpc_callops = &nfs_write_full_ops;
1030out: 1026out:
1031 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
1032 desc->pg_lseg = NULL;
1033 return ret; 1027 return ret;
1034} 1028}
1035 1029
1036static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1030int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
1031{
1032 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1033 return nfs_flush_multi(desc, head);
1034 return nfs_flush_one(desc, head);
1035}
1036
1037static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1038{
1039 LIST_HEAD(head);
1040 int ret;
1041
1042 ret = nfs_generic_flush(desc, &head);
1043 if (ret == 0)
1044 ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
1045 desc->pg_ioflags);
1046 return ret;
1047}
1048
1049static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1050 .pg_test = nfs_generic_pg_test,
1051 .pg_doio = nfs_generic_pg_writepages,
1052};
1053
1054static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1037 struct inode *inode, int ioflags) 1055 struct inode *inode, int ioflags)
1038{ 1056{
1039 size_t wsize = NFS_SERVER(inode)->wsize; 1057 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
1058 NFS_SERVER(inode)->wsize, ioflags);
1059}
1060
1061void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1062{
1063 pgio->pg_ops = &nfs_pageio_write_ops;
1064 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1065}
1066EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1040 1067
1041 if (wsize < PAGE_CACHE_SIZE) 1068static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1042 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1069 struct inode *inode, int ioflags)
1043 else 1070{
1044 nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); 1071 if (!pnfs_pageio_init_write(pgio, inode, ioflags))
1072 nfs_pageio_init_write_mds(pgio, inode, ioflags);
1045} 1073}
1046 1074
1047/* 1075/*
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 3b8d3979e03b..98e544274390 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -93,7 +93,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
93 93
94 memset(bh->b_data, 0, sizeof(struct omfs_inode)); 94 memset(bh->b_data, 0, sizeof(struct omfs_inode));
95 95
96 if (inode->i_mode & S_IFDIR) { 96 if (S_ISDIR(inode->i_mode)) {
97 memset(&bh->b_data[OMFS_DIR_START], 0xff, 97 memset(&bh->b_data[OMFS_DIR_START], 0xff,
98 sbi->s_sys_blocksize - OMFS_DIR_START); 98 sbi->s_sys_blocksize - OMFS_DIR_START);
99 } else 99 } else
diff --git a/fs/open.c b/fs/open.c
index 739b751aa73e..f71192109457 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -446,74 +446,52 @@ out:
446 return error; 446 return error;
447} 447}
448 448
449SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) 449static int chmod_common(struct path *path, umode_t mode)
450{ 450{
451 struct inode * inode; 451 struct inode *inode = path->dentry->d_inode;
452 struct dentry * dentry;
453 struct file * file;
454 int err = -EBADF;
455 struct iattr newattrs; 452 struct iattr newattrs;
453 int error;
456 454
457 file = fget(fd); 455 error = mnt_want_write(path->mnt);
458 if (!file) 456 if (error)
459 goto out; 457 return error;
460
461 dentry = file->f_path.dentry;
462 inode = dentry->d_inode;
463
464 audit_inode(NULL, dentry);
465
466 err = mnt_want_write_file(file);
467 if (err)
468 goto out_putf;
469 mutex_lock(&inode->i_mutex); 458 mutex_lock(&inode->i_mutex);
470 err = security_path_chmod(dentry, file->f_vfsmnt, mode); 459 error = security_path_chmod(path->dentry, path->mnt, mode);
471 if (err) 460 if (error)
472 goto out_unlock; 461 goto out_unlock;
473 if (mode == (mode_t) -1)
474 mode = inode->i_mode;
475 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 462 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
476 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 463 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
477 err = notify_change(dentry, &newattrs); 464 error = notify_change(path->dentry, &newattrs);
478out_unlock: 465out_unlock:
479 mutex_unlock(&inode->i_mutex); 466 mutex_unlock(&inode->i_mutex);
480 mnt_drop_write(file->f_path.mnt); 467 mnt_drop_write(path->mnt);
481out_putf: 468 return error;
482 fput(file); 469}
483out: 470
471SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
472{
473 struct file * file;
474 int err = -EBADF;
475
476 file = fget(fd);
477 if (file) {
478 audit_inode(NULL, file->f_path.dentry);
479 err = chmod_common(&file->f_path, mode);
480 fput(file);
481 }
484 return err; 482 return err;
485} 483}
486 484
487SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) 485SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
488{ 486{
489 struct path path; 487 struct path path;
490 struct inode *inode;
491 int error; 488 int error;
492 struct iattr newattrs;
493 489
494 error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); 490 error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
495 if (error) 491 if (!error) {
496 goto out; 492 error = chmod_common(&path, mode);
497 inode = path.dentry->d_inode; 493 path_put(&path);
498 494 }
499 error = mnt_want_write(path.mnt);
500 if (error)
501 goto dput_and_out;
502 mutex_lock(&inode->i_mutex);
503 error = security_path_chmod(path.dentry, path.mnt, mode);
504 if (error)
505 goto out_unlock;
506 if (mode == (mode_t) -1)
507 mode = inode->i_mode;
508 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
509 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
510 error = notify_change(path.dentry, &newattrs);
511out_unlock:
512 mutex_unlock(&inode->i_mutex);
513 mnt_drop_write(path.mnt);
514dput_and_out:
515 path_put(&path);
516out:
517 return error; 495 return error;
518} 496}
519 497
diff --git a/fs/pipe.c b/fs/pipe.c
index 1b7f9af67ccf..0e0be1dc0f8e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -948,7 +948,7 @@ static const struct dentry_operations pipefs_dentry_operations = {
948 948
949static struct inode * get_pipe_inode(void) 949static struct inode * get_pipe_inode(void)
950{ 950{
951 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 951 struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
952 struct pipe_inode_info *pipe; 952 struct pipe_inode_info *pipe;
953 953
954 if (!inode) 954 if (!inode)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1637f17c37c..9d99131d0d65 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -620,8 +620,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
620 if (!ent) goto out; 620 if (!ent) goto out;
621 621
622 memset(ent, 0, sizeof(struct proc_dir_entry)); 622 memset(ent, 0, sizeof(struct proc_dir_entry));
623 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 623 memcpy(ent->name, fn, len + 1);
624 ent->name = ((char *) ent) + sizeof(*ent);
625 ent->namelen = len; 624 ent->namelen = len;
626 ent->mode = mode; 625 ent->mode = mode;
627 ent->nlink = nlink; 626 ent->nlink = nlink;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 9020ac15baaa..f738024ccc8e 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -197,15 +197,15 @@ static __net_init int proc_net_ns_init(struct net *net)
197 int err; 197 int err;
198 198
199 err = -ENOMEM; 199 err = -ENOMEM;
200 netd = kzalloc(sizeof(*netd), GFP_KERNEL); 200 netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
201 if (!netd) 201 if (!netd)
202 goto out; 202 goto out;
203 203
204 netd->data = net; 204 netd->data = net;
205 netd->nlink = 2; 205 netd->nlink = 2;
206 netd->name = "net";
207 netd->namelen = 3; 206 netd->namelen = 3;
208 netd->parent = &proc_root; 207 netd->parent = &proc_root;
208 memcpy(netd->name, "net", 4);
209 209
210 err = -EEXIST; 210 err = -EEXIST;
211 net_statd = proc_net_mkdir(net, "stat", netd); 211 net_statd = proc_net_mkdir(net, "stat", netd);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index d6c3b416529b..9a8a2b77b874 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -186,13 +186,13 @@ static const struct inode_operations proc_root_inode_operations = {
186struct proc_dir_entry proc_root = { 186struct proc_dir_entry proc_root = {
187 .low_ino = PROC_ROOT_INO, 187 .low_ino = PROC_ROOT_INO,
188 .namelen = 5, 188 .namelen = 5,
189 .name = "/proc",
190 .mode = S_IFDIR | S_IRUGO | S_IXUGO, 189 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
191 .nlink = 2, 190 .nlink = 2,
192 .count = ATOMIC_INIT(1), 191 .count = ATOMIC_INIT(1),
193 .proc_iops = &proc_root_inode_operations, 192 .proc_iops = &proc_root_inode_operations,
194 .proc_fops = &proc_root_operations, 193 .proc_fops = &proc_root_operations,
195 .parent = &proc_root, 194 .parent = &proc_root,
195 .name = "/proc",
196}; 196};
197 197
198int pid_ns_prepare_proc(struct pid_namespace *ns) 198int pid_ns_prepare_proc(struct pid_namespace *ns)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5907b49e4d7e..179f1c33ea57 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -166,8 +166,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
166 * long as offset isn't at the end of the file then the 166 * long as offset isn't at the end of the file then the
167 * offset is data. 167 * offset is data.
168 */ 168 */
169 if (offset >= inode->i_size) 169 if (offset >= inode->i_size) {
170 return -ENXIO; 170 retval = -ENXIO;
171 goto out;
172 }
171 break; 173 break;
172 case SEEK_HOLE: 174 case SEEK_HOLE:
173 /* 175 /*
@@ -175,8 +177,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
175 * as long as offset isn't i_size or larger, return 177 * as long as offset isn't i_size or larger, return
176 * i_size. 178 * i_size.
177 */ 179 */
178 if (offset >= inode->i_size) 180 if (offset >= inode->i_size) {
179 return -ENXIO; 181 retval = -ENXIO;
182 goto out;
183 }
180 offset = inode->i_size; 184 offset = inode->i_size;
181 break; 185 break;
182 } 186 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index acca2c5ca3fa..f7ce7debe14c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -265,7 +265,7 @@ xfs_open_by_handle(
265 return PTR_ERR(filp); 265 return PTR_ERR(filp);
266 } 266 }
267 267
268 if (inode->i_mode & S_IFREG) { 268 if (S_ISREG(inode->i_mode)) {
269 filp->f_flags |= O_NOATIME; 269 filp->f_flags |= O_NOATIME;
270 filp->f_mode |= FMODE_NOCMTIME; 270 filp->f_mode |= FMODE_NOCMTIME;
271 } 271 }
@@ -850,14 +850,14 @@ xfs_set_diflags(
850 di_flags |= XFS_DIFLAG_NODEFRAG; 850 di_flags |= XFS_DIFLAG_NODEFRAG;
851 if (xflags & XFS_XFLAG_FILESTREAM) 851 if (xflags & XFS_XFLAG_FILESTREAM)
852 di_flags |= XFS_DIFLAG_FILESTREAM; 852 di_flags |= XFS_DIFLAG_FILESTREAM;
853 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 853 if (S_ISDIR(ip->i_d.di_mode)) {
854 if (xflags & XFS_XFLAG_RTINHERIT) 854 if (xflags & XFS_XFLAG_RTINHERIT)
855 di_flags |= XFS_DIFLAG_RTINHERIT; 855 di_flags |= XFS_DIFLAG_RTINHERIT;
856 if (xflags & XFS_XFLAG_NOSYMLINKS) 856 if (xflags & XFS_XFLAG_NOSYMLINKS)
857 di_flags |= XFS_DIFLAG_NOSYMLINKS; 857 di_flags |= XFS_DIFLAG_NOSYMLINKS;
858 if (xflags & XFS_XFLAG_EXTSZINHERIT) 858 if (xflags & XFS_XFLAG_EXTSZINHERIT)
859 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 859 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
860 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 860 } else if (S_ISREG(ip->i_d.di_mode)) {
861 if (xflags & XFS_XFLAG_REALTIME) 861 if (xflags & XFS_XFLAG_REALTIME)
862 di_flags |= XFS_DIFLAG_REALTIME; 862 di_flags |= XFS_DIFLAG_REALTIME;
863 if (xflags & XFS_XFLAG_EXTSIZE) 863 if (xflags & XFS_XFLAG_EXTSIZE)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c51a3f903633..ab3e5c6c4642 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -414,7 +414,7 @@ xfs_bmap_add_attrfork_local(
414 414
415 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 415 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
416 return 0; 416 return 0;
417 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 417 if (S_ISDIR(ip->i_d.di_mode)) {
418 mp = ip->i_mount; 418 mp = ip->i_mount;
419 memset(&dargs, 0, sizeof(dargs)); 419 memset(&dargs, 0, sizeof(dargs));
420 dargs.dp = ip; 420 dargs.dp = ip;
@@ -3344,8 +3344,7 @@ xfs_bmap_local_to_extents(
3344 * We don't want to deal with the case of keeping inode data inline yet. 3344 * We don't want to deal with the case of keeping inode data inline yet.
3345 * So sending the data fork of a regular inode is invalid. 3345 * So sending the data fork of a regular inode is invalid.
3346 */ 3346 */
3347 ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG && 3347 ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
3348 whichfork == XFS_DATA_FORK));
3349 ifp = XFS_IFORK_PTR(ip, whichfork); 3348 ifp = XFS_IFORK_PTR(ip, whichfork);
3350 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); 3349 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
3351 flags = 0; 3350 flags = 0;
@@ -4052,7 +4051,7 @@ xfs_bmap_one_block(
4052 4051
4053#ifndef DEBUG 4052#ifndef DEBUG
4054 if (whichfork == XFS_DATA_FORK) { 4053 if (whichfork == XFS_DATA_FORK) {
4055 return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ? 4054 return S_ISREG(ip->i_d.di_mode) ?
4056 (ip->i_size == ip->i_mount->m_sb.sb_blocksize) : 4055 (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
4057 (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); 4056 (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
4058 } 4057 }
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 4580ce00aeb4..a2e27010c7fb 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -121,7 +121,7 @@ xfs_dir_isempty(
121{ 121{
122 xfs_dir2_sf_hdr_t *sfp; 122 xfs_dir2_sf_hdr_t *sfp;
123 123
124 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 124 ASSERT(S_ISDIR(dp->i_d.di_mode));
125 if (dp->i_d.di_size == 0) /* might happen during shutdown. */ 125 if (dp->i_d.di_size == 0) /* might happen during shutdown. */
126 return 1; 126 return 1;
127 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) 127 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -179,7 +179,7 @@ xfs_dir_init(
179 memset((char *)&args, 0, sizeof(args)); 179 memset((char *)&args, 0, sizeof(args));
180 args.dp = dp; 180 args.dp = dp;
181 args.trans = tp; 181 args.trans = tp;
182 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 182 ASSERT(S_ISDIR(dp->i_d.di_mode));
183 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) 183 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
184 return error; 184 return error;
185 return xfs_dir2_sf_create(&args, pdp->i_ino); 185 return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -202,7 +202,7 @@ xfs_dir_createname(
202 int rval; 202 int rval;
203 int v; /* type-checking value */ 203 int v; /* type-checking value */
204 204
205 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 205 ASSERT(S_ISDIR(dp->i_d.di_mode));
206 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 206 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
207 return rval; 207 return rval;
208 XFS_STATS_INC(xs_dir_create); 208 XFS_STATS_INC(xs_dir_create);
@@ -278,7 +278,7 @@ xfs_dir_lookup(
278 int rval; 278 int rval;
279 int v; /* type-checking value */ 279 int v; /* type-checking value */
280 280
281 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 281 ASSERT(S_ISDIR(dp->i_d.di_mode));
282 XFS_STATS_INC(xs_dir_lookup); 282 XFS_STATS_INC(xs_dir_lookup);
283 283
284 memset(&args, 0, sizeof(xfs_da_args_t)); 284 memset(&args, 0, sizeof(xfs_da_args_t));
@@ -333,7 +333,7 @@ xfs_dir_removename(
333 int rval; 333 int rval;
334 int v; /* type-checking value */ 334 int v; /* type-checking value */
335 335
336 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 336 ASSERT(S_ISDIR(dp->i_d.di_mode));
337 XFS_STATS_INC(xs_dir_remove); 337 XFS_STATS_INC(xs_dir_remove);
338 338
339 memset(&args, 0, sizeof(xfs_da_args_t)); 339 memset(&args, 0, sizeof(xfs_da_args_t));
@@ -382,7 +382,7 @@ xfs_readdir(
382 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 382 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
383 return XFS_ERROR(EIO); 383 return XFS_ERROR(EIO);
384 384
385 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 385 ASSERT(S_ISDIR(dp->i_d.di_mode));
386 XFS_STATS_INC(xs_dir_getdents); 386 XFS_STATS_INC(xs_dir_getdents);
387 387
388 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 388 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -414,7 +414,7 @@ xfs_dir_replace(
414 int rval; 414 int rval;
415 int v; /* type-checking value */ 415 int v; /* type-checking value */
416 416
417 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 417 ASSERT(S_ISDIR(dp->i_d.di_mode));
418 418
419 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 419 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
420 return rval; 420 return rval;
@@ -464,7 +464,7 @@ xfs_dir_canenter(
464 if (resblks) 464 if (resblks)
465 return 0; 465 return 0;
466 466
467 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 467 ASSERT(S_ISDIR(dp->i_d.di_mode));
468 468
469 memset(&args, 0, sizeof(xfs_da_args_t)); 469 memset(&args, 0, sizeof(xfs_da_args_t));
470 args.name = name->name; 470 args.name = name->name;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9124425b7f2f..3ff3d9e23ded 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -344,9 +344,9 @@ _xfs_filestream_update_ag(
344 * Either ip is a regular file and pip is a directory, or ip is a 344 * Either ip is a regular file and pip is a directory, or ip is a
345 * directory and pip is NULL. 345 * directory and pip is NULL.
346 */ 346 */
347 ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip && 347 ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
348 (pip->i_d.di_mode & S_IFDIR)) || 348 S_ISDIR(pip->i_d.di_mode)) ||
349 ((ip->i_d.di_mode & S_IFDIR) && !pip))); 349 (S_ISDIR(ip->i_d.di_mode) && !pip)));
350 350
351 mp = ip->i_mount; 351 mp = ip->i_mount;
352 cache = mp->m_filestream; 352 cache = mp->m_filestream;
@@ -537,7 +537,7 @@ xfs_filestream_lookup_ag(
537 xfs_agnumber_t ag; 537 xfs_agnumber_t ag;
538 int ref; 538 int ref;
539 539
540 if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) { 540 if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
541 ASSERT(0); 541 ASSERT(0);
542 return NULLAGNUMBER; 542 return NULLAGNUMBER;
543 } 543 }
@@ -579,9 +579,9 @@ xfs_filestream_associate(
579 xfs_agnumber_t ag, rotorstep, startag; 579 xfs_agnumber_t ag, rotorstep, startag;
580 int err = 0; 580 int err = 0;
581 581
582 ASSERT(pip->i_d.di_mode & S_IFDIR); 582 ASSERT(S_ISDIR(pip->i_d.di_mode));
583 ASSERT(ip->i_d.di_mode & S_IFREG); 583 ASSERT(S_ISREG(ip->i_d.di_mode));
584 if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG)) 584 if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
585 return -EINVAL; 585 return -EINVAL;
586 586
587 mp = pip->i_mount; 587 mp = pip->i_mount;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cc21ddf9f7e..2fcca4b03ed3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -368,7 +368,7 @@ xfs_iformat(
368 /* 368 /*
369 * no local regular files yet 369 * no local regular files yet
370 */ 370 */
371 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { 371 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
372 xfs_warn(ip->i_mount, 372 xfs_warn(ip->i_mount,
373 "corrupt inode %Lu (local format for regular file).", 373 "corrupt inode %Lu (local format for regular file).",
374 (unsigned long long) ip->i_ino); 374 (unsigned long long) ip->i_ino);
@@ -1040,7 +1040,7 @@ xfs_ialloc(
1040 1040
1041 if (pip && XFS_INHERIT_GID(pip)) { 1041 if (pip && XFS_INHERIT_GID(pip)) {
1042 ip->i_d.di_gid = pip->i_d.di_gid; 1042 ip->i_d.di_gid = pip->i_d.di_gid;
1043 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 1043 if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
1044 ip->i_d.di_mode |= S_ISGID; 1044 ip->i_d.di_mode |= S_ISGID;
1045 } 1045 }
1046 } 1046 }
@@ -1097,14 +1097,14 @@ xfs_ialloc(
1097 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1097 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1098 uint di_flags = 0; 1098 uint di_flags = 0;
1099 1099
1100 if ((mode & S_IFMT) == S_IFDIR) { 1100 if (S_ISDIR(mode)) {
1101 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1101 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1102 di_flags |= XFS_DIFLAG_RTINHERIT; 1102 di_flags |= XFS_DIFLAG_RTINHERIT;
1103 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1103 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1104 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1104 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1105 ip->i_d.di_extsize = pip->i_d.di_extsize; 1105 ip->i_d.di_extsize = pip->i_d.di_extsize;
1106 } 1106 }
1107 } else if ((mode & S_IFMT) == S_IFREG) { 1107 } else if (S_ISREG(mode)) {
1108 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1108 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1109 di_flags |= XFS_DIFLAG_REALTIME; 1109 di_flags |= XFS_DIFLAG_REALTIME;
1110 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1110 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1188,7 +1188,7 @@ xfs_isize_check(
1188 int nimaps; 1188 int nimaps;
1189 xfs_bmbt_irec_t imaps[2]; 1189 xfs_bmbt_irec_t imaps[2];
1190 1190
1191 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 1191 if (!S_ISREG(ip->i_d.di_mode))
1192 return; 1192 return;
1193 1193
1194 if (XFS_IS_REALTIME_INODE(ip)) 1194 if (XFS_IS_REALTIME_INODE(ip))
@@ -1828,7 +1828,7 @@ xfs_ifree(
1828 ASSERT(ip->i_d.di_nextents == 0); 1828 ASSERT(ip->i_d.di_nextents == 0);
1829 ASSERT(ip->i_d.di_anextents == 0); 1829 ASSERT(ip->i_d.di_anextents == 0);
1830 ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || 1830 ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
1831 ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 1831 (!S_ISREG(ip->i_d.di_mode)));
1832 ASSERT(ip->i_d.di_nblocks == 0); 1832 ASSERT(ip->i_d.di_nblocks == 0);
1833 1833
1834 /* 1834 /*
@@ -2671,7 +2671,7 @@ xfs_iflush_int(
2671 __func__, ip->i_ino, ip, ip->i_d.di_magic); 2671 __func__, ip->i_ino, ip, ip->i_d.di_magic);
2672 goto corrupt_out; 2672 goto corrupt_out;
2673 } 2673 }
2674 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 2674 if (S_ISREG(ip->i_d.di_mode)) {
2675 if (XFS_TEST_ERROR( 2675 if (XFS_TEST_ERROR(
2676 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 2676 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
2677 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 2677 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2681,7 +2681,7 @@ xfs_iflush_int(
2681 __func__, ip->i_ino, ip); 2681 __func__, ip->i_ino, ip);
2682 goto corrupt_out; 2682 goto corrupt_out;
2683 } 2683 }
2684 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 2684 } else if (S_ISDIR(ip->i_d.di_mode)) {
2685 if (XFS_TEST_ERROR( 2685 if (XFS_TEST_ERROR(
2686 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 2686 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
2687 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 2687 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a97644ab945a..2380a4bcbece 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -263,7 +263,7 @@ typedef struct xfs_inode {
263 struct inode i_vnode; /* embedded VFS inode */ 263 struct inode i_vnode; /* embedded VFS inode */
264} xfs_inode_t; 264} xfs_inode_t;
265 265
266#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 266#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
267 (ip)->i_size : (ip)->i_d.di_size; 267 (ip)->i_size : (ip)->i_d.di_size;
268 268
269/* Convert from vfs inode to xfs inode */ 269/* Convert from vfs inode to xfs inode */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8fe4206de057..052a2c0ec5fb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2283,7 +2283,7 @@ xlog_recover_inode_pass2(
2283 /* Take the opportunity to reset the flush iteration count */ 2283 /* Take the opportunity to reset the flush iteration count */
2284 dicp->di_flushiter = 0; 2284 dicp->di_flushiter = 0;
2285 2285
2286 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { 2286 if (unlikely(S_ISREG(dicp->di_mode))) {
2287 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2287 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2288 (dicp->di_format != XFS_DINODE_FMT_BTREE)) { 2288 (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
2289 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 2289 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2296,7 +2296,7 @@ xlog_recover_inode_pass2(
2296 error = EFSCORRUPTED; 2296 error = EFSCORRUPTED;
2297 goto error; 2297 goto error;
2298 } 2298 }
2299 } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { 2299 } else if (unlikely(S_ISDIR(dicp->di_mode))) {
2300 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2300 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
2301 (dicp->di_format != XFS_DINODE_FMT_BTREE) && 2301 (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
2302 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { 2302 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7f25245da289..092e16ae4d9d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1331,7 +1331,7 @@ xfs_mountfs(
1331 1331
1332 ASSERT(rip != NULL); 1332 ASSERT(rip != NULL);
1333 1333
1334 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1334 if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
1335 xfs_warn(mp, "corrupted root inode %llu: not a directory", 1335 xfs_warn(mp, "corrupted root inode %llu: not a directory",
1336 (unsigned long long)rip->i_ino); 1336 (unsigned long long)rip->i_ino);
1337 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1337 xfs_iunlock(rip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 77a59891734e..df78c297d1a1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -116,7 +116,7 @@ xfs_rename(
116 trace_xfs_rename(src_dp, target_dp, src_name, target_name); 116 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
117 117
118 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
119 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); 119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
120 120
121 if (src_is_directory) { 121 if (src_is_directory) {
122 /* 122 /*
@@ -226,7 +226,7 @@ xfs_rename(
226 * target and source are directories and that target can be 226 * target and source are directories and that target can be
227 * destroyed, or that neither is a directory. 227 * destroyed, or that neither is a directory.
228 */ 228 */
229 if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 229 if (S_ISDIR(target_ip->i_d.di_mode)) {
230 /* 230 /*
231 * Make sure target dir is empty. 231 * Make sure target dir is empty.
232 */ 232 */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 88d121486c52..9322e13f0c63 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -121,7 +121,7 @@ xfs_readlink(
121 121
122 xfs_ilock(ip, XFS_ILOCK_SHARED); 122 xfs_ilock(ip, XFS_ILOCK_SHARED);
123 123
124 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 124 ASSERT(S_ISLNK(ip->i_d.di_mode));
125 ASSERT(ip->i_d.di_size <= MAXPATHLEN); 125 ASSERT(ip->i_d.di_size <= MAXPATHLEN);
126 126
127 pathlen = ip->i_d.di_size; 127 pathlen = ip->i_d.di_size;
@@ -529,7 +529,7 @@ xfs_release(
529 if (ip->i_d.di_nlink == 0) 529 if (ip->i_d.di_nlink == 0)
530 return 0; 530 return 0;
531 531
532 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 532 if ((S_ISREG(ip->i_d.di_mode) &&
533 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || 533 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
534 ip->i_delayed_blks > 0)) && 534 ip->i_delayed_blks > 0)) &&
535 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 535 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
@@ -610,7 +610,7 @@ xfs_inactive(
610 truncate = ((ip->i_d.di_nlink == 0) && 610 truncate = ((ip->i_d.di_nlink == 0) &&
611 ((ip->i_d.di_size != 0) || (ip->i_size != 0) || 611 ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
612 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && 612 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
613 ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); 613 S_ISREG(ip->i_d.di_mode));
614 614
615 mp = ip->i_mount; 615 mp = ip->i_mount;
616 616
@@ -621,7 +621,7 @@ xfs_inactive(
621 goto out; 621 goto out;
622 622
623 if (ip->i_d.di_nlink != 0) { 623 if (ip->i_d.di_nlink != 0) {
624 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 624 if ((S_ISREG(ip->i_d.di_mode) &&
625 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || 625 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
626 ip->i_delayed_blks > 0)) && 626 ip->i_delayed_blks > 0)) &&
627 (ip->i_df.if_flags & XFS_IFEXTENTS) && 627 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -669,7 +669,7 @@ xfs_inactive(
669 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 669 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
670 return VN_INACTIVE_CACHE; 670 return VN_INACTIVE_CACHE;
671 } 671 }
672 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) { 672 } else if (S_ISLNK(ip->i_d.di_mode)) {
673 673
674 /* 674 /*
675 * If we get an error while cleaning up a 675 * If we get an error while cleaning up a