aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c15
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_inode_dotl.c2
-rw-r--r--fs/9p/vfs_super.c80
-rw-r--r--fs/adfs/map.c2
-rw-r--r--fs/afs/cache.c12
-rw-r--r--fs/afs/cell.c2
-rw-r--r--fs/attr.c2
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/befs/ChangeLog10
-rw-r--r--fs/befs/befs_fs_types.h2
-rw-r--r--fs/befs/btree.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/bio.c2
-rw-r--r--fs/block_dev.c29
-rw-r--r--fs/btrfs/acl.c12
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/disk-io.c7
-rw-r--r--fs/btrfs/extent-tree.c166
-rw-r--r--fs/btrfs/extent_io.c85
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file.c23
-rw-r--r--fs/btrfs/free-space-cache.c216
-rw-r--r--fs/btrfs/inode.c216
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/root-tree.c18
-rw-r--r--fs/btrfs/super.c61
-rw-r--r--fs/btrfs/transaction.c50
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/tree-log.c7
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/btrfs/xattr.c33
-rw-r--r--fs/cachefiles/interface.c2
-rw-r--r--fs/ceph/addr.c9
-rw-r--r--fs/ceph/caps.c32
-rw-r--r--fs/ceph/file.c5
-rw-r--r--fs/ceph/inode.c7
-rw-r--r--fs/ceph/mds_client.c8
-rw-r--r--fs/ceph/snap.c4
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c12
-rw-r--r--fs/cifs/AUTHORS2
-rw-r--r--fs/cifs/Kconfig35
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README28
-rw-r--r--fs/cifs/cache.c2
-rw-r--r--fs/cifs/cifs_debug.c45
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifs_spnego.c4
-rw-r--r--fs/cifs/cifs_unicode.c49
-rw-r--r--fs/cifs/cifs_unicode.h5
-rw-r--r--fs/cifs/cifsacl.c483
-rw-r--r--fs/cifs/cifsacl.h25
-rw-r--r--fs/cifs/cifsencrypt.c33
-rw-r--r--fs/cifs/cifsfs.c125
-rw-r--r--fs/cifs/cifsfs.h20
-rw-r--r--fs/cifs/cifsglob.h21
-rw-r--r--fs/cifs/cifspdu.h37
-rw-r--r--fs/cifs/cifsproto.h30
-rw-r--r--fs/cifs/cifssmb.c393
-rw-r--r--fs/cifs/connect.c479
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/export.c4
-rw-r--r--fs/cifs/file.c237
-rw-r--r--fs/cifs/inode.c129
-rw-r--r--fs/cifs/link.c4
-rw-r--r--fs/cifs/misc.c13
-rw-r--r--fs/cifs/netmisc.c7
-rw-r--r--fs/cifs/sess.c51
-rw-r--r--fs/cifs/smbdes.c418
-rw-r--r--fs/cifs/smbencrypt.c124
-rw-r--r--fs/cifs/transport.c66
-rw-r--r--fs/cifs/xattr.c20
-rw-r--r--fs/configfs/dir.c41
-rw-r--r--fs/dcache.c90
-rw-r--r--fs/debugfs/file.c19
-rw-r--r--fs/dlm/lock.c2
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/dlm/recover.c2
-rw-r--r--fs/ecryptfs/crypto.c21
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h7
-rw-r--r--fs/ecryptfs/file.c25
-rw-r--r--fs/ecryptfs/inode.c60
-rw-r--r--fs/ecryptfs/kthread.c6
-rw-r--r--fs/ecryptfs/main.c76
-rw-r--r--fs/ecryptfs/super.c16
-rw-r--r--fs/eventpoll.c8
-rw-r--r--fs/exec.c1
-rw-r--r--fs/exofs/common.h4
-rw-r--r--fs/ext2/balloc.c6
-rw-r--r--fs/ext2/inode.c8
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext3/balloc.c10
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext4/balloc.c2
-rw-r--r--fs/ext4/ext4_jbd2.h4
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/fsync.c19
-rw-r--r--fs/ext4/inode.c55
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/super.c78
-rw-r--r--fs/fhandle.c1
-rw-r--r--fs/file.c18
-rw-r--r--fs/filesystems.c3
-rw-r--r--fs/freevxfs/vxfs_fshead.c2
-rw-r--r--fs/freevxfs/vxfs_lookup.c2
-rw-r--r--fs/freevxfs/vxfs_olt.h2
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/aops.c10
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dir.c199
-rw-r--r--fs/gfs2/dir.h4
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/file.c104
-rw-r--r--fs/gfs2/glock.c102
-rw-r--r--fs/gfs2/glock.h3
-rw-r--r--fs/gfs2/glops.c176
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/inode.c1560
-rw-r--r--fs/gfs2/inode.h11
-rw-r--r--fs/gfs2/log.c179
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c39
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/ops_fstype.c34
-rw-r--r--fs/gfs2/ops_inode.c1344
-rw-r--r--fs/gfs2/rgrp.c24
-rw-r--r--fs/gfs2/super.c154
-rw-r--r--fs/gfs2/sys.c6
-rw-r--r--fs/gfs2/trace_gfs2.h38
-rw-r--r--fs/hpfs/Kconfig1
-rw-r--r--fs/hpfs/alloc.c118
-rw-r--r--fs/hpfs/anode.c138
-rw-r--r--fs/hpfs/buffer.c24
-rw-r--r--fs/hpfs/dir.c22
-rw-r--r--fs/hpfs/dnode.c174
-rw-r--r--fs/hpfs/ea.c136
-rw-r--r--fs/hpfs/file.c31
-rw-r--r--fs/hpfs/hpfs.h439
-rw-r--r--fs/hpfs/hpfs_fn.h80
-rw-r--r--fs/hpfs/inode.c47
-rw-r--r--fs/hpfs/map.c56
-rw-r--r--fs/hpfs/name.c33
-rw-r--r--fs/hpfs/namei.c106
-rw-r--r--fs/hpfs/super.c118
-rw-r--r--fs/inode.c9
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/journal.c4
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jbd/transaction.c2
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c7
-rw-r--r--fs/jbd2/revoke.c2
-rw-r--r--fs/jbd2/transaction.c2
-rw-r--r--fs/jffs2/TODO2
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/summary.c4
-rw-r--r--fs/jffs2/wbuf.c2
-rw-r--r--fs/jfs/jfs_dmap.c4
-rw-r--r--fs/jfs/jfs_extent.c6
-rw-r--r--fs/jfs/jfs_imap.c14
-rw-r--r--fs/jfs/jfs_logmgr.h2
-rw-r--r--fs/jfs/jfs_metapage.h2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/resize.c4
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/logfs/dev_bdev.c1
-rw-r--r--fs/logfs/dev_mtd.c2
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/logfs/super.c8
-rw-r--r--fs/mbcache.c2
-rw-r--r--fs/namei.c5
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/callback_xdr.c2
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/namespace.c60
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4filelayout.c27
-rw-r--r--fs/nfs/nfs4filelayout.h4
-rw-r--r--fs/nfs/nfs4filelayoutdev.c34
-rw-r--r--fs/nfs/nfs4proc.c126
-rw-r--r--fs/nfs/nfs4state.c51
-rw-r--r--fs/nfs/nfs4xdr.c53
-rw-r--r--fs/nfs/pnfs.c42
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/super.c13
-rw-r--r--fs/nfs/write.c14
-rw-r--r--fs/nfs_common/nfsacl.c2
-rw-r--r--fs/nfsd/lockd.c1
-rw-r--r--fs/nfsd/nfs3xdr.c2
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/nfsxdr.c2
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/nilfs2/alloc.c2
-rw-r--r--fs/nilfs2/file.c11
-rw-r--r--fs/nilfs2/nilfs.h14
-rw-r--r--fs/nilfs2/page.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c3
-rw-r--r--fs/notify/inotify/inotify_user.c39
-rw-r--r--fs/notify/mark.c2
-rw-r--r--fs/ntfs/attrib.c4
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/inode.c4
-rw-r--r--fs/ntfs/layout.h12
-rw-r--r--fs/ntfs/logfile.c2
-rw-r--r--fs/ntfs/logfile.h2
-rw-r--r--fs/ntfs/mft.c8
-rw-r--r--fs/ntfs/runlist.c2
-rw-r--r--fs/ntfs/super.c14
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c63
-rw-r--r--fs/ocfs2/cluster/quorum.c4
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c7
-rw-r--r--fs/ocfs2/file.c12
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/journal.c5
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/ocfs2_fs.h6
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/reservations.h2
-rw-r--r--fs/ocfs2/stackglue.h2
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/xattr.c4
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/partitions/efi.c6
-rw-r--r--fs/partitions/ldm.c16
-rw-r--r--fs/proc/base.c11
-rw-r--r--fs/proc/task_mmu.c12
-rw-r--r--fs/pstore/Kconfig2
-rw-r--r--fs/pstore/platform.c12
-rw-r--r--fs/quota/dquot.c15
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/reiserfs/lock.c2
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/squashfs/cache.c4
-rw-r--r--fs/super.c3
-rw-r--r--fs/sysfs/file.c12
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/ubifs/Kconfig2
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/debug.c63
-rw-r--r--fs/ubifs/debug.h152
-rw-r--r--fs/ubifs/file.c3
-rw-r--r--fs/ubifs/log.c20
-rw-r--r--fs/ubifs/lpt.c7
-rw-r--r--fs/ubifs/recovery.c26
-rw-r--r--fs/ubifs/replay.c18
-rw-r--r--fs/ubifs/super.c47
-rw-r--r--fs/ubifs/xattr.c4
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/ufs/super.c6
-rw-r--r--fs/ufs/truncate.c1
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c28
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c31
-rw-r--r--fs/xfs/linux-2.6/xfs_message.h24
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c129
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c231
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/quota/xfs_dquot.c2
-rw-r--r--fs/xfs/quota/xfs_qm.c7
-rw-r--r--fs/xfs/quota/xfs_qm.h5
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/xfs_alloc.c30
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c67
-rw-r--r--fs/xfs/xfs_itable.c2
-rw-r--r--fs/xfs/xfs_log.c38
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c4
-rw-r--r--fs/xfs/xfs_mount.h9
-rw-r--r--fs/xfs/xfs_trans_ail.c442
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_trans_priv.h22
-rw-r--r--fs/xfs/xfs_vnodeops.c4
311 files changed, 6718 insertions, 5990 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 0ee594569dcc..85b67ffa2a43 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
286 286
287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) 287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
288{ 288{
289 int err, flags; 289 int err;
290 struct p9_fid *fid; 290 struct p9_fid *fid;
291 struct v9fs_session_info *v9ses;
292 291
293 v9ses = v9fs_dentry2v9ses(dentry);
294 fid = v9fs_fid_clone_with_uid(dentry, 0); 292 fid = v9fs_fid_clone_with_uid(dentry, 0);
295 if (IS_ERR(fid)) 293 if (IS_ERR(fid))
296 goto error_out; 294 goto error_out;
@@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
299 * dirty pages. We always request for the open fid in read-write 297 * dirty pages. We always request for the open fid in read-write
300 * mode so that a partial page write which result in page 298 * mode so that a partial page write which result in page
301 * read can work. 299 * read can work.
302 *
303 * we don't have a tsyncfs operation for older version
304 * of protocol. So make sure the write back fid is
305 * opened in O_SYNC mode.
306 */ 300 */
307 if (!v9fs_proto_dotl(v9ses)) 301 err = p9_client_open(fid, O_RDWR);
308 flags = O_RDWR | O_SYNC;
309 else
310 flags = O_RDWR;
311
312 err = p9_client_open(fid, flags);
313 if (err < 0) { 302 if (err < 0) {
314 p9_client_clunk(fid); 303 p9_client_clunk(fid);
315 fid = ERR_PTR(err); 304 fid = ERR_PTR(err);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9665c2b840e6..e5ebedfc5ed8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,7 +116,6 @@ struct v9fs_session_info {
116 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
117 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
118 struct rw_semaphore rename_sem; 118 struct rw_semaphore rename_sem;
119 struct p9_fid *root_fid; /* Used for file system sync */
120}; 119};
121 120
122/* cache_validity flags */ 121/* cache_validity flags */
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b6a3b9f7fe4d..e022890c6f40 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
126 retval = v9fs_refresh_inode_dotl(fid, inode); 126 retval = v9fs_refresh_inode_dotl(fid, inode);
127 else 127 else
128 retval = v9fs_refresh_inode(fid, inode); 128 retval = v9fs_refresh_inode(fid, inode);
129 if (retval <= 0) 129 if (retval == -ENOENT)
130 return 0;
131 if (retval < 0)
130 return retval; 132 return retval;
131 } 133 }
132out_valid: 134out_valid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ffbb113d5f33..82a7c38ddad0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -811,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
811 fid = v9fs_fid_lookup(dentry); 811 fid = v9fs_fid_lookup(dentry);
812 if (IS_ERR(fid)) { 812 if (IS_ERR(fid)) {
813 __putname(link); 813 __putname(link);
814 link = ERR_PTR(PTR_ERR(fid)); 814 link = ERR_CAST(fid);
815 goto ndset; 815 goto ndset;
816 } 816 }
817 retval = p9_client_readlink(fid, &target); 817 retval = p9_client_readlink(fid, &target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f3eed3383e4f..feef6cdc1fd2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
154 retval = PTR_ERR(inode); 154 retval = PTR_ERR(inode);
155 goto release_sb; 155 goto release_sb;
156 } 156 }
157
157 root = d_alloc_root(inode); 158 root = d_alloc_root(inode);
158 if (!root) { 159 if (!root) {
159 iput(inode); 160 iput(inode);
@@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
185 p9stat_free(st); 186 p9stat_free(st);
186 kfree(st); 187 kfree(st);
187 } 188 }
188 v9fs_fid_add(root, fid);
189 retval = v9fs_get_acl(inode, fid); 189 retval = v9fs_get_acl(inode, fid);
190 if (retval) 190 if (retval)
191 goto release_sb; 191 goto release_sb;
192 /* 192 v9fs_fid_add(root, fid);
193 * Add the root fid to session info. This is used
194 * for file system sync. We want a cloned fid here
195 * so that we can do a sync_filesystem after a
196 * shrink_dcache_for_umount
197 */
198 v9ses->root_fid = v9fs_fid_clone(root);
199 if (IS_ERR(v9ses->root_fid)) {
200 retval = PTR_ERR(v9ses->root_fid);
201 goto release_sb;
202 }
203 193
204 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 194 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
205 return dget(sb->s_root); 195 return dget(sb->s_root);
@@ -210,11 +200,15 @@ close_session:
210 v9fs_session_close(v9ses); 200 v9fs_session_close(v9ses);
211 kfree(v9ses); 201 kfree(v9ses);
212 return ERR_PTR(retval); 202 return ERR_PTR(retval);
203
213release_sb: 204release_sb:
214 /* 205 /*
215 * we will do the session_close and root dentry 206 * we will do the session_close and root dentry release
216 * release in the below call. 207 * in the below call. But we need to clunk fid, because we haven't
208 * attached the fid to dentry so it won't get clunked
209 * automatically.
217 */ 210 */
211 p9_client_clunk(fid);
218 deactivate_locked_super(sb); 212 deactivate_locked_super(sb);
219 return ERR_PTR(retval); 213 return ERR_PTR(retval);
220} 214}
@@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s)
232 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 226 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
233 227
234 kill_anon_super(s); 228 kill_anon_super(s);
235 p9_client_clunk(v9ses->root_fid); 229
236 v9fs_session_cancel(v9ses); 230 v9fs_session_cancel(v9ses);
237 v9fs_session_close(v9ses); 231 v9fs_session_close(v9ses);
238 kfree(v9ses); 232 kfree(v9ses);
@@ -285,14 +279,6 @@ done:
285 return res; 279 return res;
286} 280}
287 281
288static int v9fs_sync_fs(struct super_block *sb, int wait)
289{
290 struct v9fs_session_info *v9ses = sb->s_fs_info;
291
292 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
293 return p9_client_sync_fs(v9ses->root_fid);
294}
295
296static int v9fs_drop_inode(struct inode *inode) 282static int v9fs_drop_inode(struct inode *inode)
297{ 283{
298 struct v9fs_session_info *v9ses; 284 struct v9fs_session_info *v9ses;
@@ -307,6 +293,51 @@ static int v9fs_drop_inode(struct inode *inode)
307 return 1; 293 return 1;
308} 294}
309 295
296static int v9fs_write_inode(struct inode *inode,
297 struct writeback_control *wbc)
298{
299 int ret;
300 struct p9_wstat wstat;
301 struct v9fs_inode *v9inode;
302 /*
303 * send an fsync request to server irrespective of
304 * wbc->sync_mode.
305 */
306 P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
307 v9inode = V9FS_I(inode);
308 if (!v9inode->writeback_fid)
309 return 0;
310 v9fs_blank_wstat(&wstat);
311
312 ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
313 if (ret < 0) {
314 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
315 return ret;
316 }
317 return 0;
318}
319
320static int v9fs_write_inode_dotl(struct inode *inode,
321 struct writeback_control *wbc)
322{
323 int ret;
324 struct v9fs_inode *v9inode;
325 /*
326 * send an fsync request to server irrespective of
327 * wbc->sync_mode.
328 */
329 P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
330 v9inode = V9FS_I(inode);
331 if (!v9inode->writeback_fid)
332 return 0;
333 ret = p9_client_fsync(v9inode->writeback_fid, 0);
334 if (ret < 0) {
335 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
336 return ret;
337 }
338 return 0;
339}
340
310static const struct super_operations v9fs_super_ops = { 341static const struct super_operations v9fs_super_ops = {
311 .alloc_inode = v9fs_alloc_inode, 342 .alloc_inode = v9fs_alloc_inode,
312 .destroy_inode = v9fs_destroy_inode, 343 .destroy_inode = v9fs_destroy_inode,
@@ -314,17 +345,18 @@ static const struct super_operations v9fs_super_ops = {
314 .evict_inode = v9fs_evict_inode, 345 .evict_inode = v9fs_evict_inode,
315 .show_options = generic_show_options, 346 .show_options = generic_show_options,
316 .umount_begin = v9fs_umount_begin, 347 .umount_begin = v9fs_umount_begin,
348 .write_inode = v9fs_write_inode,
317}; 349};
318 350
319static const struct super_operations v9fs_super_ops_dotl = { 351static const struct super_operations v9fs_super_ops_dotl = {
320 .alloc_inode = v9fs_alloc_inode, 352 .alloc_inode = v9fs_alloc_inode,
321 .destroy_inode = v9fs_destroy_inode, 353 .destroy_inode = v9fs_destroy_inode,
322 .sync_fs = v9fs_sync_fs,
323 .statfs = v9fs_statfs, 354 .statfs = v9fs_statfs,
324 .drop_inode = v9fs_drop_inode, 355 .drop_inode = v9fs_drop_inode,
325 .evict_inode = v9fs_evict_inode, 356 .evict_inode = v9fs_evict_inode,
326 .show_options = generic_show_options, 357 .show_options = generic_show_options,
327 .umount_begin = v9fs_umount_begin, 358 .umount_begin = v9fs_umount_begin,
359 .write_inode = v9fs_write_inode_dotl,
328}; 360};
329 361
330struct file_system_type v9fs_fs_type = { 362struct file_system_type v9fs_fs_type = {
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index d1a5932bb0f1..6935f05202ac 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -51,7 +51,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
51 51
52/* 52/*
53 * This is fun. We need to load up to 19 bits from the map at an 53 * This is fun. We need to load up to 19 bits from the map at an
54 * arbitary bit alignment. (We're limited to 19 bits by F+ version 2). 54 * arbitrary bit alignment. (We're limited to 19 bits by F+ version 2).
55 */ 55 */
56#define GET_FRAG_ID(_map,_start,_idmask) \ 56#define GET_FRAG_ID(_map,_start,_idmask) \
57 ({ \ 57 ({ \
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 0fb315dd4d2a..577763c3d88b 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -98,7 +98,7 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
98} 98}
99 99
100/* 100/*
101 * provide new auxilliary cache data 101 * provide new auxiliary cache data
102 */ 102 */
103static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, 103static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
104 void *buffer, uint16_t bufmax) 104 void *buffer, uint16_t bufmax)
@@ -117,7 +117,7 @@ static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
117} 117}
118 118
119/* 119/*
120 * check that the auxilliary data indicates that the entry is still valid 120 * check that the auxiliary data indicates that the entry is still valid
121 */ 121 */
122static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, 122static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
123 const void *buffer, 123 const void *buffer,
@@ -150,7 +150,7 @@ static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
150} 150}
151 151
152/* 152/*
153 * provide new auxilliary cache data 153 * provide new auxiliary cache data
154 */ 154 */
155static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, 155static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
156 void *buffer, uint16_t bufmax) 156 void *buffer, uint16_t bufmax)
@@ -172,7 +172,7 @@ static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
172} 172}
173 173
174/* 174/*
175 * check that the auxilliary data indicates that the entry is still valid 175 * check that the auxiliary data indicates that the entry is still valid
176 */ 176 */
177static 177static
178enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, 178enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
@@ -283,7 +283,7 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
283} 283}
284 284
285/* 285/*
286 * provide new auxilliary cache data 286 * provide new auxiliary cache data
287 */ 287 */
288static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, 288static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
289 void *buffer, uint16_t bufmax) 289 void *buffer, uint16_t bufmax)
@@ -309,7 +309,7 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
309} 309}
310 310
311/* 311/*
312 * check that the auxilliary data indicates that the entry is still valid 312 * check that the auxiliary data indicates that the entry is still valid
313 */ 313 */
314static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, 314static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
315 const void *buffer, 315 const void *buffer,
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 0d5eeadf6121..3c090b7555ea 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -293,7 +293,7 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
293 if (!cell) { 293 if (!cell) {
294 /* this should not happen unless user tries to mount 294 /* this should not happen unless user tries to mount
295 * when root cell is not set. Return an impossibly 295 * when root cell is not set. Return an impossibly
296 * bizzare errno to alert the user. Things like 296 * bizarre errno to alert the user. Things like
297 * ENOENT might be "more appropriate" but they happen 297 * ENOENT might be "more appropriate" but they happen
298 * for other reasons. 298 * for other reasons.
299 */ 299 */
diff --git a/fs/attr.c b/fs/attr.c
index 1007ed616314..91dbe2a107f2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
128 * setattr_copy must be called with i_mutex held. 128 * setattr_copy must be called with i_mutex held.
129 * 129 *
130 * setattr_copy updates the inode's metadata with that specified 130 * setattr_copy updates the inode's metadata with that specified
131 * in attr. Noticably missing is inode size update, which is more complex 131 * in attr. Noticeably missing is inode size update, which is more complex
132 * as it requires pagecache updates. 132 * as it requires pagecache updates.
133 * 133 *
134 * The inode is not marked as dirty after this operation. The rationale is 134 * The inode is not marked as dirty after this operation. The rationale is
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 96804a17bbd0..f55ae23b137e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -612,7 +612,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
612 * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves 612 * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves
613 * of the directory tree. There is no need to clear the automount flag 613 * of the directory tree. There is no need to clear the automount flag
614 * following a mount or restore it after an expire because these mounts 614 * following a mount or restore it after an expire because these mounts
615 * are always covered. However, it is neccessary to ensure that these 615 * are always covered. However, it is necessary to ensure that these
616 * flags are clear on non-empty directories to avoid unnecessary calls 616 * flags are clear on non-empty directories to avoid unnecessary calls
617 * during path walks. 617 * during path walks.
618 */ 618 */
diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog
index ce8c787916be..75a461cfaca6 100644
--- a/fs/befs/ChangeLog
+++ b/fs/befs/ChangeLog
@@ -24,7 +24,7 @@ Version 0.9 (2002-03-14)
24 24
25Version 0.64 (2002-02-07) 25Version 0.64 (2002-02-07)
26========== 26==========
27* Did the string comparision really right this time (btree.c) [WD] 27* Did the string comparison really right this time (btree.c) [WD]
28 28
29* Fixed up some places where I assumed that a long int could hold 29* Fixed up some places where I assumed that a long int could hold
30 a pointer value. (btree.c) [WD] 30 a pointer value. (btree.c) [WD]
@@ -114,7 +114,7 @@ Version 0.6 (2001-12-15)
114 More flexible. Will soon be controllable at mount time 114 More flexible. Will soon be controllable at mount time
115 (see TODO). [WD] 115 (see TODO). [WD]
116 116
117* Rewrote datastream positon lookups. 117* Rewrote datastream position lookups.
118 (datastream.c) [WD] 118 (datastream.c) [WD]
119 119
120* Moved the TODO list to its own file. 120* Moved the TODO list to its own file.
@@ -150,7 +150,7 @@ Version 0.50 (2001-11-13)
150* Anton also told me that the blocksize is not allowed to be larger than 150* Anton also told me that the blocksize is not allowed to be larger than
151 the page size in linux, which is 4k i386. Oops. Added a test for 151 the page size in linux, which is 4k i386. Oops. Added a test for
152 (blocksize > PAGE_SIZE), and refuse to mount in that case. What this 152 (blocksize > PAGE_SIZE), and refuse to mount in that case. What this
153 practicaly means is that 8k blocksize volumes won't work without a major 153 practically means is that 8k blocksize volumes won't work without a major
154 restructuring of the driver (or an alpha or other 64bit hardware). [WD] 154 restructuring of the driver (or an alpha or other 64bit hardware). [WD]
155 155
156* Cleaned up the befs_count_blocks() function. Much smarter now. 156* Cleaned up the befs_count_blocks() function. Much smarter now.
@@ -183,7 +183,7 @@ Version 0.45 (2001-10-29)
183 structures into the generic pointer fields of the public structures 183 structures into the generic pointer fields of the public structures
184 with kmalloc(). put_super and put_inode free them. This allows us not 184 with kmalloc(). put_super and put_inode free them. This allows us not
185 to have to touch the definitions of the public structures in 185 to have to touch the definitions of the public structures in
186 include/linux/fs.h. Also, befs_inode_info is huge (becuase of the 186 include/linux/fs.h. Also, befs_inode_info is huge (because of the
187 symlink string). (super.c, inode.c, befs_fs.h) [WD] 187 symlink string). (super.c, inode.c, befs_fs.h) [WD]
188 188
189* Fixed a thinko that was corrupting file reads after the first block_run 189* Fixed a thinko that was corrupting file reads after the first block_run
@@ -404,7 +404,7 @@ Version 0.4 (2001-10-28)
404 404
405* Fixed compile errors on 2.4.1 kernel (WD) 405* Fixed compile errors on 2.4.1 kernel (WD)
406 Resolve rejected patches 406 Resolve rejected patches
407 Accomodate changed NLS interface (util.h) 407 Accommodate changed NLS interface (util.h)
408 Needed to include <linux/slab.h> in most files 408 Needed to include <linux/slab.h> in most files
409 Makefile changes 409 Makefile changes
410 fs/Config.in changes 410 fs/Config.in changes
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h
index 7893eaa1e58c..eb557d9dc8be 100644
--- a/fs/befs/befs_fs_types.h
+++ b/fs/befs/befs_fs_types.h
@@ -234,7 +234,7 @@ typedef struct {
234} PACKED befs_btree_super; 234} PACKED befs_btree_super;
235 235
236/* 236/*
237 * Header stucture of each btree node 237 * Header structure of each btree node
238 */ 238 */
239typedef struct { 239typedef struct {
240 fs64 left; 240 fs64 left;
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 4202db7496cb..a66c9b1136e0 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * Licensed under the GNU GPL. See the file COPYING for details. 6 * Licensed under the GNU GPL. See the file COPYING for details.
7 * 7 *
8 * 2002-02-05: Sergey S. Kostyliov added binary search withing 8 * 2002-02-05: Sergey S. Kostyliov added binary search within
9 * btree nodes. 9 * btree nodes.
10 * 10 *
11 * Many thanks to: 11 * Many thanks to:
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 06457ed8f3e7..54b8c28bebc8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -734,7 +734,7 @@ parse_options(char *options, befs_mount_options * opts)
734 734
735/* This function has the responsibiltiy of getting the 735/* This function has the responsibiltiy of getting the
736 * filesystem ready for unmounting. 736 * filesystem ready for unmounting.
737 * Basicly, we free everything that we allocated in 737 * Basically, we free everything that we allocated in
738 * befs_read_inode 738 * befs_read_inode
739 */ 739 */
740static void 740static void
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f34078d702d3..303983fabfd6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
941 current->mm->start_stack = bprm->p; 941 current->mm->start_stack = bprm->p;
942 942
943#ifdef arch_randomize_brk 943#ifdef arch_randomize_brk
944 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) 944 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
945 current->mm->brk = current->mm->start_brk = 945 current->mm->brk = current->mm->start_brk =
946 arch_randomize_brk(current->mm); 946 arch_randomize_brk(current->mm);
947#ifdef CONFIG_COMPAT_BRK
948 current->brk_randomized = 1;
949#endif
950 }
947#endif 951#endif
948 952
949 if (current->personality & MMAP_PAGE_ZERO) { 953 if (current->personality & MMAP_PAGE_ZERO) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 811384bec8de..397d3057d336 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -717,7 +717,7 @@ static int load_flat_file(struct linux_binprm * bprm,
717 * help simplify all this mumbo jumbo 717 * help simplify all this mumbo jumbo
718 * 718 *
719 * We've got two different sections of relocation entries. 719 * We've got two different sections of relocation entries.
720 * The first is the GOT which resides at the begining of the data segment 720 * The first is the GOT which resides at the beginning of the data segment
721 * and is terminated with a -1. This one can be relocated in place. 721 * and is terminated with a -1. This one can be relocated in place.
722 * The second is the extra relocation entries tacked after the image's 722 * The second is the extra relocation entries tacked after the image's
723 * data segment. These require a little more processing as the entry is 723 * data segment. These require a little more processing as the entry is
diff --git a/fs/bio.c b/fs/bio.c
index 4d6d4b6c2bf1..840a0d755248 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1436,7 +1436,7 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
1436 * preferred way to end I/O on a bio, it takes care of clearing 1436 * preferred way to end I/O on a bio, it takes care of clearing
1437 * BIO_UPTODATE on error. @error is 0 on success, and and one of the 1437 * BIO_UPTODATE on error. @error is 0 on success, and and one of the
1438 * established -Exxxx (-EIO, for instance) error values in case 1438 * established -Exxxx (-EIO, for instance) error values in case
1439 * something went wrong. Noone should call bi_end_io() directly on a 1439 * something went wrong. No one should call bi_end_io() directly on a
1440 * bio unless they own it and thus know that it has an end_io 1440 * bio unless they own it and thus know that it has an end_io
1441 * function. 1441 * function.
1442 **/ 1442 **/
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c1511c674f53..257b00e98428 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -653,7 +653,7 @@ void bd_forget(struct inode *inode)
653 * @whole: whole block device containing @bdev, may equal @bdev 653 * @whole: whole block device containing @bdev, may equal @bdev
654 * @holder: holder trying to claim @bdev 654 * @holder: holder trying to claim @bdev
655 * 655 *
656 * Test whther @bdev can be claimed by @holder. 656 * Test whether @bdev can be claimed by @holder.
657 * 657 *
658 * CONTEXT: 658 * CONTEXT:
659 * spin_lock(&bdev_lock). 659 * spin_lock(&bdev_lock).
@@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1102 if (!bdev->bd_part) 1102 if (!bdev->bd_part)
1103 goto out_clear; 1103 goto out_clear;
1104 1104
1105 ret = 0;
1105 if (disk->fops->open) { 1106 if (disk->fops->open) {
1106 ret = disk->fops->open(bdev, mode); 1107 ret = disk->fops->open(bdev, mode);
1107 if (ret == -ERESTARTSYS) { 1108 if (ret == -ERESTARTSYS) {
@@ -1118,9 +1119,18 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1118 put_disk(disk); 1119 put_disk(disk);
1119 goto restart; 1120 goto restart;
1120 } 1121 }
1121 if (ret)
1122 goto out_clear;
1123 } 1122 }
1123 /*
1124 * If the device is invalidated, rescan partition
1125 * if open succeeded or failed with -ENOMEDIUM.
1126 * The latter is necessary to prevent ghost
1127 * partitions on a removed medium.
1128 */
1129 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
1130 rescan_partitions(disk, bdev);
1131 if (ret)
1132 goto out_clear;
1133
1124 if (!bdev->bd_openers) { 1134 if (!bdev->bd_openers) {
1125 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1135 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1126 bdi = blk_get_backing_dev_info(bdev); 1136 bdi = blk_get_backing_dev_info(bdev);
@@ -1128,8 +1138,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1128 bdi = &default_backing_dev_info; 1138 bdi = &default_backing_dev_info;
1129 bdev_inode_switch_bdi(bdev->bd_inode, bdi); 1139 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1130 } 1140 }
1131 if (bdev->bd_invalidated)
1132 rescan_partitions(disk, bdev);
1133 } else { 1141 } else {
1134 struct block_device *whole; 1142 struct block_device *whole;
1135 whole = bdget_disk(disk, 0); 1143 whole = bdget_disk(disk, 0);
@@ -1153,13 +1161,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1153 } 1161 }
1154 } else { 1162 } else {
1155 if (bdev->bd_contains == bdev) { 1163 if (bdev->bd_contains == bdev) {
1156 if (bdev->bd_disk->fops->open) { 1164 ret = 0;
1165 if (bdev->bd_disk->fops->open)
1157 ret = bdev->bd_disk->fops->open(bdev, mode); 1166 ret = bdev->bd_disk->fops->open(bdev, mode);
1158 if (ret) 1167 /* the same as first opener case, read comment there */
1159 goto out_unlock_bdev; 1168 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
1160 }
1161 if (bdev->bd_invalidated)
1162 rescan_partitions(bdev->bd_disk, bdev); 1169 rescan_partitions(bdev->bd_disk, bdev);
1170 if (ret)
1171 goto out_unlock_bdev;
1163 } 1172 }
1164 /* only one opener holds refs to the module and disk */ 1173 /* only one opener holds refs to the module and disk */
1165 module_put(disk->fops->owner); 1174 module_put(disk->fops->owner);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..44ea5b92e1ba 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,18 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
178 178
179 if (value) { 179 if (value) {
180 acl = posix_acl_from_xattr(value, size); 180 acl = posix_acl_from_xattr(value, size);
181 if (acl == NULL) { 181 if (IS_ERR(acl))
182 value = NULL;
183 size = 0;
184 } else if (IS_ERR(acl)) {
185 return PTR_ERR(acl); 182 return PTR_ERR(acl);
183
184 if (acl) {
185 ret = posix_acl_valid(acl);
186 if (ret)
187 goto out;
186 } 188 }
187 } 189 }
188 190
189 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type); 191 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
190 192out:
191 posix_acl_release(acl); 193 posix_acl_release(acl);
192 194
193 return ret; 195 return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d47ce8307854..8f4b81de3ae2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -718,7 +718,7 @@ struct btrfs_space_info {
718 u64 total_bytes; /* total bytes in the space, 718 u64 total_bytes; /* total bytes in the space,
719 this doesn't take mirrors into account */ 719 this doesn't take mirrors into account */
720 u64 bytes_used; /* total bytes used, 720 u64 bytes_used; /* total bytes used,
721 this does't take mirrors into account */ 721 this doesn't take mirrors into account */
722 u64 bytes_pinned; /* total bytes pinned, will be freed when the 722 u64 bytes_pinned; /* total bytes pinned, will be freed when the
723 transaction finishes */ 723 transaction finishes */
724 u64 bytes_reserved; /* total bytes the allocator has reserved for 724 u64 bytes_reserved; /* total bytes the allocator has reserved for
@@ -740,8 +740,10 @@ struct btrfs_space_info {
740 */ 740 */
741 unsigned long reservation_progress; 741 unsigned long reservation_progress;
742 742
743 int full; /* indicates that we cannot allocate any more 743 int full:1; /* indicates that we cannot allocate any more
744 chunks for this space */ 744 chunks for this space */
745 int chunk_alloc:1; /* set if we are allocating a chunk */
746
745 int force_alloc; /* set if we need to force a chunk alloc for 747 int force_alloc; /* set if we need to force a chunk alloc for
746 this space */ 748 this space */
747 749
@@ -1284,6 +1286,8 @@ struct btrfs_root {
1284#define BTRFS_INODE_DIRSYNC (1 << 10) 1286#define BTRFS_INODE_DIRSYNC (1 << 10)
1285#define BTRFS_INODE_COMPRESS (1 << 11) 1287#define BTRFS_INODE_COMPRESS (1 << 11)
1286 1288
1289#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
1290
1287/* some macros to generate set/get funcs for the struct fields. This 1291/* some macros to generate set/get funcs for the struct fields. This
1288 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1292 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
1289 * one for u8: 1293 * one for u8:
@@ -2359,6 +2363,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2359int btrfs_find_orphan_roots(struct btrfs_root *tree_root); 2363int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
2360int btrfs_set_root_node(struct btrfs_root_item *item, 2364int btrfs_set_root_node(struct btrfs_root_item *item,
2361 struct extent_buffer *node); 2365 struct extent_buffer *node);
2366void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
2367
2362/* dir-item.c */ 2368/* dir-item.c */
2363int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2369int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2364 struct btrfs_root *root, const char *name, 2370 struct btrfs_root *root, const char *name,
@@ -2572,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2572int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2578int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2573 struct inode *inode, u64 start, u64 end); 2579 struct inode *inode, u64 start, u64 end);
2574int btrfs_release_file(struct inode *inode, struct file *file); 2580int btrfs_release_file(struct inode *inode, struct file *file);
2581void btrfs_drop_pages(struct page **pages, size_t num_pages);
2582int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
2583 struct page **pages, size_t num_pages,
2584 loff_t pos, size_t write_bytes,
2585 struct extent_state **cached);
2575 2586
2576/* tree-defrag.c */ 2587/* tree-defrag.c */
2577int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 2588int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d7a7315bd031..228cf36ece83 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1275,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1275 root->commit_root = btrfs_root_node(root); 1275 root->commit_root = btrfs_root_node(root);
1276 BUG_ON(!root->node); 1276 BUG_ON(!root->node);
1277out: 1277out:
1278 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) 1278 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1279 root->ref_cows = 1; 1279 root->ref_cows = 1;
1280 btrfs_check_and_init_root_item(&root->root_item);
1281 }
1280 1282
1281 return root; 1283 return root;
1282} 1284}
@@ -2822,6 +2824,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
2822 2824
2823 spin_lock(&delayed_refs->lock); 2825 spin_lock(&delayed_refs->lock);
2824 if (delayed_refs->num_entries == 0) { 2826 if (delayed_refs->num_entries == 0) {
2827 spin_unlock(&delayed_refs->lock);
2825 printk(KERN_INFO "delayed_refs has NO entry\n"); 2828 printk(KERN_INFO "delayed_refs has NO entry\n");
2826 return ret; 2829 return ret;
2827 } 2830 }
@@ -3055,7 +3058,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3055 btrfs_destroy_pinned_extent(root, 3058 btrfs_destroy_pinned_extent(root,
3056 root->fs_info->pinned_extents); 3059 root->fs_info->pinned_extents);
3057 3060
3058 t->use_count = 0; 3061 atomic_set(&t->use_count, 0);
3059 list_del_init(&t->list); 3062 list_del_init(&t->list);
3060 memset(t, 0, sizeof(*t)); 3063 memset(t, 0, sizeof(*t));
3061 kmem_cache_free(btrfs_transaction_cachep, t); 3064 kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..9ee6bd55e16c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
33#include "locking.h" 33#include "locking.h"
34#include "free-space-cache.h" 34#include "free-space-cache.h"
35 35
36/* control flags for do_chunk_alloc's force field
37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
38 * if we really need one.
39 *
40 * CHUNK_ALLOC_FORCE means it must try to allocate one
41 *
42 * CHUNK_ALLOC_LIMITED means to only try and allocate one
43 * if we have very few chunks already allocated. This is
44 * used as part of the clustering code to help make sure
45 * we have a good pool of storage to cluster in, without
46 * filling the FS with empty chunks
47 *
48 */
49enum {
50 CHUNK_ALLOC_NO_FORCE = 0,
51 CHUNK_ALLOC_FORCE = 1,
52 CHUNK_ALLOC_LIMITED = 2,
53};
54
36static int update_block_group(struct btrfs_trans_handle *trans, 55static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 56 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc); 57 u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3019 found->bytes_readonly = 0; 3038 found->bytes_readonly = 0;
3020 found->bytes_may_use = 0; 3039 found->bytes_may_use = 0;
3021 found->full = 0; 3040 found->full = 0;
3022 found->force_alloc = 0; 3041 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3042 found->chunk_alloc = 0;
3023 *space_info = found; 3043 *space_info = found;
3024 list_add_rcu(&found->list, &info->space_info); 3044 list_add_rcu(&found->list, &info->space_info);
3025 atomic_set(&found->caching_threads, 0); 3045 atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
3150 if (!data_sinfo->full && alloc_chunk) { 3170 if (!data_sinfo->full && alloc_chunk) {
3151 u64 alloc_target; 3171 u64 alloc_target;
3152 3172
3153 data_sinfo->force_alloc = 1; 3173 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3154 spin_unlock(&data_sinfo->lock); 3174 spin_unlock(&data_sinfo->lock);
3155alloc: 3175alloc:
3156 alloc_target = btrfs_get_alloc_profile(root, 1); 3176 alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
3160 3180
3161 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3181 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3162 bytes + 2 * 1024 * 1024, 3182 bytes + 2 * 1024 * 1024,
3163 alloc_target, 0); 3183 alloc_target,
3184 CHUNK_ALLOC_NO_FORCE);
3164 btrfs_end_transaction(trans, root); 3185 btrfs_end_transaction(trans, root);
3165 if (ret < 0) { 3186 if (ret < 0) {
3166 if (ret != -ENOSPC) 3187 if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
3239 rcu_read_lock(); 3260 rcu_read_lock();
3240 list_for_each_entry_rcu(found, head, list) { 3261 list_for_each_entry_rcu(found, head, list) {
3241 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3262 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3242 found->force_alloc = 1; 3263 found->force_alloc = CHUNK_ALLOC_FORCE;
3243 } 3264 }
3244 rcu_read_unlock(); 3265 rcu_read_unlock();
3245} 3266}
3246 3267
3247static int should_alloc_chunk(struct btrfs_root *root, 3268static int should_alloc_chunk(struct btrfs_root *root,
3248 struct btrfs_space_info *sinfo, u64 alloc_bytes) 3269 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3270 int force)
3249{ 3271{
3250 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3272 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3273 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3251 u64 thresh; 3274 u64 thresh;
3252 3275
3253 if (sinfo->bytes_used + sinfo->bytes_reserved + 3276 if (force == CHUNK_ALLOC_FORCE)
3254 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3277 return 1;
3278
3279 /*
3280 * in limited mode, we want to have some free space up to
3281 * about 1% of the FS size.
3282 */
3283 if (force == CHUNK_ALLOC_LIMITED) {
3284 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3285 thresh = max_t(u64, 64 * 1024 * 1024,
3286 div_factor_fine(thresh, 1));
3287
3288 if (num_bytes - num_allocated < thresh)
3289 return 1;
3290 }
3291
3292 /*
3293 * we have two similar checks here, one based on percentage
3294 * and once based on a hard number of 256MB. The idea
3295 * is that if we have a good amount of free
3296 * room, don't allocate a chunk. A good mount is
3297 * less than 80% utilized of the chunks we have allocated,
3298 * or more than 256MB free
3299 */
3300 if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
3255 return 0; 3301 return 0;
3256 3302
3257 if (sinfo->bytes_used + sinfo->bytes_reserved + 3303 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
3258 alloc_bytes < div_factor(num_bytes, 8))
3259 return 0; 3304 return 0;
3260 3305
3261 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3306 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3307
3308 /* 256MB or 5% of the FS */
3262 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3309 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3263 3310
3264 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3311 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3265 return 0; 3312 return 0;
3266
3267 return 1; 3313 return 1;
3268} 3314}
3269 3315
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3273{ 3319{
3274 struct btrfs_space_info *space_info; 3320 struct btrfs_space_info *space_info;
3275 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3321 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3322 int wait_for_alloc = 0;
3276 int ret = 0; 3323 int ret = 0;
3277 3324
3278 mutex_lock(&fs_info->chunk_mutex);
3279
3280 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3325 flags = btrfs_reduce_alloc_profile(extent_root, flags);
3281 3326
3282 space_info = __find_space_info(extent_root->fs_info, flags); 3327 space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3287 } 3332 }
3288 BUG_ON(!space_info); 3333 BUG_ON(!space_info);
3289 3334
3335again:
3290 spin_lock(&space_info->lock); 3336 spin_lock(&space_info->lock);
3291 if (space_info->force_alloc) 3337 if (space_info->force_alloc)
3292 force = 1; 3338 force = space_info->force_alloc;
3293 if (space_info->full) { 3339 if (space_info->full) {
3294 spin_unlock(&space_info->lock); 3340 spin_unlock(&space_info->lock);
3295 goto out; 3341 return 0;
3296 } 3342 }
3297 3343
3298 if (!force && !should_alloc_chunk(extent_root, space_info, 3344 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
3299 alloc_bytes)) {
3300 spin_unlock(&space_info->lock); 3345 spin_unlock(&space_info->lock);
3301 goto out; 3346 return 0;
3347 } else if (space_info->chunk_alloc) {
3348 wait_for_alloc = 1;
3349 } else {
3350 space_info->chunk_alloc = 1;
3302 } 3351 }
3352
3303 spin_unlock(&space_info->lock); 3353 spin_unlock(&space_info->lock);
3304 3354
3355 mutex_lock(&fs_info->chunk_mutex);
3356
3357 /*
3358 * The chunk_mutex is held throughout the entirety of a chunk
3359 * allocation, so once we've acquired the chunk_mutex we know that the
3360 * other guy is done and we need to recheck and see if we should
3361 * allocate.
3362 */
3363 if (wait_for_alloc) {
3364 mutex_unlock(&fs_info->chunk_mutex);
3365 wait_for_alloc = 0;
3366 goto again;
3367 }
3368
3305 /* 3369 /*
3306 * If we have mixed data/metadata chunks we want to make sure we keep 3370 * If we have mixed data/metadata chunks we want to make sure we keep
3307 * allocating mixed chunks instead of individual chunks. 3371 * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3327 space_info->full = 1; 3391 space_info->full = 1;
3328 else 3392 else
3329 ret = 1; 3393 ret = 1;
3330 space_info->force_alloc = 0; 3394
3395 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3396 space_info->chunk_alloc = 0;
3331 spin_unlock(&space_info->lock); 3397 spin_unlock(&space_info->lock);
3332out:
3333 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3398 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3334 return ret; 3399 return ret;
3335} 3400}
@@ -5303,11 +5368,13 @@ loop:
5303 5368
5304 if (allowed_chunk_alloc) { 5369 if (allowed_chunk_alloc) {
5305 ret = do_chunk_alloc(trans, root, num_bytes + 5370 ret = do_chunk_alloc(trans, root, num_bytes +
5306 2 * 1024 * 1024, data, 1); 5371 2 * 1024 * 1024, data,
5372 CHUNK_ALLOC_LIMITED);
5307 allowed_chunk_alloc = 0; 5373 allowed_chunk_alloc = 0;
5308 done_chunk_alloc = 1; 5374 done_chunk_alloc = 1;
5309 } else if (!done_chunk_alloc) { 5375 } else if (!done_chunk_alloc &&
5310 space_info->force_alloc = 1; 5376 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5377 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5311 } 5378 }
5312 5379
5313 if (loop < LOOP_NO_EMPTY_SIZE) { 5380 if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
5393 */ 5460 */
5394 if (empty_size || root->ref_cows) 5461 if (empty_size || root->ref_cows)
5395 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5462 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5396 num_bytes + 2 * 1024 * 1024, data, 0); 5463 num_bytes + 2 * 1024 * 1024, data,
5464 CHUNK_ALLOC_NO_FORCE);
5397 5465
5398 WARN_ON(num_bytes < root->sectorsize); 5466 WARN_ON(num_bytes < root->sectorsize);
5399 ret = find_free_extent(trans, root, num_bytes, empty_size, 5467 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
5405 num_bytes = num_bytes & ~(root->sectorsize - 1); 5473 num_bytes = num_bytes & ~(root->sectorsize - 1);
5406 num_bytes = max(num_bytes, min_alloc_size); 5474 num_bytes = max(num_bytes, min_alloc_size);
5407 do_chunk_alloc(trans, root->fs_info->extent_root, 5475 do_chunk_alloc(trans, root->fs_info->extent_root,
5408 num_bytes, data, 1); 5476 num_bytes, data, CHUNK_ALLOC_FORCE);
5409 goto again; 5477 goto again;
5410 } 5478 }
5411 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5479 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -7991,6 +8059,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7991 u64 group_start = group->key.objectid; 8059 u64 group_start = group->key.objectid;
7992 new_extents = kmalloc(sizeof(*new_extents), 8060 new_extents = kmalloc(sizeof(*new_extents),
7993 GFP_NOFS); 8061 GFP_NOFS);
8062 if (!new_extents) {
8063 ret = -ENOMEM;
8064 goto out;
8065 }
7994 nr_extents = 1; 8066 nr_extents = 1;
7995 ret = get_new_locations(reloc_inode, 8067 ret = get_new_locations(reloc_inode,
7996 extent_key, 8068 extent_key,
@@ -8109,13 +8181,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8109 8181
8110 alloc_flags = update_block_group_flags(root, cache->flags); 8182 alloc_flags = update_block_group_flags(root, cache->flags);
8111 if (alloc_flags != cache->flags) 8183 if (alloc_flags != cache->flags)
8112 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8184 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8185 CHUNK_ALLOC_FORCE);
8113 8186
8114 ret = set_block_group_ro(cache); 8187 ret = set_block_group_ro(cache);
8115 if (!ret) 8188 if (!ret)
8116 goto out; 8189 goto out;
8117 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 8190 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8118 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8191 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8192 CHUNK_ALLOC_FORCE);
8119 if (ret < 0) 8193 if (ret < 0)
8120 goto out; 8194 goto out;
8121 ret = set_block_group_ro(cache); 8195 ret = set_block_group_ro(cache);
@@ -8128,7 +8202,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8128 struct btrfs_root *root, u64 type) 8202 struct btrfs_root *root, u64 type)
8129{ 8203{
8130 u64 alloc_flags = get_alloc_profile(root, type); 8204 u64 alloc_flags = get_alloc_profile(root, type);
8131 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8205 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8206 CHUNK_ALLOC_FORCE);
8132} 8207}
8133 8208
8134/* 8209/*
@@ -8781,23 +8856,38 @@ out:
8781int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 8856int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
8782{ 8857{
8783 struct btrfs_space_info *space_info; 8858 struct btrfs_space_info *space_info;
8859 struct btrfs_super_block *disk_super;
8860 u64 features;
8861 u64 flags;
8862 int mixed = 0;
8784 int ret; 8863 int ret;
8785 8864
8786 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, 8865 disk_super = &fs_info->super_copy;
8787 &space_info); 8866 if (!btrfs_super_root(disk_super))
8788 if (ret) 8867 return 1;
8789 return ret;
8790 8868
8791 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, 8869 features = btrfs_super_incompat_flags(disk_super);
8792 &space_info); 8870 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
8793 if (ret) 8871 mixed = 1;
8794 return ret;
8795 8872
8796 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, 8873 flags = BTRFS_BLOCK_GROUP_SYSTEM;
8797 &space_info); 8874 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8798 if (ret) 8875 if (ret)
8799 return ret; 8876 goto out;
8800 8877
8878 if (mixed) {
8879 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
8880 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8881 } else {
8882 flags = BTRFS_BLOCK_GROUP_METADATA;
8883 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8884 if (ret)
8885 goto out;
8886
8887 flags = BTRFS_BLOCK_GROUP_DATA;
8888 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8889 }
8890out:
8801 return ret; 8891 return ret;
8802} 8892}
8803 8893
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
10#include <linux/swap.h> 10#include <linux/swap.h>
11#include <linux/writeback.h> 11#include <linux/writeback.h>
12#include <linux/pagevec.h> 12#include <linux/pagevec.h>
13#include <linux/prefetch.h>
13#include "extent_io.h" 14#include "extent_io.h"
14#include "extent_map.h" 15#include "extent_map.h"
15#include "compat.h" 16#include "compat.h"
@@ -690,6 +691,15 @@ static void cache_state(struct extent_state *state,
690 } 691 }
691} 692}
692 693
694static void uncache_state(struct extent_state **cached_ptr)
695{
696 if (cached_ptr && (*cached_ptr)) {
697 struct extent_state *state = *cached_ptr;
698 *cached_ptr = NULL;
699 free_extent_state(state);
700 }
701}
702
693/* 703/*
694 * set some bits on a range in the tree. This may require allocations or 704 * set some bits on a range in the tree. This may require allocations or
695 * sleeping, so the gfp mask is used to indicate what is allowed. 705 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +950,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
940} 950}
941 951
942int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 952int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
943 gfp_t mask) 953 struct extent_state **cached_state, gfp_t mask)
944{ 954{
945 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 955 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
946 NULL, mask); 956 NULL, cached_state, mask);
947} 957}
948 958
949static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 959static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1022,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1012 mask); 1022 mask);
1013} 1023}
1014 1024
1015int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1025int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1016 gfp_t mask)
1017{ 1026{
1018 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1027 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1019 mask); 1028 mask);
@@ -1735,6 +1744,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1735 1744
1736 do { 1745 do {
1737 struct page *page = bvec->bv_page; 1746 struct page *page = bvec->bv_page;
1747 struct extent_state *cached = NULL;
1748 struct extent_state *state;
1749
1738 tree = &BTRFS_I(page->mapping->host)->io_tree; 1750 tree = &BTRFS_I(page->mapping->host)->io_tree;
1739 1751
1740 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1752 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1761,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1749 if (++bvec <= bvec_end) 1761 if (++bvec <= bvec_end)
1750 prefetchw(&bvec->bv_page->flags); 1762 prefetchw(&bvec->bv_page->flags);
1751 1763
1764 spin_lock(&tree->lock);
1765 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
1766 if (state && state->start == start) {
1767 /*
1768 * take a reference on the state, unlock will drop
1769 * the ref
1770 */
1771 cache_state(state, &cached);
1772 }
1773 spin_unlock(&tree->lock);
1774
1752 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1775 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1753 ret = tree->ops->readpage_end_io_hook(page, start, end, 1776 ret = tree->ops->readpage_end_io_hook(page, start, end,
1754 NULL); 1777 state);
1755 if (ret) 1778 if (ret)
1756 uptodate = 0; 1779 uptodate = 0;
1757 } 1780 }
@@ -1764,15 +1787,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1764 test_bit(BIO_UPTODATE, &bio->bi_flags); 1787 test_bit(BIO_UPTODATE, &bio->bi_flags);
1765 if (err) 1788 if (err)
1766 uptodate = 0; 1789 uptodate = 0;
1790 uncache_state(&cached);
1767 continue; 1791 continue;
1768 } 1792 }
1769 } 1793 }
1770 1794
1771 if (uptodate) { 1795 if (uptodate) {
1772 set_extent_uptodate(tree, start, end, 1796 set_extent_uptodate(tree, start, end, &cached,
1773 GFP_ATOMIC); 1797 GFP_ATOMIC);
1774 } 1798 }
1775 unlock_extent(tree, start, end, GFP_ATOMIC); 1799 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1776 1800
1777 if (whole_page) { 1801 if (whole_page) {
1778 if (uptodate) { 1802 if (uptodate) {
@@ -1811,6 +1835,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1811 1835
1812 do { 1836 do {
1813 struct page *page = bvec->bv_page; 1837 struct page *page = bvec->bv_page;
1838 struct extent_state *cached = NULL;
1814 tree = &BTRFS_I(page->mapping->host)->io_tree; 1839 tree = &BTRFS_I(page->mapping->host)->io_tree;
1815 1840
1816 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1841 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1846,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1821 prefetchw(&bvec->bv_page->flags); 1846 prefetchw(&bvec->bv_page->flags);
1822 1847
1823 if (uptodate) { 1848 if (uptodate) {
1824 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1849 set_extent_uptodate(tree, start, end, &cached,
1850 GFP_ATOMIC);
1825 } else { 1851 } else {
1826 ClearPageUptodate(page); 1852 ClearPageUptodate(page);
1827 SetPageError(page); 1853 SetPageError(page);
1828 } 1854 }
1829 1855
1830 unlock_extent(tree, start, end, GFP_ATOMIC); 1856 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1831 1857
1832 } while (bvec >= bio->bi_io_vec); 1858 } while (bvec >= bio->bi_io_vec);
1833 1859
@@ -2016,14 +2042,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2016 while (cur <= end) { 2042 while (cur <= end) {
2017 if (cur >= last_byte) { 2043 if (cur >= last_byte) {
2018 char *userpage; 2044 char *userpage;
2045 struct extent_state *cached = NULL;
2046
2019 iosize = PAGE_CACHE_SIZE - page_offset; 2047 iosize = PAGE_CACHE_SIZE - page_offset;
2020 userpage = kmap_atomic(page, KM_USER0); 2048 userpage = kmap_atomic(page, KM_USER0);
2021 memset(userpage + page_offset, 0, iosize); 2049 memset(userpage + page_offset, 0, iosize);
2022 flush_dcache_page(page); 2050 flush_dcache_page(page);
2023 kunmap_atomic(userpage, KM_USER0); 2051 kunmap_atomic(userpage, KM_USER0);
2024 set_extent_uptodate(tree, cur, cur + iosize - 1, 2052 set_extent_uptodate(tree, cur, cur + iosize - 1,
2025 GFP_NOFS); 2053 &cached, GFP_NOFS);
2026 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2054 unlock_extent_cached(tree, cur, cur + iosize - 1,
2055 &cached, GFP_NOFS);
2027 break; 2056 break;
2028 } 2057 }
2029 em = get_extent(inode, page, page_offset, cur, 2058 em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2092,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2063 /* we've found a hole, just zero and go on */ 2092 /* we've found a hole, just zero and go on */
2064 if (block_start == EXTENT_MAP_HOLE) { 2093 if (block_start == EXTENT_MAP_HOLE) {
2065 char *userpage; 2094 char *userpage;
2095 struct extent_state *cached = NULL;
2096
2066 userpage = kmap_atomic(page, KM_USER0); 2097 userpage = kmap_atomic(page, KM_USER0);
2067 memset(userpage + page_offset, 0, iosize); 2098 memset(userpage + page_offset, 0, iosize);
2068 flush_dcache_page(page); 2099 flush_dcache_page(page);
2069 kunmap_atomic(userpage, KM_USER0); 2100 kunmap_atomic(userpage, KM_USER0);
2070 2101
2071 set_extent_uptodate(tree, cur, cur + iosize - 1, 2102 set_extent_uptodate(tree, cur, cur + iosize - 1,
2072 GFP_NOFS); 2103 &cached, GFP_NOFS);
2073 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2104 unlock_extent_cached(tree, cur, cur + iosize - 1,
2105 &cached, GFP_NOFS);
2074 cur = cur + iosize; 2106 cur = cur + iosize;
2075 page_offset += iosize; 2107 page_offset += iosize;
2076 continue; 2108 continue;
@@ -2650,7 +2682,7 @@ int extent_readpages(struct extent_io_tree *tree,
2650 prefetchw(&page->flags); 2682 prefetchw(&page->flags);
2651 list_del(&page->lru); 2683 list_del(&page->lru);
2652 if (!add_to_page_cache_lru(page, mapping, 2684 if (!add_to_page_cache_lru(page, mapping,
2653 page->index, GFP_KERNEL)) { 2685 page->index, GFP_NOFS)) {
2654 __extent_read_full_page(tree, page, get_extent, 2686 __extent_read_full_page(tree, page, get_extent,
2655 &bio, 0, &bio_flags); 2687 &bio, 0, &bio_flags);
2656 } 2688 }
@@ -2789,9 +2821,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
2789 iocount++; 2821 iocount++;
2790 block_start = block_start + iosize; 2822 block_start = block_start + iosize;
2791 } else { 2823 } else {
2792 set_extent_uptodate(tree, block_start, cur_end, 2824 struct extent_state *cached = NULL;
2825
2826 set_extent_uptodate(tree, block_start, cur_end, &cached,
2793 GFP_NOFS); 2827 GFP_NOFS);
2794 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2828 unlock_extent_cached(tree, block_start, cur_end,
2829 &cached, GFP_NOFS);
2795 block_start = cur_end + 1; 2830 block_start = cur_end + 1;
2796 } 2831 }
2797 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2832 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3492,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3457 num_pages = num_extent_pages(eb->start, eb->len); 3492 num_pages = num_extent_pages(eb->start, eb->len);
3458 3493
3459 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3494 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3460 GFP_NOFS); 3495 NULL, GFP_NOFS);
3461 for (i = 0; i < num_pages; i++) { 3496 for (i = 0; i < num_pages; i++) {
3462 page = extent_buffer_page(eb, i); 3497 page = extent_buffer_page(eb, i);
3463 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3498 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3920,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
3885 kunmap_atomic(dst_kaddr, KM_USER0); 3920 kunmap_atomic(dst_kaddr, KM_USER0);
3886} 3921}
3887 3922
3923static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
3924{
3925 unsigned long distance = (src > dst) ? src - dst : dst - src;
3926 return distance < len;
3927}
3928
3888static void copy_pages(struct page *dst_page, struct page *src_page, 3929static void copy_pages(struct page *dst_page, struct page *src_page,
3889 unsigned long dst_off, unsigned long src_off, 3930 unsigned long dst_off, unsigned long src_off,
3890 unsigned long len) 3931 unsigned long len)
@@ -3892,10 +3933,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
3892 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3933 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3893 char *src_kaddr; 3934 char *src_kaddr;
3894 3935
3895 if (dst_page != src_page) 3936 if (dst_page != src_page) {
3896 src_kaddr = kmap_atomic(src_page, KM_USER1); 3937 src_kaddr = kmap_atomic(src_page, KM_USER1);
3897 else 3938 } else {
3898 src_kaddr = dst_kaddr; 3939 src_kaddr = dst_kaddr;
3940 BUG_ON(areas_overlap(src_off, dst_off, len));
3941 }
3899 3942
3900 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3943 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3901 kunmap_atomic(dst_kaddr, KM_USER0); 3944 kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4013,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3970 "len %lu len %lu\n", dst_offset, len, dst->len); 4013 "len %lu len %lu\n", dst_offset, len, dst->len);
3971 BUG_ON(1); 4014 BUG_ON(1);
3972 } 4015 }
3973 if (dst_offset < src_offset) { 4016 if (!areas_overlap(src_offset, dst_offset, len)) {
3974 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4017 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3975 return; 4018 return;
3976 } 4019 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
208 int bits, int exclusive_bits, u64 *failed_start, 208 int bits, int exclusive_bits, u64 *failed_start,
209 struct extent_state **cached_state, gfp_t mask); 209 struct extent_state **cached_state, gfp_t mask);
210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
211 gfp_t mask); 211 struct extent_state **cached_state, gfp_t mask);
212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
213 gfp_t mask); 213 gfp_t mask);
214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2b6c12e983b3..a24a3f2fa13e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -243,7 +243,7 @@ out:
243 * Insert @em into @tree or perform a simple forward/backward merge with 243 * Insert @em into @tree or perform a simple forward/backward merge with
244 * existing mappings. The extent_map struct passed in will be inserted 244 * existing mappings. The extent_map struct passed in will be inserted
245 * into the tree directly, with an additional reference taken, or a 245 * into the tree directly, with an additional reference taken, or a
246 * reference dropped if the merge attempt was successfull. 246 * reference dropped if the merge attempt was successful.
247 */ 247 */
248int add_extent_mapping(struct extent_map_tree *tree, 248int add_extent_mapping(struct extent_map_tree *tree,
249 struct extent_map *em) 249 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 656bc0a892b1..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
104/* 104/*
105 * unlocks pages after btrfs_file_write is done with them 105 * unlocks pages after btrfs_file_write is done with them
106 */ 106 */
107static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) 107void btrfs_drop_pages(struct page **pages, size_t num_pages)
108{ 108{
109 size_t i; 109 size_t i;
110 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
127 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
128 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
129 */ 129 */
130static noinline int dirty_and_release_pages(struct btrfs_root *root, 130int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
131 struct file *file, 131 struct page **pages, size_t num_pages,
132 struct page **pages, 132 loff_t pos, size_t write_bytes,
133 size_t num_pages, 133 struct extent_state **cached)
134 loff_t pos,
135 size_t write_bytes)
136{ 134{
137 int err = 0; 135 int err = 0;
138 int i; 136 int i;
139 struct inode *inode = fdentry(file)->d_inode;
140 u64 num_bytes; 137 u64 num_bytes;
141 u64 start_pos; 138 u64 start_pos;
142 u64 end_of_last_block; 139 u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
149 146
150 end_of_last_block = start_pos + num_bytes - 1; 147 end_of_last_block = start_pos + num_bytes - 1;
151 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 148 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
152 NULL); 149 cached);
153 if (err) 150 if (err)
154 return err; 151 return err;
155 152
@@ -906,7 +903,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
906 unsigned long last_index; 903 unsigned long last_index;
907 size_t num_written = 0; 904 size_t num_written = 0;
908 int nrptrs; 905 int nrptrs;
909 int ret; 906 int ret = 0;
910 907
911 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 908 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
912 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 909 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
992 } 989 }
993 990
994 if (copied > 0) { 991 if (copied > 0) {
995 ret = dirty_and_release_pages(root, file, pages, 992 ret = btrfs_dirty_pages(root, inode, pages,
996 dirty_pages, pos, 993 dirty_pages, pos, copied,
997 copied); 994 NULL);
998 if (ret) { 995 if (ret) {
999 btrfs_delalloc_release_space(inode, 996 btrfs_delalloc_release_space(inode,
1000 dirty_pages << PAGE_CACHE_SHIFT); 997 dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0037427d8a9d..63731a1fb0a1 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h"
27 28
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
81 return ERR_PTR(-ENOENT); 82 return ERR_PTR(-ENOENT);
82 } 83 }
83 84
85 inode->i_mapping->flags &= ~__GFP_FS;
86
84 spin_lock(&block_group->lock); 87 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) { 88 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode); 89 block_group->inode = igrab(inode);
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
222 u64 num_entries; 225 u64 num_entries;
223 u64 num_bitmaps; 226 u64 num_bitmaps;
224 u64 generation; 227 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
225 u32 cur_crc = ~(u32)0; 229 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0; 230 pgoff_t index = 0;
227 unsigned long first_page_offset; 231 unsigned long first_page_offset;
@@ -467,6 +471,17 @@ next:
467 index++; 471 index++;
468 } 472 }
469 473
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has an wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
470 ret = 1; 485 ret = 1;
471out: 486out:
472 kfree(checksums); 487 kfree(checksums);
@@ -493,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root,
493 struct inode *inode; 508 struct inode *inode;
494 struct rb_node *node; 509 struct rb_node *node;
495 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
496 struct page *page; 512 struct page *page;
497 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
514 struct btrfs_free_cluster *cluster = NULL;
515 struct extent_io_tree *unpin = NULL;
498 struct list_head bitmap_list; 516 struct list_head bitmap_list;
499 struct btrfs_key key; 517 struct btrfs_key key;
518 u64 start, end, len;
500 u64 bytes = 0; 519 u64 bytes = 0;
501 u32 *crc, *checksums; 520 u32 *crc, *checksums;
502 pgoff_t index = 0, last_index = 0;
503 unsigned long first_page_offset; 521 unsigned long first_page_offset;
504 int num_checksums; 522 int index = 0, num_pages = 0;
505 int entries = 0; 523 int entries = 0;
506 int bitmaps = 0; 524 int bitmaps = 0;
507 int ret = 0; 525 int ret = 0;
526 bool next_page = false;
527 bool out_of_space = false;
508 528
509 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
510 530
@@ -532,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root,
532 return 0; 552 return 0;
533 } 553 }
534 554
535 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
536 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
537 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
538 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
539 560
540 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
541 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
542 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
543 if (!crc) { 563 if (!crc) {
544 iput(inode); 564 iput(inode);
545 return 0; 565 return 0;
546 } 566 }
547 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
548 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
549 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
550 * our entries. 577 * our entries.
551 */ 578 */
552 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580
581 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster,
585 block_group_list);
586
587 /*
588 * We shouldn't have switched the pinned extents yet so this is the
589 * right one
590 */
591 unpin = root->fs_info->pinned_extents;
553 592
554 /* 593 /*
555 * Lock all pages first so we can lock the extent safely. 594 * Lock all pages first so we can lock the extent safely.
@@ -559,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
559 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
560 * know and don't freak out. 599 * know and don't freak out.
561 */ 600 */
562 while (index <= last_index) { 601 while (index < num_pages) {
563 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
564 if (!page) { 603 if (!page) {
565 pgoff_t i = 0; 604 int i;
566 605
567 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
568 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
569 unlock_page(page); 608 page_cache_release(pages[i]);
570 page_cache_release(page);
571 page_cache_release(page);
572 i++;
573 } 609 }
574 goto out_free; 610 goto out_free;
575 } 611 }
612 pages[index] = page;
576 index++; 613 index++;
577 } 614 }
578 615
@@ -580,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
580 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 617 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
581 0, &cached_state, GFP_NOFS); 618 0, &cached_state, GFP_NOFS);
582 619
620 /*
621 * When searching for pinned extents, we need to start at our start
622 * offset.
623 */
624 start = block_group->key.objectid;
625
583 /* Write out the extent entries */ 626 /* Write out the extent entries */
584 do { 627 do {
585 struct btrfs_free_space_entry *entry; 628 struct btrfs_free_space_entry *entry;
@@ -587,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
587 unsigned long offset = 0; 630 unsigned long offset = 0;
588 unsigned long start_offset = 0; 631 unsigned long start_offset = 0;
589 632
633 next_page = false;
634
590 if (index == 0) { 635 if (index == 0) {
591 start_offset = first_page_offset; 636 start_offset = first_page_offset;
592 offset = start_offset; 637 offset = start_offset;
593 } 638 }
594 639
595 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
596 646
597 addr = kmap(page); 647 addr = kmap(page);
598 entry = addr + start_offset; 648 entry = addr + start_offset;
599 649
600 memset(addr, 0, PAGE_CACHE_SIZE); 650 memset(addr, 0, PAGE_CACHE_SIZE);
601 while (1) { 651 while (node && !next_page) {
602 struct btrfs_free_space *e; 652 struct btrfs_free_space *e;
603 653
604 e = rb_entry(node, struct btrfs_free_space, offset_index); 654 e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -614,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
614 entry->type = BTRFS_FREE_SPACE_EXTENT; 664 entry->type = BTRFS_FREE_SPACE_EXTENT;
615 } 665 }
616 node = rb_next(node); 666 node = rb_next(node);
617 if (!node) 667 if (!node && cluster) {
618 break; 668 node = rb_first(&cluster->root);
669 cluster = NULL;
670 }
619 offset += sizeof(struct btrfs_free_space_entry); 671 offset += sizeof(struct btrfs_free_space_entry);
620 if (offset + sizeof(struct btrfs_free_space_entry) >= 672 if (offset + sizeof(struct btrfs_free_space_entry) >=
621 PAGE_CACHE_SIZE) 673 PAGE_CACHE_SIZE)
674 next_page = true;
675 entry++;
676 }
677
678 /*
679 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space
681 */
682 while (!next_page && (start < block_group->key.objectid +
683 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY);
686 if (ret) {
687 ret = 0;
688 break;
689 }
690
691 /* This pinned extent is out of our range */
692 if (start >= block_group->key.objectid +
693 block_group->key.offset)
622 break; 694 break;
695
696 len = block_group->key.objectid +
697 block_group->key.offset - start;
698 len = min(len, end + 1 - start);
699
700 entries++;
701 entry->offset = cpu_to_le64(start);
702 entry->bytes = cpu_to_le64(len);
703 entry->type = BTRFS_FREE_SPACE_EXTENT;
704
705 start = end + 1;
706 offset += sizeof(struct btrfs_free_space_entry);
707 if (offset + sizeof(struct btrfs_free_space_entry) >=
708 PAGE_CACHE_SIZE)
709 next_page = true;
623 entry++; 710 entry++;
624 } 711 }
625 *crc = ~(u32)0; 712 *crc = ~(u32)0;
@@ -632,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
632 719
633 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
634 721
635 ClearPageChecked(page);
636 set_page_extent_mapped(page);
637 SetPageUptodate(page);
638 set_page_dirty(page);
639
640 /*
641 * We need to release our reference we got for grab_cache_page,
642 * except for the first page which will hold our checksums, we
643 * do that below.
644 */
645 if (index != 0) {
646 unlock_page(page);
647 page_cache_release(page);
648 }
649
650 page_cache_release(page);
651
652 index++; 722 index++;
653 } while (node); 723 } while (node || next_page);
654 724
655 /* Write out the bitmaps */ 725 /* Write out the bitmaps */
656 list_for_each_safe(pos, n, &bitmap_list) { 726 list_for_each_safe(pos, n, &bitmap_list) {
@@ -658,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
658 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
659 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
660 730
661 page = find_get_page(inode->i_mapping, index); 731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
735 page = pages[index];
662 736
663 addr = kmap(page); 737 addr = kmap(page);
664 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 738 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -669,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
669 crc++; 743 crc++;
670 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
671 745
672 ClearPageChecked(page);
673 set_page_extent_mapped(page);
674 SetPageUptodate(page);
675 set_page_dirty(page);
676 unlock_page(page);
677 page_cache_release(page);
678 page_cache_release(page);
679 list_del_init(&entry->list); 746 list_del_init(&entry->list);
680 index++; 747 index++;
681 } 748 }
682 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
683 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
684 while (index <= last_index) { 760 while (index < num_pages) {
685 void *addr; 761 void *addr;
686 762
687 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
688
689 addr = kmap(page); 764 addr = kmap(page);
690 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
691 kunmap(page); 766 kunmap(page);
692 ClearPageChecked(page);
693 set_page_extent_mapped(page);
694 SetPageUptodate(page);
695 set_page_dirty(page);
696 unlock_page(page);
697 page_cache_release(page);
698 page_cache_release(page);
699 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
700 index++; 768 index++;
701 } 769 }
702 770
703 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
704
705 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
706 { 772 {
707 void *addr; 773 void *addr;
708 u64 *gen; 774 u64 *gen;
709 775
710 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
711 777
712 addr = kmap(page); 778 addr = kmap(page);
713 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
714 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
715 *gen = trans->transid; 781 *gen = trans->transid;
716 kunmap(page); 782 kunmap(page);
717 ClearPageChecked(page);
718 set_page_extent_mapped(page);
719 SetPageUptodate(page);
720 set_page_dirty(page);
721 unlock_page(page);
722 page_cache_release(page);
723 page_cache_release(page);
724 } 783 }
725 BTRFS_I(inode)->generation = trans->transid;
726 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
727 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
728 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
729 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
730 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
731 799
732 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -777,6 +845,7 @@ out_free:
777 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
778 } 846 }
779 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
780 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
781 iput(inode); 850 iput(inode);
782 return ret; 851 return ret;
@@ -1699,10 +1768,13 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1699 1768
1700 while ((node = rb_last(&block_group->free_space_offset)) != NULL) { 1769 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1701 info = rb_entry(node, struct btrfs_free_space, offset_index); 1770 info = rb_entry(node, struct btrfs_free_space, offset_index);
1702 unlink_free_space(block_group, info); 1771 if (!info->bitmap) {
1703 if (info->bitmap) 1772 unlink_free_space(block_group, info);
1704 kfree(info->bitmap); 1773 kmem_cache_free(btrfs_free_space_cachep, info);
1705 kmem_cache_free(btrfs_free_space_cachep, info); 1774 } else {
1775 free_bitmap(block_group, info);
1776 }
1777
1706 if (need_resched()) { 1778 if (need_resched()) {
1707 spin_unlock(&block_group->tree_lock); 1779 spin_unlock(&block_group->tree_lock);
1708 cond_resched(); 1780 cond_resched();
@@ -2232,7 +2304,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2232 start = entry->offset; 2304 start = entry->offset;
2233 bytes = min(entry->bytes, end - start); 2305 bytes = min(entry->bytes, end - start);
2234 unlink_free_space(block_group, entry); 2306 unlink_free_space(block_group, entry);
2235 kfree(entry); 2307 kmem_cache_free(btrfs_free_space_cachep, entry);
2236 } 2308 }
2237 2309
2238 spin_unlock(&block_group->tree_lock); 2310 spin_unlock(&block_group->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93c28a1d6bdc..7cd8ab0ef04d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -112,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
113 struct btrfs_root *root, struct inode *inode, 113 struct btrfs_root *root, struct inode *inode,
114 u64 start, size_t size, size_t compressed_size, 114 u64 start, size_t size, size_t compressed_size,
115 int compress_type,
115 struct page **compressed_pages) 116 struct page **compressed_pages)
116{ 117{
117 struct btrfs_key key; 118 struct btrfs_key key;
@@ -126,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
126 size_t cur_size = size; 127 size_t cur_size = size;
127 size_t datasize; 128 size_t datasize;
128 unsigned long offset; 129 unsigned long offset;
129 int compress_type = BTRFS_COMPRESS_NONE;
130 130
131 if (compressed_size && compressed_pages) { 131 if (compressed_size && compressed_pages)
132 compress_type = root->fs_info->compress_type;
133 cur_size = compressed_size; 132 cur_size = compressed_size;
134 }
135 133
136 path = btrfs_alloc_path(); 134 path = btrfs_alloc_path();
137 if (!path) 135 if (!path)
@@ -221,7 +219,7 @@ fail:
221static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 219static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
222 struct btrfs_root *root, 220 struct btrfs_root *root,
223 struct inode *inode, u64 start, u64 end, 221 struct inode *inode, u64 start, u64 end,
224 size_t compressed_size, 222 size_t compressed_size, int compress_type,
225 struct page **compressed_pages) 223 struct page **compressed_pages)
226{ 224{
227 u64 isize = i_size_read(inode); 225 u64 isize = i_size_read(inode);
@@ -254,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
254 inline_len = min_t(u64, isize, actual_end); 252 inline_len = min_t(u64, isize, actual_end);
255 ret = insert_inline_extent(trans, root, inode, start, 253 ret = insert_inline_extent(trans, root, inode, start,
256 inline_len, compressed_size, 254 inline_len, compressed_size,
257 compressed_pages); 255 compress_type, compressed_pages);
258 BUG_ON(ret); 256 BUG_ON(ret);
259 btrfs_delalloc_release_metadata(inode, end + 1 - start); 257 btrfs_delalloc_release_metadata(inode, end + 1 - start);
260 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 258 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
@@ -433,12 +431,13 @@ again:
433 * to make an uncompressed inline extent. 431 * to make an uncompressed inline extent.
434 */ 432 */
435 ret = cow_file_range_inline(trans, root, inode, 433 ret = cow_file_range_inline(trans, root, inode,
436 start, end, 0, NULL); 434 start, end, 0, 0, NULL);
437 } else { 435 } else {
438 /* try making a compressed inline extent */ 436 /* try making a compressed inline extent */
439 ret = cow_file_range_inline(trans, root, inode, 437 ret = cow_file_range_inline(trans, root, inode,
440 start, end, 438 start, end,
441 total_compressed, pages); 439 total_compressed,
440 compress_type, pages);
442 } 441 }
443 if (ret == 0) { 442 if (ret == 0) {
444 /* 443 /*
@@ -792,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode,
792 if (start == 0) { 791 if (start == 0) {
793 /* lets try to make an inline extent */ 792 /* lets try to make an inline extent */
794 ret = cow_file_range_inline(trans, root, inode, 793 ret = cow_file_range_inline(trans, root, inode,
795 start, end, 0, NULL); 794 start, end, 0, 0, NULL);
796 if (ret == 0) { 795 if (ret == 0) {
797 extent_clear_unlock_delalloc(inode, 796 extent_clear_unlock_delalloc(inode,
798 &BTRFS_I(inode)->io_tree, 797 &BTRFS_I(inode)->io_tree,
@@ -955,6 +954,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
955 1, 0, NULL, GFP_NOFS); 954 1, 0, NULL, GFP_NOFS);
956 while (start < end) { 955 while (start < end) {
957 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 956 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
957 BUG_ON(!async_cow);
958 async_cow->inode = inode; 958 async_cow->inode = inode;
959 async_cow->root = root; 959 async_cow->root = root;
960 async_cow->locked_page = locked_page; 960 async_cow->locked_page = locked_page;
@@ -1771,9 +1771,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1771 add_pending_csums(trans, inode, ordered_extent->file_offset, 1771 add_pending_csums(trans, inode, ordered_extent->file_offset,
1772 &ordered_extent->list); 1772 &ordered_extent->list);
1773 1773
1774 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1774 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1775 ret = btrfs_update_inode(trans, root, inode); 1775 if (!ret) {
1776 BUG_ON(ret); 1776 ret = btrfs_update_inode(trans, root, inode);
1777 BUG_ON(ret);
1778 }
1779 ret = 0;
1777out: 1780out:
1778 if (nolock) { 1781 if (nolock) {
1779 if (trans) 1782 if (trans)
@@ -2222,8 +2225,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2222 insert = 1; 2225 insert = 1;
2223#endif 2226#endif
2224 insert = 1; 2227 insert = 1;
2225 } else {
2226 WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
2227 } 2228 }
2228 2229
2229 if (!BTRFS_I(inode)->orphan_meta_reserved) { 2230 if (!BTRFS_I(inode)->orphan_meta_reserved) {
@@ -2324,7 +2325,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2324 2325
2325 /* 2326 /*
2326 * if ret == 0 means we found what we were searching for, which 2327 * if ret == 0 means we found what we were searching for, which
2327 * is weird, but possible, so only screw with path if we didnt 2328 * is weird, but possible, so only screw with path if we didn't
2328 * find the key and see if we have stuff that matches 2329 * find the key and see if we have stuff that matches
2329 */ 2330 */
2330 if (ret > 0) { 2331 if (ret > 0) {
@@ -2537,8 +2538,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2537 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2538 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2538 2539
2539 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2540 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2540 if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
2541 inode->i_mapping->flags &= ~__GFP_FS;
2542 2541
2543 /* 2542 /*
2544 * try to precache a NULL acl entry for files that don't have 2543 * try to precache a NULL acl entry for files that don't have
@@ -2595,6 +2594,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2595 struct btrfs_inode_item *item, 2594 struct btrfs_inode_item *item,
2596 struct inode *inode) 2595 struct inode *inode)
2597{ 2596{
2597 if (!leaf->map_token)
2598 map_private_extent_buffer(leaf, (unsigned long)item,
2599 sizeof(struct btrfs_inode_item),
2600 &leaf->map_token, &leaf->kaddr,
2601 &leaf->map_start, &leaf->map_len,
2602 KM_USER1);
2603
2598 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2604 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2599 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2605 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2600 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2606 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2623,6 +2629,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2623 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2629 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2624 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2630 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2625 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2631 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2632
2633 if (leaf->map_token) {
2634 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2635 leaf->map_token = NULL;
2636 }
2626} 2637}
2627 2638
2628/* 2639/*
@@ -4212,10 +4223,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4212 struct btrfs_key found_key; 4223 struct btrfs_key found_key;
4213 struct btrfs_path *path; 4224 struct btrfs_path *path;
4214 int ret; 4225 int ret;
4215 u32 nritems;
4216 struct extent_buffer *leaf; 4226 struct extent_buffer *leaf;
4217 int slot; 4227 int slot;
4218 int advance;
4219 unsigned char d_type; 4228 unsigned char d_type;
4220 int over = 0; 4229 int over = 0;
4221 u32 di_cur; 4230 u32 di_cur;
@@ -4258,27 +4267,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4258 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4267 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4259 if (ret < 0) 4268 if (ret < 0)
4260 goto err; 4269 goto err;
4261 advance = 0;
4262 4270
4263 while (1) { 4271 while (1) {
4264 leaf = path->nodes[0]; 4272 leaf = path->nodes[0];
4265 nritems = btrfs_header_nritems(leaf);
4266 slot = path->slots[0]; 4273 slot = path->slots[0];
4267 if (advance || slot >= nritems) { 4274 if (slot >= btrfs_header_nritems(leaf)) {
4268 if (slot >= nritems - 1) { 4275 ret = btrfs_next_leaf(root, path);
4269 ret = btrfs_next_leaf(root, path); 4276 if (ret < 0)
4270 if (ret) 4277 goto err;
4271 break; 4278 else if (ret > 0)
4272 leaf = path->nodes[0]; 4279 break;
4273 nritems = btrfs_header_nritems(leaf); 4280 continue;
4274 slot = path->slots[0];
4275 } else {
4276 slot++;
4277 path->slots[0]++;
4278 }
4279 } 4281 }
4280 4282
4281 advance = 1;
4282 item = btrfs_item_nr(leaf, slot); 4283 item = btrfs_item_nr(leaf, slot);
4283 btrfs_item_key_to_cpu(leaf, &found_key, slot); 4284 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4284 4285
@@ -4287,7 +4288,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4287 if (btrfs_key_type(&found_key) != key_type) 4288 if (btrfs_key_type(&found_key) != key_type)
4288 break; 4289 break;
4289 if (found_key.offset < filp->f_pos) 4290 if (found_key.offset < filp->f_pos)
4290 continue; 4291 goto next;
4291 4292
4292 filp->f_pos = found_key.offset; 4293 filp->f_pos = found_key.offset;
4293 4294
@@ -4340,6 +4341,8 @@ skip:
4340 di_cur += di_len; 4341 di_cur += di_len;
4341 di = (struct btrfs_dir_item *)((char *)di + di_len); 4342 di = (struct btrfs_dir_item *)((char *)di + di_len);
4342 } 4343 }
4344next:
4345 path->slots[0]++;
4343 } 4346 }
4344 4347
4345 /* Reached end of directory/root. Bump pos past the last item. */ 4348 /* Reached end of directory/root. Bump pos past the last item. */
@@ -4532,14 +4535,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4532 BUG_ON(!path); 4535 BUG_ON(!path);
4533 4536
4534 inode = new_inode(root->fs_info->sb); 4537 inode = new_inode(root->fs_info->sb);
4535 if (!inode) 4538 if (!inode) {
4539 btrfs_free_path(path);
4536 return ERR_PTR(-ENOMEM); 4540 return ERR_PTR(-ENOMEM);
4541 }
4537 4542
4538 if (dir) { 4543 if (dir) {
4539 trace_btrfs_inode_request(dir); 4544 trace_btrfs_inode_request(dir);
4540 4545
4541 ret = btrfs_set_inode_index(dir, index); 4546 ret = btrfs_set_inode_index(dir, index);
4542 if (ret) { 4547 if (ret) {
4548 btrfs_free_path(path);
4543 iput(inode); 4549 iput(inode);
4544 return ERR_PTR(ret); 4550 return ERR_PTR(ret);
4545 } 4551 }
@@ -4726,9 +4732,10 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4726 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4727 dentry->d_name.len, dir->i_ino, objectid, 4733 dentry->d_name.len, dir->i_ino, objectid,
4728 BTRFS_I(dir)->block_group, mode, &index); 4734 BTRFS_I(dir)->block_group, mode, &index);
4729 err = PTR_ERR(inode); 4735 if (IS_ERR(inode)) {
4730 if (IS_ERR(inode)) 4736 err = PTR_ERR(inode);
4731 goto out_unlock; 4737 goto out_unlock;
4738 }
4732 4739
4733 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 4740 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4734 if (err) { 4741 if (err) {
@@ -4787,9 +4794,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4787 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4794 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4788 dentry->d_name.len, dir->i_ino, objectid, 4795 dentry->d_name.len, dir->i_ino, objectid,
4789 BTRFS_I(dir)->block_group, mode, &index); 4796 BTRFS_I(dir)->block_group, mode, &index);
4790 err = PTR_ERR(inode); 4797 if (IS_ERR(inode)) {
4791 if (IS_ERR(inode)) 4798 err = PTR_ERR(inode);
4792 goto out_unlock; 4799 goto out_unlock;
4800 }
4793 4801
4794 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 4802 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4795 if (err) { 4803 if (err) {
@@ -4839,9 +4847,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4839 if (inode->i_nlink == ~0U) 4847 if (inode->i_nlink == ~0U)
4840 return -EMLINK; 4848 return -EMLINK;
4841 4849
4842 btrfs_inc_nlink(inode);
4843 inode->i_ctime = CURRENT_TIME;
4844
4845 err = btrfs_set_inode_index(dir, &index); 4850 err = btrfs_set_inode_index(dir, &index);
4846 if (err) 4851 if (err)
4847 goto fail; 4852 goto fail;
@@ -4857,6 +4862,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4857 goto fail; 4862 goto fail;
4858 } 4863 }
4859 4864
4865 btrfs_inc_nlink(inode);
4866 inode->i_ctime = CURRENT_TIME;
4867
4860 btrfs_set_trans_block_group(trans, dir); 4868 btrfs_set_trans_block_group(trans, dir);
4861 ihold(inode); 4869 ihold(inode);
4862 4870
@@ -4994,6 +5002,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4994 inline_size = btrfs_file_extent_inline_item_len(leaf, 5002 inline_size = btrfs_file_extent_inline_item_len(leaf,
4995 btrfs_item_nr(leaf, path->slots[0])); 5003 btrfs_item_nr(leaf, path->slots[0]));
4996 tmp = kmalloc(inline_size, GFP_NOFS); 5004 tmp = kmalloc(inline_size, GFP_NOFS);
5005 if (!tmp)
5006 return -ENOMEM;
4997 ptr = btrfs_file_extent_inline_start(item); 5007 ptr = btrfs_file_extent_inline_start(item);
4998 5008
4999 read_extent_buffer(leaf, tmp, ptr, inline_size); 5009 read_extent_buffer(leaf, tmp, ptr, inline_size);
@@ -5226,7 +5236,7 @@ again:
5226 btrfs_mark_buffer_dirty(leaf); 5236 btrfs_mark_buffer_dirty(leaf);
5227 } 5237 }
5228 set_extent_uptodate(io_tree, em->start, 5238 set_extent_uptodate(io_tree, em->start,
5229 extent_map_end(em) - 1, GFP_NOFS); 5239 extent_map_end(em) - 1, NULL, GFP_NOFS);
5230 goto insert; 5240 goto insert;
5231 } else { 5241 } else {
5232 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); 5242 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5433,17 +5443,30 @@ out:
5433} 5443}
5434 5444
5435static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5445static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5446 struct extent_map *em,
5436 u64 start, u64 len) 5447 u64 start, u64 len)
5437{ 5448{
5438 struct btrfs_root *root = BTRFS_I(inode)->root; 5449 struct btrfs_root *root = BTRFS_I(inode)->root;
5439 struct btrfs_trans_handle *trans; 5450 struct btrfs_trans_handle *trans;
5440 struct extent_map *em;
5441 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5451 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5442 struct btrfs_key ins; 5452 struct btrfs_key ins;
5443 u64 alloc_hint; 5453 u64 alloc_hint;
5444 int ret; 5454 int ret;
5455 bool insert = false;
5445 5456
5446 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5457 /*
5458 * Ok if the extent map we looked up is a hole and is for the exact
5459 * range we want, there is no reason to allocate a new one, however if
5460 * it is not right then we need to free this one and drop the cache for
5461 * our range.
5462 */
5463 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5464 em->len != len) {
5465 free_extent_map(em);
5466 em = NULL;
5467 insert = true;
5468 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5469 }
5447 5470
5448 trans = btrfs_join_transaction(root, 0); 5471 trans = btrfs_join_transaction(root, 0);
5449 if (IS_ERR(trans)) 5472 if (IS_ERR(trans))
@@ -5459,10 +5482,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5459 goto out; 5482 goto out;
5460 } 5483 }
5461 5484
5462 em = alloc_extent_map(GFP_NOFS);
5463 if (!em) { 5485 if (!em) {
5464 em = ERR_PTR(-ENOMEM); 5486 em = alloc_extent_map(GFP_NOFS);
5465 goto out; 5487 if (!em) {
5488 em = ERR_PTR(-ENOMEM);
5489 goto out;
5490 }
5466 } 5491 }
5467 5492
5468 em->start = start; 5493 em->start = start;
@@ -5472,9 +5497,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5472 em->block_start = ins.objectid; 5497 em->block_start = ins.objectid;
5473 em->block_len = ins.offset; 5498 em->block_len = ins.offset;
5474 em->bdev = root->fs_info->fs_devices->latest_bdev; 5499 em->bdev = root->fs_info->fs_devices->latest_bdev;
5500
5501 /*
5502 * We need to do this because if we're using the original em we searched
5503 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5504 */
5505 em->flags = 0;
5475 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5506 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5476 5507
5477 while (1) { 5508 while (insert) {
5478 write_lock(&em_tree->lock); 5509 write_lock(&em_tree->lock);
5479 ret = add_extent_mapping(em_tree, em); 5510 ret = add_extent_mapping(em_tree, em);
5480 write_unlock(&em_tree->lock); 5511 write_unlock(&em_tree->lock);
@@ -5692,8 +5723,7 @@ must_cow:
5692 * it above 5723 * it above
5693 */ 5724 */
5694 len = bh_result->b_size; 5725 len = bh_result->b_size;
5695 free_extent_map(em); 5726 em = btrfs_new_extent_direct(inode, em, start, len);
5696 em = btrfs_new_extent_direct(inode, start, len);
5697 if (IS_ERR(em)) 5727 if (IS_ERR(em))
5698 return PTR_ERR(em); 5728 return PTR_ERR(em);
5699 len = min(len, em->len - (start - em->start)); 5729 len = min(len, em->len - (start - em->start));
@@ -5856,8 +5886,10 @@ again:
5856 } 5886 }
5857 5887
5858 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5888 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5859 btrfs_ordered_update_i_size(inode, 0, ordered); 5889 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5860 btrfs_update_inode(trans, root, inode); 5890 if (!ret)
5891 btrfs_update_inode(trans, root, inode);
5892 ret = 0;
5861out_unlock: 5893out_unlock:
5862 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5894 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5863 ordered->file_offset + ordered->len - 1, 5895 ordered->file_offset + ordered->len - 1,
@@ -5943,7 +5975,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5943 5975
5944static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5976static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5945 int rw, u64 file_offset, int skip_sum, 5977 int rw, u64 file_offset, int skip_sum,
5946 u32 *csums) 5978 u32 *csums, int async_submit)
5947{ 5979{
5948 int write = rw & REQ_WRITE; 5980 int write = rw & REQ_WRITE;
5949 struct btrfs_root *root = BTRFS_I(inode)->root; 5981 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5954,13 +5986,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5954 if (ret) 5986 if (ret)
5955 goto err; 5987 goto err;
5956 5988
5957 if (write && !skip_sum) { 5989 if (skip_sum)
5990 goto map;
5991
5992 if (write && async_submit) {
5958 ret = btrfs_wq_submit_bio(root->fs_info, 5993 ret = btrfs_wq_submit_bio(root->fs_info,
5959 inode, rw, bio, 0, 0, 5994 inode, rw, bio, 0, 0,
5960 file_offset, 5995 file_offset,
5961 __btrfs_submit_bio_start_direct_io, 5996 __btrfs_submit_bio_start_direct_io,
5962 __btrfs_submit_bio_done); 5997 __btrfs_submit_bio_done);
5963 goto err; 5998 goto err;
5999 } else if (write) {
6000 /*
6001 * If we aren't doing async submit, calculate the csum of the
6002 * bio now.
6003 */
6004 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6005 if (ret)
6006 goto err;
5964 } else if (!skip_sum) { 6007 } else if (!skip_sum) {
5965 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6008 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5966 file_offset, csums); 6009 file_offset, csums);
@@ -5968,7 +6011,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5968 goto err; 6011 goto err;
5969 } 6012 }
5970 6013
5971 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6014map:
6015 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5972err: 6016err:
5973 bio_put(bio); 6017 bio_put(bio);
5974 return ret; 6018 return ret;
@@ -5990,23 +6034,30 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5990 int nr_pages = 0; 6034 int nr_pages = 0;
5991 u32 *csums = dip->csums; 6035 u32 *csums = dip->csums;
5992 int ret = 0; 6036 int ret = 0;
6037 int async_submit = 0;
5993 int write = rw & REQ_WRITE; 6038 int write = rw & REQ_WRITE;
5994 6039
5995 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5996 if (!bio)
5997 return -ENOMEM;
5998 bio->bi_private = dip;
5999 bio->bi_end_io = btrfs_end_dio_bio;
6000 atomic_inc(&dip->pending_bios);
6001
6002 map_length = orig_bio->bi_size; 6040 map_length = orig_bio->bi_size;
6003 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6041 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
6004 &map_length, NULL, 0); 6042 &map_length, NULL, 0);
6005 if (ret) { 6043 if (ret) {
6006 bio_put(bio); 6044 bio_put(orig_bio);
6007 return -EIO; 6045 return -EIO;
6008 } 6046 }
6009 6047
6048 if (map_length >= orig_bio->bi_size) {
6049 bio = orig_bio;
6050 goto submit;
6051 }
6052
6053 async_submit = 1;
6054 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6055 if (!bio)
6056 return -ENOMEM;
6057 bio->bi_private = dip;
6058 bio->bi_end_io = btrfs_end_dio_bio;
6059 atomic_inc(&dip->pending_bios);
6060
6010 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6061 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
6011 if (unlikely(map_length < submit_len + bvec->bv_len || 6062 if (unlikely(map_length < submit_len + bvec->bv_len ||
6012 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6063 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6020,7 +6071,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6020 atomic_inc(&dip->pending_bios); 6071 atomic_inc(&dip->pending_bios);
6021 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6072 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6022 file_offset, skip_sum, 6073 file_offset, skip_sum,
6023 csums); 6074 csums, async_submit);
6024 if (ret) { 6075 if (ret) {
6025 bio_put(bio); 6076 bio_put(bio);
6026 atomic_dec(&dip->pending_bios); 6077 atomic_dec(&dip->pending_bios);
@@ -6057,8 +6108,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6057 } 6108 }
6058 } 6109 }
6059 6110
6111submit:
6060 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6112 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6061 csums); 6113 csums, async_submit);
6062 if (!ret) 6114 if (!ret)
6063 return 0; 6115 return 0;
6064 6116
@@ -6153,6 +6205,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6153 unsigned long nr_segs) 6205 unsigned long nr_segs)
6154{ 6206{
6155 int seg; 6207 int seg;
6208 int i;
6156 size_t size; 6209 size_t size;
6157 unsigned long addr; 6210 unsigned long addr;
6158 unsigned blocksize_mask = root->sectorsize - 1; 6211 unsigned blocksize_mask = root->sectorsize - 1;
@@ -6167,8 +6220,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6167 addr = (unsigned long)iov[seg].iov_base; 6220 addr = (unsigned long)iov[seg].iov_base;
6168 size = iov[seg].iov_len; 6221 size = iov[seg].iov_len;
6169 end += size; 6222 end += size;
6170 if ((addr & blocksize_mask) || (size & blocksize_mask)) 6223 if ((addr & blocksize_mask) || (size & blocksize_mask))
6171 goto out; 6224 goto out;
6225
6226 /* If this is a write we don't need to check anymore */
6227 if (rw & WRITE)
6228 continue;
6229
6230 /*
6231 * Check to make sure we don't have duplicate iov_base's in this
6232 * iovec, if so return EINVAL, otherwise we'll get csum errors
6233 * when reading back.
6234 */
6235 for (i = seg + 1; i < nr_segs; i++) {
6236 if (iov[seg].iov_base == iov[i].iov_base)
6237 goto out;
6238 }
6172 } 6239 }
6173 retval = 0; 6240 retval = 0;
6174out: 6241out:
@@ -6960,8 +7027,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6960 * should cover the worst case number of items we'll modify. 7027 * should cover the worst case number of items we'll modify.
6961 */ 7028 */
6962 trans = btrfs_start_transaction(root, 20); 7029 trans = btrfs_start_transaction(root, 20);
6963 if (IS_ERR(trans)) 7030 if (IS_ERR(trans)) {
6964 return PTR_ERR(trans); 7031 ret = PTR_ERR(trans);
7032 goto out_notrans;
7033 }
6965 7034
6966 btrfs_set_trans_block_group(trans, new_dir); 7035 btrfs_set_trans_block_group(trans, new_dir);
6967 7036
@@ -7061,7 +7130,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7061 } 7130 }
7062out_fail: 7131out_fail:
7063 btrfs_end_transaction_throttle(trans, root); 7132 btrfs_end_transaction_throttle(trans, root);
7064 7133out_notrans:
7065 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7134 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
7066 up_read(&root->fs_info->subvol_sem); 7135 up_read(&root->fs_info->subvol_sem);
7067 7136
@@ -7209,9 +7278,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7209 dentry->d_name.len, dir->i_ino, objectid, 7278 dentry->d_name.len, dir->i_ino, objectid,
7210 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7279 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
7211 &index); 7280 &index);
7212 err = PTR_ERR(inode); 7281 if (IS_ERR(inode)) {
7213 if (IS_ERR(inode)) 7282 err = PTR_ERR(inode);
7214 goto out_unlock; 7283 goto out_unlock;
7284 }
7215 7285
7216 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 7286 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
7217 if (err) { 7287 if (err) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7c07fe26b7cf..2616f7ed4799 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -81,6 +81,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
81 iflags |= FS_NOATIME_FL; 81 iflags |= FS_NOATIME_FL;
82 if (flags & BTRFS_INODE_DIRSYNC) 82 if (flags & BTRFS_INODE_DIRSYNC)
83 iflags |= FS_DIRSYNC_FL; 83 iflags |= FS_DIRSYNC_FL;
84 if (flags & BTRFS_INODE_NODATACOW)
85 iflags |= FS_NOCOW_FL;
86
87 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
88 iflags |= FS_COMPR_FL;
89 else if (flags & BTRFS_INODE_NOCOMPRESS)
90 iflags |= FS_NOCOMP_FL;
84 91
85 return iflags; 92 return iflags;
86} 93}
@@ -144,16 +151,13 @@ static int check_flags(unsigned int flags)
144 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 151 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
145 FS_NOATIME_FL | FS_NODUMP_FL | \ 152 FS_NOATIME_FL | FS_NODUMP_FL | \
146 FS_SYNC_FL | FS_DIRSYNC_FL | \ 153 FS_SYNC_FL | FS_DIRSYNC_FL | \
147 FS_NOCOMP_FL | FS_COMPR_FL | \ 154 FS_NOCOMP_FL | FS_COMPR_FL |
148 FS_NOCOW_FL | FS_COW_FL)) 155 FS_NOCOW_FL))
149 return -EOPNOTSUPP; 156 return -EOPNOTSUPP;
150 157
151 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 158 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
152 return -EINVAL; 159 return -EINVAL;
153 160
154 if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
155 return -EINVAL;
156
157 return 0; 161 return 0;
158} 162}
159 163
@@ -218,6 +222,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
218 ip->flags |= BTRFS_INODE_DIRSYNC; 222 ip->flags |= BTRFS_INODE_DIRSYNC;
219 else 223 else
220 ip->flags &= ~BTRFS_INODE_DIRSYNC; 224 ip->flags &= ~BTRFS_INODE_DIRSYNC;
225 if (flags & FS_NOCOW_FL)
226 ip->flags |= BTRFS_INODE_NODATACOW;
227 else
228 ip->flags &= ~BTRFS_INODE_NODATACOW;
221 229
222 /* 230 /*
223 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 231 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
@@ -230,11 +238,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
230 } else if (flags & FS_COMPR_FL) { 238 } else if (flags & FS_COMPR_FL) {
231 ip->flags |= BTRFS_INODE_COMPRESS; 239 ip->flags |= BTRFS_INODE_COMPRESS;
232 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 240 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
241 } else {
242 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
233 } 243 }
234 if (flags & FS_NOCOW_FL)
235 ip->flags |= BTRFS_INODE_NODATACOW;
236 else if (flags & FS_COW_FL)
237 ip->flags &= ~BTRFS_INODE_NODATACOW;
238 244
239 trans = btrfs_join_transaction(root, 1); 245 trans = btrfs_join_transaction(root, 1);
240 BUG_ON(IS_ERR(trans)); 246 BUG_ON(IS_ERR(trans));
@@ -373,6 +379,10 @@ static noinline int create_subvol(struct btrfs_root *root,
373 inode_item->nbytes = cpu_to_le64(root->leafsize); 379 inode_item->nbytes = cpu_to_le64(root->leafsize);
374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 380 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
375 381
382 root_item.flags = 0;
383 root_item.byte_limit = 0;
384 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
385
376 btrfs_set_root_bytenr(&root_item, leaf->start); 386 btrfs_set_root_bytenr(&root_item, leaf->start);
377 btrfs_set_root_generation(&root_item, trans->transid); 387 btrfs_set_root_generation(&root_item, trans->transid);
378 btrfs_set_root_level(&root_item, 0); 388 btrfs_set_root_level(&root_item, 0);
@@ -2283,7 +2293,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2283 struct btrfs_ioctl_space_info space; 2293 struct btrfs_ioctl_space_info space;
2284 struct btrfs_ioctl_space_info *dest; 2294 struct btrfs_ioctl_space_info *dest;
2285 struct btrfs_ioctl_space_info *dest_orig; 2295 struct btrfs_ioctl_space_info *dest_orig;
2286 struct btrfs_ioctl_space_info *user_dest; 2296 struct btrfs_ioctl_space_info __user *user_dest;
2287 struct btrfs_space_info *info; 2297 struct btrfs_space_info *info;
2288 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2298 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2289 BTRFS_BLOCK_GROUP_SYSTEM, 2299 BTRFS_BLOCK_GROUP_SYSTEM,
@@ -2436,8 +2446,10 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2436 return PTR_ERR(trans); 2446 return PTR_ERR(trans);
2437 transid = trans->transid; 2447 transid = trans->transid;
2438 ret = btrfs_commit_transaction_async(trans, root, 0); 2448 ret = btrfs_commit_transaction_async(trans, root, 0);
2439 if (ret) 2449 if (ret) {
2450 btrfs_end_transaction(trans, root);
2440 return ret; 2451 return ret;
2452 }
2441 2453
2442 if (argp) 2454 if (argp)
2443 if (copy_to_user(argp, &transid, sizeof(transid))) 2455 if (copy_to_user(argp, &transid, sizeof(transid)))
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 58250e09eb05..199a80134312 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2346,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2346 root = next->root; 2346 root = next->root;
2347 BUG_ON(!root); 2347 BUG_ON(!root);
2348 2348
2349 /* no other choice for non-refernce counted tree */ 2349 /* no other choice for non-references counted tree */
2350 if (!root->ref_cows) 2350 if (!root->ref_cows)
2351 return root; 2351 return root;
2352 2352
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 29b2d7c930eb..6928bff62daa 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -473,3 +473,21 @@ again:
473 btrfs_free_path(path); 473 btrfs_free_path(path);
474 return 0; 474 return 0;
475} 475}
476
477/*
478 * Old btrfs forgets to init root_item->flags and root_item->byte_limit
479 * for subvolumes. To work around this problem, we steal a bit from
480 * root_item->inode_item->flags, and use it to indicate if those fields
481 * have been properly initialized.
482 */
483void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
484{
485 u64 inode_flags = le64_to_cpu(root_item->inode.flags);
486
487 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
488 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
489 root_item->inode.flags = cpu_to_le64(inode_flags);
490 root_item->flags = 0;
491 root_item->byte_limit = 0;
492 }
493}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2edfc039f098..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
162 Opt_enospc_debug, Opt_err, 162 Opt_enospc_debug, Opt_subvolrootid, Opt_err,
163}; 163};
164 164
165static match_table_t tokens = { 165static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
189 {Opt_clear_cache, "clear_cache"}, 189 {Opt_clear_cache, "clear_cache"},
190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 {Opt_enospc_debug, "enospc_debug"}, 191 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"},
192 {Opt_err, NULL}, 193 {Opt_err, NULL},
193}; 194};
194 195
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
232 break; 233 break;
233 case Opt_subvol: 234 case Opt_subvol:
234 case Opt_subvolid: 235 case Opt_subvolid:
236 case Opt_subvolrootid:
235 case Opt_device: 237 case Opt_device:
236 /* 238 /*
237 * These are parsed by btrfs_parse_early_options 239 * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
388 */ 390 */
389static int btrfs_parse_early_options(const char *options, fmode_t flags, 391static int btrfs_parse_early_options(const char *options, fmode_t flags,
390 void *holder, char **subvol_name, u64 *subvol_objectid, 392 void *holder, char **subvol_name, u64 *subvol_objectid,
391 struct btrfs_fs_devices **fs_devices) 393 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
392{ 394{
393 substring_t args[MAX_OPT_ARGS]; 395 substring_t args[MAX_OPT_ARGS];
394 char *opts, *orig, *p; 396 char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
429 *subvol_objectid = intarg; 431 *subvol_objectid = intarg;
430 } 432 }
431 break; 433 break;
434 case Opt_subvolrootid:
435 intarg = 0;
436 error = match_int(&args[0], &intarg);
437 if (!error) {
438 /* we want the original fs_tree */
439 if (!intarg)
440 *subvol_rootid =
441 BTRFS_FS_TREE_OBJECTID;
442 else
443 *subvol_rootid = intarg;
444 }
445 break;
432 case Opt_device: 446 case Opt_device:
433 error = btrfs_scan_one_device(match_strdup(&args[0]), 447 error = btrfs_scan_one_device(match_strdup(&args[0]),
434 flags, holder, fs_devices); 448 flags, holder, fs_devices);
@@ -644,6 +658,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
644{ 658{
645 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 659 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
646 struct btrfs_fs_info *info = root->fs_info; 660 struct btrfs_fs_info *info = root->fs_info;
661 char *compress_type;
647 662
648 if (btrfs_test_opt(root, DEGRADED)) 663 if (btrfs_test_opt(root, DEGRADED))
649 seq_puts(seq, ",degraded"); 664 seq_puts(seq, ",degraded");
@@ -662,8 +677,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
662 if (info->thread_pool_size != min_t(unsigned long, 677 if (info->thread_pool_size != min_t(unsigned long,
663 num_online_cpus() + 2, 8)) 678 num_online_cpus() + 2, 8))
664 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 679 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
665 if (btrfs_test_opt(root, COMPRESS)) 680 if (btrfs_test_opt(root, COMPRESS)) {
666 seq_puts(seq, ",compress"); 681 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
682 compress_type = "zlib";
683 else
684 compress_type = "lzo";
685 if (btrfs_test_opt(root, FORCE_COMPRESS))
686 seq_printf(seq, ",compress-force=%s", compress_type);
687 else
688 seq_printf(seq, ",compress=%s", compress_type);
689 }
667 if (btrfs_test_opt(root, NOSSD)) 690 if (btrfs_test_opt(root, NOSSD))
668 seq_puts(seq, ",nossd"); 691 seq_puts(seq, ",nossd");
669 if (btrfs_test_opt(root, SSD_SPREAD)) 692 if (btrfs_test_opt(root, SSD_SPREAD))
@@ -678,6 +701,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
678 seq_puts(seq, ",discard"); 701 seq_puts(seq, ",discard");
679 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 702 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
680 seq_puts(seq, ",noacl"); 703 seq_puts(seq, ",noacl");
704 if (btrfs_test_opt(root, SPACE_CACHE))
705 seq_puts(seq, ",space_cache");
706 if (btrfs_test_opt(root, CLEAR_CACHE))
707 seq_puts(seq, ",clear_cache");
708 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
709 seq_puts(seq, ",user_subvol_rm_allowed");
681 return 0; 710 return 0;
682} 711}
683 712
@@ -721,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
721 fmode_t mode = FMODE_READ; 750 fmode_t mode = FMODE_READ;
722 char *subvol_name = NULL; 751 char *subvol_name = NULL;
723 u64 subvol_objectid = 0; 752 u64 subvol_objectid = 0;
753 u64 subvol_rootid = 0;
724 int error = 0; 754 int error = 0;
725 755
726 if (!(flags & MS_RDONLY)) 756 if (!(flags & MS_RDONLY))
@@ -728,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
728 758
729 error = btrfs_parse_early_options(data, mode, fs_type, 759 error = btrfs_parse_early_options(data, mode, fs_type,
730 &subvol_name, &subvol_objectid, 760 &subvol_name, &subvol_objectid,
731 &fs_devices); 761 &subvol_rootid, &fs_devices);
732 if (error) 762 if (error)
733 return ERR_PTR(error); 763 return ERR_PTR(error);
734 764
@@ -792,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
792 s->s_flags |= MS_ACTIVE; 822 s->s_flags |= MS_ACTIVE;
793 } 823 }
794 824
795 root = get_default_root(s, subvol_objectid);
796 if (IS_ERR(root)) {
797 error = PTR_ERR(root);
798 deactivate_locked_super(s);
799 goto error_free_subvol_name;
800 }
801 /* if they gave us a subvolume name bind mount into that */ 825 /* if they gave us a subvolume name bind mount into that */
802 if (strcmp(subvol_name, ".")) { 826 if (strcmp(subvol_name, ".")) {
803 struct dentry *new_root; 827 struct dentry *new_root;
828
829 root = get_default_root(s, subvol_rootid);
830 if (IS_ERR(root)) {
831 error = PTR_ERR(root);
832 deactivate_locked_super(s);
833 goto error_free_subvol_name;
834 }
835
804 mutex_lock(&root->d_inode->i_mutex); 836 mutex_lock(&root->d_inode->i_mutex);
805 new_root = lookup_one_len(subvol_name, root, 837 new_root = lookup_one_len(subvol_name, root,
806 strlen(subvol_name)); 838 strlen(subvol_name));
@@ -821,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
821 } 853 }
822 dput(root); 854 dput(root);
823 root = new_root; 855 root = new_root;
856 } else {
857 root = get_default_root(s, subvol_objectid);
858 if (IS_ERR(root)) {
859 error = PTR_ERR(root);
860 deactivate_locked_super(s);
861 goto error_free_subvol_name;
862 }
824 } 863 }
825 864
826 kfree(subvol_name); 865 kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ce48eb59d615..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
60 if (!cur_trans) 58 if (!cur_trans)
61 return -ENOMEM; 59 return -ENOMEM;
62 root->fs_info->generation++; 60 root->fs_info->generation++;
63 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
64 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
65 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
66 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
67 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
68 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
69 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
70 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
71 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
72 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
73 71
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
88 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
89 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
90 } else { 88 } else {
91 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
92 cur_trans->num_joined++; 90 cur_trans->num_joined++;
93 } 91 }
94 92
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
145 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
146 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
147 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
148 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
149 while (1) { 147 while (1) {
150 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
151 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181{ 179{
182 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
183 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
184 int ret; 183 int ret;
185 184
186 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -197,13 +196,14 @@ again:
197 196
198 ret = join_transaction(root); 197 ret = join_transaction(root);
199 if (ret < 0) { 198 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK) 200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex); 201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 202 return ERR_PTR(ret);
203 } 203 }
204 204
205 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
206 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
209 209
@@ -223,10 +223,18 @@ again:
223 223
224 if (num_items > 0) { 224 if (num_items > 0) {
225 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
226 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
227 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
228 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
229 } 236 }
237
230 if (ret < 0) { 238 if (ret < 0) {
231 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
232 return ERR_PTR(ret); 240 return ERR_PTR(ret);
@@ -326,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
326 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
327 } 335 }
328 336
329 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
330 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
331 339
332 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -456,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
456 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
457 } 465 }
458 466
459 if (lock)
460 mutex_lock(&info->trans_mutex);
461 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
462 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
463 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
464 470
465 smp_mb(); 471 smp_mb();
466 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
467 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
468 put_transaction(cur_trans); 474 put_transaction(cur_trans);
469 if (lock)
470 mutex_unlock(&info->trans_mutex);
471 475
472 if (current->journal_info == trans) 476 if (current->journal_info == trans)
473 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -975,6 +979,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
975 record_root_in_trans(trans, root); 979 record_root_in_trans(trans, root);
976 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 980 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
977 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 981 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
982 btrfs_check_and_init_root_item(new_root_item);
978 983
979 root_flags = btrfs_root_flags(new_root_item); 984 root_flags = btrfs_root_flags(new_root_item);
980 if (pending->readonly) 985 if (pending->readonly)
@@ -1176,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1176 /* take transaction reference */ 1181 /* take transaction reference */
1177 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1178 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1179 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1180 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1181 1186
1182 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1235,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1235 1240
1236 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1237 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1238 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1239 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1240 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1241 1246
@@ -1257,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1257 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1258 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1259 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1260 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1261 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1262 1267
1263 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1298,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1298 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1299 1304
1300 smp_mb(); 1305 smp_mb();
1301 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1302 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1303 else if (should_grow) 1308 else if (should_grow)
1304 schedule_timeout(1); 1309 schedule_timeout(1);
1305 1310
1306 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1307 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1308 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1309 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1310 1315
1311 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1392,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1392 1397
1393 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1394 1399
1400 list_del_init(&cur_trans->list);
1395 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1396 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1397 1403
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c50271ad3157..f997ec0c1ba4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2209,8 +2209,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2209 2209
2210 log = root->log_root; 2210 log = root->log_root;
2211 path = btrfs_alloc_path(); 2211 path = btrfs_alloc_path();
2212 if (!path) 2212 if (!path) {
2213 return -ENOMEM; 2213 err = -ENOMEM;
2214 goto out_unlock;
2215 }
2214 2216
2215 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2217 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
2216 name, name_len, -1); 2218 name, name_len, -1);
@@ -2271,6 +2273,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2271 } 2273 }
2272fail: 2274fail:
2273 btrfs_free_path(path); 2275 btrfs_free_path(path);
2276out_unlock:
2274 mutex_unlock(&BTRFS_I(dir)->log_mutex); 2277 mutex_unlock(&BTRFS_I(dir)->log_mutex);
2275 if (ret == -ENOSPC) { 2278 if (ret == -ENOSPC) {
2276 root->fs_info->last_trans_log_full_commit = trans->transid; 2279 root->fs_info->last_trans_log_full_commit = trans->transid;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 309a57b9fc85..c7367ae5a3e6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -155,6 +155,15 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
155 unsigned long limit; 155 unsigned long limit;
156 unsigned long last_waited = 0; 156 unsigned long last_waited = 0;
157 int force_reg = 0; 157 int force_reg = 0;
158 struct blk_plug plug;
159
160 /*
161 * this function runs all the bios we've collected for
162 * a particular device. We don't want to wander off to
163 * another device without first sending all of these down.
164 * So, setup a plug here and finish it off before we return
165 */
166 blk_start_plug(&plug);
158 167
159 bdi = blk_get_backing_dev_info(device->bdev); 168 bdi = blk_get_backing_dev_info(device->bdev);
160 fs_info = device->dev_root->fs_info; 169 fs_info = device->dev_root->fs_info;
@@ -294,6 +303,7 @@ loop_lock:
294 spin_unlock(&device->io_lock); 303 spin_unlock(&device->io_lock);
295 304
296done: 305done:
306 blk_finish_plug(&plug);
297 return 0; 307 return 0;
298} 308}
299 309
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
182 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
183 int ret = 0, slot, advance; 183 int ret = 0, slot;
184 size_t total_size = 0, size_left = size; 184 size_t total_size = 0, size_left = size;
185 unsigned long name_ptr; 185 unsigned long name_ptr;
186 size_t name_len; 186 size_t name_len;
187 u32 nritems;
188 187
189 /* 188 /*
190 * ok we want all objects associated with this id. 189 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
204 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
205 if (ret < 0) 204 if (ret < 0)
206 goto err; 205 goto err;
207 advance = 0; 206
208 while (1) { 207 while (1) {
209 leaf = path->nodes[0]; 208 leaf = path->nodes[0];
210 nritems = btrfs_header_nritems(leaf);
211 slot = path->slots[0]; 209 slot = path->slots[0];
212 210
213 /* this is where we start walking through the path */ 211 /* this is where we start walking through the path */
214 if (advance || slot >= nritems) { 212 if (slot >= btrfs_header_nritems(leaf)) {
215 /* 213 /*
216 * if we've reached the last slot in this leaf we need 214 * if we've reached the last slot in this leaf we need
217 * to go to the next leaf and reset everything 215 * to go to the next leaf and reset everything
218 */ 216 */
219 if (slot >= nritems-1) { 217 ret = btrfs_next_leaf(root, path);
220 ret = btrfs_next_leaf(root, path); 218 if (ret < 0)
221 if (ret) 219 goto err;
222 break; 220 else if (ret > 0)
223 leaf = path->nodes[0]; 221 break;
224 nritems = btrfs_header_nritems(leaf); 222 continue;
225 slot = path->slots[0];
226 } else {
227 /*
228 * just walking through the slots on this leaf
229 */
230 slot++;
231 path->slots[0]++;
232 }
233 } 223 }
234 advance = 1;
235 224
236 btrfs_item_key_to_cpu(leaf, &found_key, slot); 225 btrfs_item_key_to_cpu(leaf, &found_key, slot);
237 226
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
250 239
251 /* we are just looking for how big our buffer needs to be */ 240 /* we are just looking for how big our buffer needs to be */
252 if (!size) 241 if (!size)
253 continue; 242 goto next;
254 243
255 if (!buffer || (name_len + 1) > size_left) { 244 if (!buffer || (name_len + 1) > size_left) {
256 ret = -ERANGE; 245 ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
263 252
264 size_left -= name_len + 1; 253 size_left -= name_len + 1;
265 buffer += name_len + 1; 254 buffer += name_len + 1;
255next:
256 path->slots[0]++;
266 } 257 }
267 ret = total_size; 258 ret = total_size;
268 259
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 37fe101a4e0d..1064805e653b 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -197,7 +197,7 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
197} 197}
198 198
199/* 199/*
200 * update the auxilliary data for an object object on disk 200 * update the auxiliary data for an object object on disk
201 */ 201 */
202static void cachefiles_update_object(struct fscache_object *_object) 202static void cachefiles_update_object(struct fscache_object *_object)
203{ 203{
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 37368ba2e67c..38b8ab554924 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -24,7 +24,7 @@
24 * context needs to be associated with the osd write during writeback. 24 * context needs to be associated with the osd write during writeback.
25 * 25 *
26 * Similarly, struct ceph_inode_info maintains a set of counters to 26 * Similarly, struct ceph_inode_info maintains a set of counters to
27 * count dirty pages on the inode. In the absense of snapshots, 27 * count dirty pages on the inode. In the absence of snapshots,
28 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count. 28 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
29 * 29 *
30 * When a snapshot is taken (that is, when the client receives 30 * When a snapshot is taken (that is, when the client receives
@@ -775,6 +775,13 @@ get_more_pages:
775 ci->i_truncate_seq, 775 ci->i_truncate_seq,
776 ci->i_truncate_size, 776 ci->i_truncate_size,
777 &inode->i_mtime, true, 1, 0); 777 &inode->i_mtime, true, 1, 0);
778
779 if (!req) {
780 rc = -ENOMEM;
781 unlock_page(page);
782 break;
783 }
784
778 max_pages = req->r_num_pages; 785 max_pages = req->r_num_pages;
779 786
780 alloc_page_vec(fsc, req); 787 alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6b61ded701e1..2a5404c1c42f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -765,7 +765,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
765 if (touch) { 765 if (touch) {
766 struct rb_node *q; 766 struct rb_node *q;
767 767
768 /* touch this + preceeding caps */ 768 /* touch this + preceding caps */
769 __touch_cap(cap); 769 __touch_cap(cap);
770 for (q = rb_first(&ci->i_caps); q != p; 770 for (q = rb_first(&ci->i_caps); q != p;
771 q = rb_next(q)) { 771 q = rb_next(q)) {
@@ -819,7 +819,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
819 used |= CEPH_CAP_FILE_CACHE; 819 used |= CEPH_CAP_FILE_CACHE;
820 if (ci->i_wr_ref) 820 if (ci->i_wr_ref)
821 used |= CEPH_CAP_FILE_WR; 821 used |= CEPH_CAP_FILE_WR;
822 if (ci->i_wrbuffer_ref) 822 if (ci->i_wb_ref || ci->i_wrbuffer_ref)
823 used |= CEPH_CAP_FILE_BUFFER; 823 used |= CEPH_CAP_FILE_BUFFER;
824 return used; 824 return used;
825} 825}
@@ -1331,10 +1331,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1331} 1331}
1332 1332
1333/* 1333/*
1334 * Mark caps dirty. If inode is newly dirty, add to the global dirty 1334 * Mark caps dirty. If inode is newly dirty, return the dirty flags.
1335 * list. 1335 * Caller is then responsible for calling __mark_inode_dirty with the
1336 * returned flags value.
1336 */ 1337 */
1337void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) 1338int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1338{ 1339{
1339 struct ceph_mds_client *mdsc = 1340 struct ceph_mds_client *mdsc =
1340 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 1341 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1357,7 +1358,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1357 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1358 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
1358 spin_unlock(&mdsc->cap_dirty_lock); 1359 spin_unlock(&mdsc->cap_dirty_lock);
1359 if (ci->i_flushing_caps == 0) { 1360 if (ci->i_flushing_caps == 0) {
1360 igrab(inode); 1361 ihold(inode);
1361 dirty |= I_DIRTY_SYNC; 1362 dirty |= I_DIRTY_SYNC;
1362 } 1363 }
1363 } 1364 }
@@ -1365,9 +1366,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1365 if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && 1366 if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
1366 (mask & CEPH_CAP_FILE_BUFFER)) 1367 (mask & CEPH_CAP_FILE_BUFFER))
1367 dirty |= I_DIRTY_DATASYNC; 1368 dirty |= I_DIRTY_DATASYNC;
1368 if (dirty)
1369 __mark_inode_dirty(inode, dirty);
1370 __cap_delay_requeue(mdsc, ci); 1369 __cap_delay_requeue(mdsc, ci);
1370 return dirty;
1371} 1371}
1372 1372
1373/* 1373/*
@@ -1990,11 +1990,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
1990 if (got & CEPH_CAP_FILE_WR) 1990 if (got & CEPH_CAP_FILE_WR)
1991 ci->i_wr_ref++; 1991 ci->i_wr_ref++;
1992 if (got & CEPH_CAP_FILE_BUFFER) { 1992 if (got & CEPH_CAP_FILE_BUFFER) {
1993 if (ci->i_wrbuffer_ref == 0) 1993 if (ci->i_wb_ref == 0)
1994 igrab(&ci->vfs_inode); 1994 ihold(&ci->vfs_inode);
1995 ci->i_wrbuffer_ref++; 1995 ci->i_wb_ref++;
1996 dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n", 1996 dout("__take_cap_refs %p wb %d -> %d (?)\n",
1997 &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref); 1997 &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
1998 } 1998 }
1999} 1999}
2000 2000
@@ -2169,12 +2169,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2169 if (--ci->i_rdcache_ref == 0) 2169 if (--ci->i_rdcache_ref == 0)
2170 last++; 2170 last++;
2171 if (had & CEPH_CAP_FILE_BUFFER) { 2171 if (had & CEPH_CAP_FILE_BUFFER) {
2172 if (--ci->i_wrbuffer_ref == 0) { 2172 if (--ci->i_wb_ref == 0) {
2173 last++; 2173 last++;
2174 put++; 2174 put++;
2175 } 2175 }
2176 dout("put_cap_refs %p wrbuffer %d -> %d (?)\n", 2176 dout("put_cap_refs %p wb %d -> %d (?)\n",
2177 inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref); 2177 inode, ci->i_wb_ref+1, ci->i_wb_ref);
2178 } 2178 }
2179 if (had & CEPH_CAP_FILE_WR) 2179 if (had & CEPH_CAP_FILE_WR)
2180 if (--ci->i_wr_ref == 0) { 2180 if (--ci->i_wr_ref == 0) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159b512d5a27..203252d88d9f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -734,9 +734,12 @@ retry_snap:
734 } 734 }
735 } 735 }
736 if (ret >= 0) { 736 if (ret >= 0) {
737 int dirty;
737 spin_lock(&inode->i_lock); 738 spin_lock(&inode->i_lock);
738 __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 739 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
739 spin_unlock(&inode->i_lock); 740 spin_unlock(&inode->i_lock);
741 if (dirty)
742 __mark_inode_dirty(inode, dirty);
740 } 743 }
741 744
742out: 745out:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index b54c97da1c43..70b6a4839c38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -355,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
355 ci->i_rd_ref = 0; 355 ci->i_rd_ref = 0;
356 ci->i_rdcache_ref = 0; 356 ci->i_rdcache_ref = 0;
357 ci->i_wr_ref = 0; 357 ci->i_wr_ref = 0;
358 ci->i_wb_ref = 0;
358 ci->i_wrbuffer_ref = 0; 359 ci->i_wrbuffer_ref = 0;
359 ci->i_wrbuffer_ref_head = 0; 360 ci->i_wrbuffer_ref_head = 0;
360 ci->i_shared_gen = 0; 361 ci->i_shared_gen = 0;
@@ -1567,6 +1568,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1567 int release = 0, dirtied = 0; 1568 int release = 0, dirtied = 0;
1568 int mask = 0; 1569 int mask = 0;
1569 int err = 0; 1570 int err = 0;
1571 int inode_dirty_flags = 0;
1570 1572
1571 if (ceph_snap(inode) != CEPH_NOSNAP) 1573 if (ceph_snap(inode) != CEPH_NOSNAP)
1572 return -EROFS; 1574 return -EROFS;
@@ -1725,13 +1727,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1725 dout("setattr %p ATTR_FILE ... hrm!\n", inode); 1727 dout("setattr %p ATTR_FILE ... hrm!\n", inode);
1726 1728
1727 if (dirtied) { 1729 if (dirtied) {
1728 __ceph_mark_dirty_caps(ci, dirtied); 1730 inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
1729 inode->i_ctime = CURRENT_TIME; 1731 inode->i_ctime = CURRENT_TIME;
1730 } 1732 }
1731 1733
1732 release &= issued; 1734 release &= issued;
1733 spin_unlock(&inode->i_lock); 1735 spin_unlock(&inode->i_lock);
1734 1736
1737 if (inode_dirty_flags)
1738 __mark_inode_dirty(inode, inode_dirty_flags);
1739
1735 if (mask) { 1740 if (mask) {
1736 req->r_inode = igrab(inode); 1741 req->r_inode = igrab(inode);
1737 req->r_inode_drop = release; 1742 req->r_inode_drop = release;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a1ee8fa3a8e7..d0fae4ce9ba5 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3215,9 +3215,15 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
3215{ 3215{
3216 struct ceph_mds_client *mdsc = fsc->mdsc; 3216 struct ceph_mds_client *mdsc = fsc->mdsc;
3217 3217
3218 dout("mdsc_destroy %p\n", mdsc);
3218 ceph_mdsc_stop(mdsc); 3219 ceph_mdsc_stop(mdsc);
3220
3221 /* flush out any connection work with references to us */
3222 ceph_msgr_flush();
3223
3219 fsc->mdsc = NULL; 3224 fsc->mdsc = NULL;
3220 kfree(mdsc); 3225 kfree(mdsc);
3226 dout("mdsc_destroy %p done\n", mdsc);
3221} 3227}
3222 3228
3223 3229
@@ -3298,8 +3304,8 @@ static void con_put(struct ceph_connection *con)
3298{ 3304{
3299 struct ceph_mds_session *s = con->private; 3305 struct ceph_mds_session *s = con->private;
3300 3306
3307 dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
3301 ceph_put_mds_session(s); 3308 ceph_put_mds_session(s);
3302 dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref));
3303} 3309}
3304 3310
3305/* 3311/*
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 0aee66b92af3..24067d68a554 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
206 up_write(&mdsc->snap_rwsem); 206 up_write(&mdsc->snap_rwsem);
207 } else { 207 } else {
208 spin_lock(&mdsc->snap_empty_lock); 208 spin_lock(&mdsc->snap_empty_lock);
209 list_add(&mdsc->snap_empty, &realm->empty_item); 209 list_add(&realm->empty_item, &mdsc->snap_empty);
210 spin_unlock(&mdsc->snap_empty_lock); 210 spin_unlock(&mdsc->snap_empty_lock);
211 } 211 }
212} 212}
@@ -342,7 +342,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
342 num = 0; 342 num = 0;
343 snapc->seq = realm->seq; 343 snapc->seq = realm->seq;
344 if (parent) { 344 if (parent) {
345 /* include any of parent's snaps occuring _after_ my 345 /* include any of parent's snaps occurring _after_ my
346 parent became my parent */ 346 parent became my parent */
347 for (i = 0; i < parent->cached_context->num_snaps; i++) 347 for (i = 0; i < parent->cached_context->num_snaps; i++)
348 if (parent->cached_context->snaps[i] >= 348 if (parent->cached_context->snaps[i] >=
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a9e78b4a258c..f2f77fd3c14c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -353,7 +353,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
353 353
354 if (opt->name) 354 if (opt->name)
355 seq_printf(m, ",name=%s", opt->name); 355 seq_printf(m, ",name=%s", opt->name);
356 if (opt->secret) 356 if (opt->key)
357 seq_puts(m, ",secret=<hidden>"); 357 seq_puts(m, ",secret=<hidden>");
358 358
359 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 359 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 619fe719968f..f5cabefa98dc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,7 +293,7 @@ struct ceph_inode_info {
293 293
294 /* held references to caps */ 294 /* held references to caps */
295 int i_pin_ref; 295 int i_pin_ref;
296 int i_rd_ref, i_rdcache_ref, i_wr_ref; 296 int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
297 int i_wrbuffer_ref, i_wrbuffer_ref_head; 297 int i_wrbuffer_ref, i_wrbuffer_ref_head;
298 u32 i_shared_gen; /* increment each time we get FILE_SHARED */ 298 u32 i_shared_gen; /* increment each time we get FILE_SHARED */
299 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ 299 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
@@ -506,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
506{ 506{
507 return ci->i_dirty_caps | ci->i_flushing_caps; 507 return ci->i_dirty_caps | ci->i_flushing_caps;
508} 508}
509extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); 509extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
510 510
511extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); 511extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
512extern int __ceph_caps_used(struct ceph_inode_info *ci); 512extern int __ceph_caps_used(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c9eba6ef9df..f2b628696180 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
703 struct ceph_inode_xattr *xattr = NULL; 703 struct ceph_inode_xattr *xattr = NULL;
704 int issued; 704 int issued;
705 int required_blob_size; 705 int required_blob_size;
706 int dirty;
706 707
707 if (ceph_snap(inode) != CEPH_NOSNAP) 708 if (ceph_snap(inode) != CEPH_NOSNAP)
708 return -EROFS; 709 return -EROFS;
@@ -763,11 +764,12 @@ retry:
763 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); 764 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
764 err = __set_xattr(ci, newname, name_len, newval, 765 err = __set_xattr(ci, newname, name_len, newval,
765 val_len, 1, 1, 1, &xattr); 766 val_len, 1, 1, 1, &xattr);
766 __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 767 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
767 ci->i_xattrs.dirty = true; 768 ci->i_xattrs.dirty = true;
768 inode->i_ctime = CURRENT_TIME; 769 inode->i_ctime = CURRENT_TIME;
769 spin_unlock(&inode->i_lock); 770 spin_unlock(&inode->i_lock);
770 771 if (dirty)
772 __mark_inode_dirty(inode, dirty);
771 return err; 773 return err;
772 774
773do_sync: 775do_sync:
@@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
810 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); 812 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
811 int issued; 813 int issued;
812 int err; 814 int err;
815 int dirty;
813 816
814 if (ceph_snap(inode) != CEPH_NOSNAP) 817 if (ceph_snap(inode) != CEPH_NOSNAP)
815 return -EROFS; 818 return -EROFS;
@@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 goto do_sync; 836 goto do_sync;
834 837
835 err = __remove_xattr_by_name(ceph_inode(inode), name); 838 err = __remove_xattr_by_name(ceph_inode(inode), name);
836 __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 839 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
837 ci->i_xattrs.dirty = true; 840 ci->i_xattrs.dirty = true;
838 inode->i_ctime = CURRENT_TIME; 841 inode->i_ctime = CURRENT_TIME;
839 842
840 spin_unlock(&inode->i_lock); 843 spin_unlock(&inode->i_lock);
841 844 if (dirty)
845 __mark_inode_dirty(inode, dirty);
842 return err; 846 return err;
843do_sync: 847do_sync:
844 spin_unlock(&inode->i_lock); 848 spin_unlock(&inode->i_lock);
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
index 7f7fa3c302af..ea940b1db77b 100644
--- a/fs/cifs/AUTHORS
+++ b/fs/cifs/AUTHORS
@@ -35,7 +35,7 @@ Adrian Bunk (kcalloc cleanups)
35Miklos Szeredi 35Miklos Szeredi
36Kazeon team for various fixes especially for 2.4 version. 36Kazeon team for various fixes especially for 2.4 version.
37Asser Ferno (Change Notify support) 37Asser Ferno (Change Notify support)
38Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup 38Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
39Gunter Kukkukk (testing and suggestions for support of old servers) 39Gunter Kukkukk (testing and suggestions for support of old servers)
40Igor Mammedov (DFS support) 40Igor Mammedov (DFS support)
41Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) 41Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
7 select CRYPTO_MD5 7 select CRYPTO_MD5
8 select CRYPTO_HMAC 8 select CRYPTO_HMAC
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_DES
10 help 11 help
11 This is the client VFS module for the Common Internet File System 12 This is the client VFS module for the Common Internet File System
12 (CIFS) protocol which is the successor to the Server Message Block 13 (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
152 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
153 is handed over to the application/caller. 154 is handed over to the application/caller.
154 155
155config CIFS_EXPERIMENTAL 156config CIFS_SMB2
156 bool "CIFS Experimental Features (EXPERIMENTAL)" 157 bool "SMB2 network file system support (EXPERIMENTAL)"
158 depends on EXPERIMENTAL && INET && BROKEN
159 select NLS
160 select KEYS
161 select FSCACHE
162 select DNS_RESOLVER
163
164 help
165 This enables experimental support for the SMB2 (Server Message Block
166 version 2) protocol. The SMB2 protocol is the successor to the
167 popular CIFS and SMB network file sharing protocols. SMB2 is the
168 native file sharing mechanism for recent versions of Windows
169 operating systems (since Vista). SMB2 enablement will eventually
170 allow users better performance, security and features, than would be
171 possible with cifs. Note that smb2 mount options also are simpler
172 (compared to cifs) due to protocol improvements.
173
174 Unless you are a developer or tester, say N.
175
176config CIFS_NFSD_EXPORT
177 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
157 depends on CIFS && EXPERIMENTAL 178 depends on CIFS && EXPERIMENTAL
158 help 179 help
159 Enables cifs features under testing. These features are 180 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
160 experimental and currently include DFS support and directory
161 change notification ie fcntl(F_DNOTIFY), as well as the upcall
162 mechanism which will be used for Kerberos session negotiation
163 and uid remapping. Some of these features also may depend on
164 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
165 (which is disabled by default). See the file fs/cifs/README
166 for more details. If unsure, say N.
167
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o 9 readdir.o ioctl.o sess.o export.o
10 10
diff --git a/fs/cifs/README b/fs/cifs/README
index fe1683590828..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -685,22 +685,6 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
685 support and want to map the uid and gid fields 685 support and want to map the uid and gid fields
686 to values supplied at mount (rather than the 686 to values supplied at mount (rather than the
687 actual values, then set this to zero. (default 1) 687 actual values, then set this to zero. (default 1)
688Experimental When set to 1 used to enable certain experimental
689 features (currently enables multipage writes
690 when signing is enabled, the multipage write
691 performance enhancement was disabled when
692 signing turned on in case buffer was modified
693 just before it was sent, also this flag will
694 be used to use the new experimental directory change
695 notification code). When set to 2 enables
696 an additional experimental feature, "raw ntlmssp"
697 session establishment support (which allows
698 specifying "sec=ntlmssp" on mount). The Linux cifs
699 module will use ntlmv2 authentication encapsulated
700 in "raw ntlmssp" (not using SPNEGO) when
701 "sec=ntlmssp" is specified on mount.
702 This support also requires building cifs with
703 the CONFIG_CIFS_EXPERIMENTAL configuration flag.
704 688
705These experimental features and tracing can be enabled by changing flags in 689These experimental features and tracing can be enabled by changing flags in
706/proc/fs/cifs (after the cifs module has been installed or built into the 690/proc/fs/cifs (after the cifs module has been installed or built into the
@@ -720,18 +704,6 @@ the start of smb requests and responses can be enabled via:
720 704
721 echo 1 > /proc/fs/cifs/traceSMB 705 echo 1 > /proc/fs/cifs/traceSMB
722 706
723Two other experimental features are under development. To test these
724requires enabling CONFIG_CIFS_EXPERIMENTAL
725
726 cifsacl support needed to retrieve approximated mode bits based on
727 the contents on the CIFS ACL.
728
729 lease support: cifs will check the oplock state before calling into
730 the vfs to see if we can grant a lease on a file.
731
732 DNOTIFY fcntl: needed for support of directory change
733 notification and perhaps later for file leases)
734
735Per share (per client mount) statistics are available in /proc/fs/cifs/Stats 707Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
736if the kernel was configured with cifs statistics enabled. The statistics 708if the kernel was configured with cifs statistics enabled. The statistics
737represent the number of successful (ie non-zero return code from the server) 709represent the number of successful (ie non-zero return code from the server)
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e654dfd092c3..53d57a3fe427 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -50,7 +50,7 @@ void cifs_fscache_unregister(void)
50 */ 50 */
51struct cifs_server_key { 51struct cifs_server_key {
52 uint16_t family; /* address family */ 52 uint16_t family; /* address family */
53 uint16_t port; /* IP port */ 53 __be16 port; /* IP port */
54 union { 54 union {
55 struct in_addr ipv4_addr; 55 struct in_addr ipv4_addr;
56 struct in6_addr ipv6_addr; 56 struct in6_addr ipv6_addr;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 65829d32128c..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid); 65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb)); 66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
67} 67}
68 68
69 69
@@ -423,7 +423,6 @@ static const struct file_operations cifs_lookup_cache_proc_fops;
423static const struct file_operations traceSMB_proc_fops; 423static const struct file_operations traceSMB_proc_fops;
424static const struct file_operations cifs_multiuser_mount_proc_fops; 424static const struct file_operations cifs_multiuser_mount_proc_fops;
425static const struct file_operations cifs_security_flags_proc_fops; 425static const struct file_operations cifs_security_flags_proc_fops;
426static const struct file_operations cifs_experimental_proc_fops;
427static const struct file_operations cifs_linux_ext_proc_fops; 426static const struct file_operations cifs_linux_ext_proc_fops;
428 427
429void 428void
@@ -441,8 +440,6 @@ cifs_proc_init(void)
441 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops); 440 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
442 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops); 441 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
443 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops); 442 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
444 proc_create("Experimental", 0, proc_fs_cifs,
445 &cifs_experimental_proc_fops);
446 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs, 443 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
447 &cifs_linux_ext_proc_fops); 444 &cifs_linux_ext_proc_fops);
448 proc_create("MultiuserMount", 0, proc_fs_cifs, 445 proc_create("MultiuserMount", 0, proc_fs_cifs,
@@ -469,7 +466,6 @@ cifs_proc_clean(void)
469 remove_proc_entry("OplockEnabled", proc_fs_cifs); 466 remove_proc_entry("OplockEnabled", proc_fs_cifs);
470 remove_proc_entry("SecurityFlags", proc_fs_cifs); 467 remove_proc_entry("SecurityFlags", proc_fs_cifs);
471 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 468 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
472 remove_proc_entry("Experimental", proc_fs_cifs);
473 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 469 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
474 remove_proc_entry("fs/cifs", NULL); 470 remove_proc_entry("fs/cifs", NULL);
475} 471}
@@ -550,45 +546,6 @@ static const struct file_operations cifs_oplock_proc_fops = {
550 .write = cifs_oplock_proc_write, 546 .write = cifs_oplock_proc_write,
551}; 547};
552 548
553static int cifs_experimental_proc_show(struct seq_file *m, void *v)
554{
555 seq_printf(m, "%d\n", experimEnabled);
556 return 0;
557}
558
559static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
560{
561 return single_open(file, cifs_experimental_proc_show, NULL);
562}
563
564static ssize_t cifs_experimental_proc_write(struct file *file,
565 const char __user *buffer, size_t count, loff_t *ppos)
566{
567 char c;
568 int rc;
569
570 rc = get_user(c, buffer);
571 if (rc)
572 return rc;
573 if (c == '0' || c == 'n' || c == 'N')
574 experimEnabled = 0;
575 else if (c == '1' || c == 'y' || c == 'Y')
576 experimEnabled = 1;
577 else if (c == '2')
578 experimEnabled = 2;
579
580 return count;
581}
582
583static const struct file_operations cifs_experimental_proc_fops = {
584 .owner = THIS_MODULE,
585 .open = cifs_experimental_proc_open,
586 .read = seq_read,
587 .llseek = seq_lseek,
588 .release = single_release,
589 .write = cifs_experimental_proc_write,
590};
591
592static int cifs_linux_ext_proc_show(struct seq_file *m, void *v) 549static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
593{ 550{
594 seq_printf(m, "%d\n", linuxExtEnabled); 551 seq_printf(m, "%d\n", linuxExtEnabled);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 0a265ad9e426..2b68ac57d97d 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -53,7 +53,7 @@ void cifs_dfs_release_automount_timer(void)
53 * 53 *
54 * Extracts sharename form full UNC. 54 * Extracts sharename form full UNC.
55 * i.e. strips from UNC trailing path that is not part of share 55 * i.e. strips from UNC trailing path that is not part of share
56 * name and fixup missing '\' in the begining of DFS node refferal 56 * name and fixup missing '\' in the beginning of DFS node refferal
57 * if necessary. 57 * if necessary.
58 * Returns pointer to share name on success or ERR_PTR on error. 58 * Returns pointer to share name on success or ERR_PTR on error.
59 * Caller is responsible for freeing returned string. 59 * Caller is responsible for freeing returned string.
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
58 unsigned int mnt_cifs_flags; 58 unsigned int mnt_cifs_flags;
59 int prepathlen; 59 int prepathlen;
60 char *prepath; /* relative path under the share to mount to */ 60 char *prepath; /* relative path under the share to mount to */
61#ifdef CONFIG_CIFS_DFS_UPCALL 61 char *mountdata; /* options received at mount time or via DFS refs */
62 char *mountdata; /* mount options received at mount time */
63#endif
64 struct backing_dev_info bdi; 62 struct backing_dev_info bdi;
65 struct delayed_work prune_tlinks; 63 struct delayed_work prune_tlinks;
66}; 64};
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4dfba8283165..33d221394aca 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -113,7 +113,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
113 MAX_MECH_STR_LEN + 113 MAX_MECH_STR_LEN +
114 UID_KEY_LEN + (sizeof(uid_t) * 2) + 114 UID_KEY_LEN + (sizeof(uid_t) * 2) +
115 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + 115 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
116 USER_KEY_LEN + strlen(sesInfo->userName) + 116 USER_KEY_LEN + strlen(sesInfo->user_name) +
117 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; 117 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
118 118
119 spnego_key = ERR_PTR(-ENOMEM); 119 spnego_key = ERR_PTR(-ENOMEM);
@@ -153,7 +153,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
153 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); 153 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
154 154
155 dp = description + strlen(description); 155 dp = description + strlen(description);
156 sprintf(dp, ";user=%s", sesInfo->userName); 156 sprintf(dp, ";user=%s", sesInfo->user_name);
157 157
158 dp = description + strlen(description); 158 dp = description + strlen(description);
159 sprintf(dp, ";pid=0x%x", current->pid); 159 sprintf(dp, ";pid=0x%x", current->pid);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index fc0fd4fde306..1b2e180b018d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -90,7 +90,7 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
90 case UNI_COLON: 90 case UNI_COLON:
91 *target = ':'; 91 *target = ':';
92 break; 92 break;
93 case UNI_ASTERIK: 93 case UNI_ASTERISK:
94 *target = '*'; 94 *target = '*';
95 break; 95 break;
96 case UNI_QUESTION: 96 case UNI_QUESTION:
@@ -264,40 +264,41 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
264 * names are little endian 16 bit Unicode on the wire 264 * names are little endian 16 bit Unicode on the wire
265 */ 265 */
266int 266int
267cifsConvertToUCS(__le16 *target, const char *source, int maxlen, 267cifsConvertToUCS(__le16 *target, const char *source, int srclen,
268 const struct nls_table *cp, int mapChars) 268 const struct nls_table *cp, int mapChars)
269{ 269{
270 int i, j, charlen; 270 int i, j, charlen;
271 int len_remaining = maxlen;
272 char src_char; 271 char src_char;
273 __u16 temp; 272 __le16 dst_char;
273 wchar_t tmp;
274 274
275 if (!mapChars) 275 if (!mapChars)
276 return cifs_strtoUCS(target, source, PATH_MAX, cp); 276 return cifs_strtoUCS(target, source, PATH_MAX, cp);
277 277
278 for (i = 0, j = 0; i < maxlen; j++) { 278 for (i = 0, j = 0; i < srclen; j++) {
279 src_char = source[i]; 279 src_char = source[i];
280 charlen = 1;
280 switch (src_char) { 281 switch (src_char) {
281 case 0: 282 case 0:
282 put_unaligned_le16(0, &target[j]); 283 put_unaligned(0, &target[j]);
283 goto ctoUCS_out; 284 goto ctoUCS_out;
284 case ':': 285 case ':':
285 temp = UNI_COLON; 286 dst_char = cpu_to_le16(UNI_COLON);
286 break; 287 break;
287 case '*': 288 case '*':
288 temp = UNI_ASTERIK; 289 dst_char = cpu_to_le16(UNI_ASTERISK);
289 break; 290 break;
290 case '?': 291 case '?':
291 temp = UNI_QUESTION; 292 dst_char = cpu_to_le16(UNI_QUESTION);
292 break; 293 break;
293 case '<': 294 case '<':
294 temp = UNI_LESSTHAN; 295 dst_char = cpu_to_le16(UNI_LESSTHAN);
295 break; 296 break;
296 case '>': 297 case '>':
297 temp = UNI_GRTRTHAN; 298 dst_char = cpu_to_le16(UNI_GRTRTHAN);
298 break; 299 break;
299 case '|': 300 case '|':
300 temp = UNI_PIPE; 301 dst_char = cpu_to_le16(UNI_PIPE);
301 break; 302 break;
302 /* 303 /*
303 * FIXME: We can not handle remapping backslash (UNI_SLASH) 304 * FIXME: We can not handle remapping backslash (UNI_SLASH)
@@ -305,28 +306,24 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
305 * as they use backslash as separator. 306 * as they use backslash as separator.
306 */ 307 */
307 default: 308 default:
308 charlen = cp->char2uni(source+i, len_remaining, 309 charlen = cp->char2uni(source + i, srclen - i, &tmp);
309 &temp); 310 dst_char = cpu_to_le16(tmp);
311
310 /* 312 /*
311 * if no match, use question mark, which at least in 313 * if no match, use question mark, which at least in
312 * some cases serves as wild card 314 * some cases serves as wild card
313 */ 315 */
314 if (charlen < 1) { 316 if (charlen < 1) {
315 temp = 0x003f; 317 dst_char = cpu_to_le16(0x003f);
316 charlen = 1; 318 charlen = 1;
317 } 319 }
318 len_remaining -= charlen;
319 /*
320 * character may take more than one byte in the source
321 * string, but will take exactly two bytes in the
322 * target string
323 */
324 i += charlen;
325 continue;
326 } 320 }
327 put_unaligned_le16(temp, &target[j]); 321 /*
328 i++; /* move to next char in source string */ 322 * character may take more than one byte in the source string,
329 len_remaining--; 323 * but will take exactly two bytes in the target string
324 */
325 i += charlen;
326 put_unaligned(dst_char, &target[j]);
330 } 327 }
331 328
332ctoUCS_out: 329ctoUCS_out:
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 7fe6b52df507..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -44,7 +44,7 @@
44 * reserved symbols (along with \ and /), otherwise illegal to store 44 * reserved symbols (along with \ and /), otherwise illegal to store
45 * in filenames in NTFS 45 * in filenames in NTFS
46 */ 46 */
47#define UNI_ASTERIK (__u16) ('*' + 0xF000) 47#define UNI_ASTERISK (__u16) ('*' + 0xF000)
48#define UNI_QUESTION (__u16) ('?' + 0xF000) 48#define UNI_QUESTION (__u16) ('?' + 0xF000)
49#define UNI_COLON (__u16) (':' + 0xF000) 49#define UNI_COLON (__u16) (':' + 0xF000)
50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) 50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
82char *cifs_strndup_from_ucs(const char *src, const int maxlen, 82char *cifs_strndup_from_ucs(const char *src, const int maxlen,
83 const bool is_unicode, 83 const bool is_unicode,
84 const struct nls_table *codepage); 84 const struct nls_table *codepage);
85extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
86 const struct nls_table *cp, int mapChars);
87
85#endif 88#endif
86 89
87/* 90/*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/string.h>
27#include <linux/keyctl.h>
28#include <linux/key-type.h>
29#include <keys/user-type.h>
26#include "cifspdu.h" 30#include "cifspdu.h"
27#include "cifsglob.h" 31#include "cifsglob.h"
28#include "cifsacl.h" 32#include "cifsacl.h"
29#include "cifsproto.h" 33#include "cifsproto.h"
30#include "cifs_debug.h" 34#include "cifs_debug.h"
31 35
32
33static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
34 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
35 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
36 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
37 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
38 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
39 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
40 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
41;
42
43
44/* security id for everyone/world system group */ 36/* security id for everyone/world system group */
45static const struct cifs_sid sid_everyone = { 37static const struct cifs_sid sid_everyone = {
46 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; 38 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
50/* group users */ 42/* group users */
51static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; 43static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
52 44
45const struct cred *root_cred;
53 46
54int match_sid(struct cifs_sid *ctsid) 47static void
48shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
49 int *nr_del)
55{ 50{
56 int i, j; 51 struct rb_node *node;
57 int num_subauth, num_sat, num_saw; 52 struct rb_node *tmp;
58 struct cifs_sid *cwsid; 53 struct cifs_sid_id *psidid;
54
55 node = rb_first(root);
56 while (node) {
57 tmp = node;
58 node = rb_next(tmp);
59 psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
60 if (nr_to_scan == 0 || *nr_del == nr_to_scan)
61 ++(*nr_rem);
62 else {
63 if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
64 && psidid->refcount == 0) {
65 rb_erase(tmp, root);
66 ++(*nr_del);
67 } else
68 ++(*nr_rem);
69 }
70 }
71}
72
73/*
74 * Run idmap cache shrinker.
75 */
76static int
77cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
78{
79 int nr_del = 0;
80 int nr_rem = 0;
81 struct rb_root *root;
82
83 root = &uidtree;
84 spin_lock(&siduidlock);
85 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
86 spin_unlock(&siduidlock);
87
88 root = &gidtree;
89 spin_lock(&sidgidlock);
90 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
91 spin_unlock(&sidgidlock);
92
93 return nr_rem;
94}
95
96static struct shrinker cifs_shrinker = {
97 .shrink = cifs_idmap_shrinker,
98 .seeks = DEFAULT_SEEKS,
99};
100
101static int
102cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
103{
104 char *payload;
105
106 payload = kmalloc(datalen, GFP_KERNEL);
107 if (!payload)
108 return -ENOMEM;
109
110 memcpy(payload, data, datalen);
111 key->payload.data = payload;
112 return 0;
113}
114
115static inline void
116cifs_idmap_key_destroy(struct key *key)
117{
118 kfree(key->payload.data);
119}
59 120
60 if (!ctsid) 121struct key_type cifs_idmap_key_type = {
61 return -1; 122 .name = "cifs.idmap",
123 .instantiate = cifs_idmap_key_instantiate,
124 .destroy = cifs_idmap_key_destroy,
125 .describe = user_describe,
126 .match = user_match,
127};
128
129static void
130sid_to_str(struct cifs_sid *sidptr, char *sidstr)
131{
132 int i;
133 unsigned long saval;
134 char *strptr;
62 135
63 for (i = 0; i < NUM_WK_SIDS; ++i) { 136 strptr = sidstr;
64 cwsid = &(wksidarr[i].cifssid);
65 137
66 /* compare the revision */ 138 sprintf(strptr, "%s", "S");
67 if (ctsid->revision != cwsid->revision) 139 strptr = sidstr + strlen(sidstr);
68 continue;
69 140
70 /* compare all of the six auth values */ 141 sprintf(strptr, "-%d", sidptr->revision);
71 for (j = 0; j < 6; ++j) { 142 strptr = sidstr + strlen(sidstr);
72 if (ctsid->authority[j] != cwsid->authority[j]) 143
73 break; 144 for (i = 0; i < 6; ++i) {
145 if (sidptr->authority[i]) {
146 sprintf(strptr, "-%d", sidptr->authority[i]);
147 strptr = sidstr + strlen(sidstr);
74 } 148 }
75 if (j < 6) 149 }
76 continue; /* all of the auth values did not match */ 150
77 151 for (i = 0; i < sidptr->num_subauth; ++i) {
78 /* compare all of the subauth values if any */ 152 saval = le32_to_cpu(sidptr->sub_auth[i]);
79 num_sat = ctsid->num_subauth; 153 sprintf(strptr, "-%ld", saval);
80 num_saw = cwsid->num_subauth; 154 strptr = sidstr + strlen(sidstr);
81 num_subauth = num_sat < num_saw ? num_sat : num_saw; 155 }
82 if (num_subauth) { 156}
83 for (j = 0; j < num_subauth; ++j) { 157
84 if (ctsid->sub_auth[j] != cwsid->sub_auth[j]) 158static void
85 break; 159id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
86 } 160 struct cifs_sid_id **psidid, char *typestr)
87 if (j < num_subauth) 161{
88 continue; /* all sub_auth values do not match */ 162 int rc;
163 char *strptr;
164 struct rb_node *node = root->rb_node;
165 struct rb_node *parent = NULL;
166 struct rb_node **linkto = &(root->rb_node);
167 struct cifs_sid_id *lsidid;
168
169 while (node) {
170 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
171 parent = node;
172 rc = compare_sids(sidptr, &((lsidid)->sid));
173 if (rc > 0) {
174 linkto = &(node->rb_left);
175 node = node->rb_left;
176 } else if (rc < 0) {
177 linkto = &(node->rb_right);
178 node = node->rb_right;
179 }
180 }
181
182 memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
183 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
184 (*psidid)->refcount = 0;
185
186 sprintf((*psidid)->sidstr, "%s", typestr);
187 strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
188 sid_to_str(&(*psidid)->sid, strptr);
189
190 clear_bit(SID_ID_PENDING, &(*psidid)->state);
191 clear_bit(SID_ID_MAPPED, &(*psidid)->state);
192
193 rb_link_node(&(*psidid)->rbnode, parent, linkto);
194 rb_insert_color(&(*psidid)->rbnode, root);
195}
196
197static struct cifs_sid_id *
198id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
199{
200 int rc;
201 struct rb_node *node = root->rb_node;
202 struct cifs_sid_id *lsidid;
203
204 while (node) {
205 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
206 rc = compare_sids(sidptr, &((lsidid)->sid));
207 if (rc > 0) {
208 node = node->rb_left;
209 } else if (rc < 0) {
210 node = node->rb_right;
211 } else /* node found */
212 return lsidid;
213 }
214
215 return NULL;
216}
217
218static int
219sidid_pending_wait(void *unused)
220{
221 schedule();
222 return signal_pending(current) ? -ERESTARTSYS : 0;
223}
224
225static int
226sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
227 struct cifs_fattr *fattr, uint sidtype)
228{
229 int rc;
230 unsigned long cid;
231 struct key *idkey;
232 const struct cred *saved_cred;
233 struct cifs_sid_id *psidid, *npsidid;
234 struct rb_root *cidtree;
235 spinlock_t *cidlock;
236
237 if (sidtype == SIDOWNER) {
238 cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
239 cidlock = &siduidlock;
240 cidtree = &uidtree;
241 } else if (sidtype == SIDGROUP) {
242 cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
243 cidlock = &sidgidlock;
244 cidtree = &gidtree;
245 } else
246 return -ENOENT;
247
248 spin_lock(cidlock);
249 psidid = id_rb_search(cidtree, psid);
250
251 if (!psidid) { /* node does not exist, allocate one & attempt adding */
252 spin_unlock(cidlock);
253 npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
254 if (!npsidid)
255 return -ENOMEM;
256
257 npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
258 if (!npsidid->sidstr) {
259 kfree(npsidid);
260 return -ENOMEM;
261 }
262
263 spin_lock(cidlock);
264 psidid = id_rb_search(cidtree, psid);
265 if (psidid) { /* node happened to get inserted meanwhile */
266 ++psidid->refcount;
267 spin_unlock(cidlock);
268 kfree(npsidid->sidstr);
269 kfree(npsidid);
270 } else {
271 psidid = npsidid;
272 id_rb_insert(cidtree, psid, &psidid,
273 sidtype == SIDOWNER ? "os:" : "gs:");
274 ++psidid->refcount;
275 spin_unlock(cidlock);
89 } 276 }
277 } else {
278 ++psidid->refcount;
279 spin_unlock(cidlock);
280 }
281
282 /*
283 * If we are here, it is safe to access psidid and its fields
284 * since a reference was taken earlier while holding the spinlock.
285 * A reference on the node is put without holding the spinlock
286 * and it is OK to do so in this case, shrinker will not erase
287 * this node until all references are put and we do not access
288 * any fields of the node after a reference is put .
289 */
290 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
291 cid = psidid->id;
292 psidid->time = jiffies; /* update ts for accessing */
293 goto sid_to_id_out;
294 }
90 295
91 cFYI(1, "matching sid: %s\n", wksidarr[i].sidname); 296 if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
92 return 0; /* sids compare/match */ 297 goto sid_to_id_out;
298
299 if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
300 saved_cred = override_creds(root_cred);
301 idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
302 if (IS_ERR(idkey))
303 cFYI(1, "%s: Can't map SID to an id", __func__);
304 else {
305 cid = *(unsigned long *)idkey->payload.value;
306 psidid->id = cid;
307 set_bit(SID_ID_MAPPED, &psidid->state);
308 key_put(idkey);
309 kfree(psidid->sidstr);
310 }
311 revert_creds(saved_cred);
312 psidid->time = jiffies; /* update ts for accessing */
313 clear_bit(SID_ID_PENDING, &psidid->state);
314 wake_up_bit(&psidid->state, SID_ID_PENDING);
315 } else {
316 rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
317 sidid_pending_wait, TASK_INTERRUPTIBLE);
318 if (rc) {
319 cFYI(1, "%s: sidid_pending_wait interrupted %d",
320 __func__, rc);
321 --psidid->refcount; /* decremented without spinlock */
322 return rc;
323 }
324 if (test_bit(SID_ID_MAPPED, &psidid->state))
325 cid = psidid->id;
93 } 326 }
94 327
95 cFYI(1, "No matching sid"); 328sid_to_id_out:
96 return -1; 329 --psidid->refcount; /* decremented without spinlock */
330 if (sidtype == SIDOWNER)
331 fattr->cf_uid = cid;
332 else
333 fattr->cf_gid = cid;
334
335 return 0;
336}
337
338int
339init_cifs_idmap(void)
340{
341 struct cred *cred;
342 struct key *keyring;
343 int ret;
344
345 cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
346
347 /* create an override credential set with a special thread keyring in
348 * which requests are cached
349 *
350 * this is used to prevent malicious redirections from being installed
351 * with add_key().
352 */
353 cred = prepare_kernel_cred(NULL);
354 if (!cred)
355 return -ENOMEM;
356
357 keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
358 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
359 KEY_USR_VIEW | KEY_USR_READ,
360 KEY_ALLOC_NOT_IN_QUOTA);
361 if (IS_ERR(keyring)) {
362 ret = PTR_ERR(keyring);
363 goto failed_put_cred;
364 }
365
366 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
367 if (ret < 0)
368 goto failed_put_key;
369
370 ret = register_key_type(&cifs_idmap_key_type);
371 if (ret < 0)
372 goto failed_put_key;
373
374 /* instruct request_key() to use this special keyring as a cache for
375 * the results it looks up */
376 cred->thread_keyring = keyring;
377 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
378 root_cred = cred;
379
380 spin_lock_init(&siduidlock);
381 uidtree = RB_ROOT;
382 spin_lock_init(&sidgidlock);
383 gidtree = RB_ROOT;
384
385 register_shrinker(&cifs_shrinker);
386
387 cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
388 return 0;
389
390failed_put_key:
391 key_put(keyring);
392failed_put_cred:
393 put_cred(cred);
394 return ret;
395}
396
397void
398exit_cifs_idmap(void)
399{
400 key_revoke(root_cred->thread_keyring);
401 unregister_key_type(&cifs_idmap_key_type);
402 put_cred(root_cred);
403 unregister_shrinker(&cifs_shrinker);
404 cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
405}
406
407void
408cifs_destroy_idmaptrees(void)
409{
410 struct rb_root *root;
411 struct rb_node *node;
412
413 root = &uidtree;
414 spin_lock(&siduidlock);
415 while ((node = rb_first(root)))
416 rb_erase(node, root);
417 spin_unlock(&siduidlock);
418
419 root = &gidtree;
420 spin_lock(&sidgidlock);
421 while ((node = rb_first(root)))
422 rb_erase(node, root);
423 spin_unlock(&sidgidlock);
97} 424}
98 425
99/* if the two SIDs (roughly equivalent to a UUID for a user or group) are 426/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
104 int num_subauth, num_sat, num_saw; 431 int num_subauth, num_sat, num_saw;
105 432
106 if ((!ctsid) || (!cwsid)) 433 if ((!ctsid) || (!cwsid))
107 return 0; 434 return 1;
108 435
109 /* compare the revision */ 436 /* compare the revision */
110 if (ctsid->revision != cwsid->revision) 437 if (ctsid->revision != cwsid->revision) {
111 return 0; 438 if (ctsid->revision > cwsid->revision)
439 return 1;
440 else
441 return -1;
442 }
112 443
113 /* compare all of the six auth values */ 444 /* compare all of the six auth values */
114 for (i = 0; i < 6; ++i) { 445 for (i = 0; i < 6; ++i) {
115 if (ctsid->authority[i] != cwsid->authority[i]) 446 if (ctsid->authority[i] != cwsid->authority[i]) {
116 return 0; 447 if (ctsid->authority[i] > cwsid->authority[i])
448 return 1;
449 else
450 return -1;
451 }
117 } 452 }
118 453
119 /* compare all of the subauth values if any */ 454 /* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
122 num_subauth = num_sat < num_saw ? num_sat : num_saw; 457 num_subauth = num_sat < num_saw ? num_sat : num_saw;
123 if (num_subauth) { 458 if (num_subauth) {
124 for (i = 0; i < num_subauth; ++i) { 459 for (i = 0; i < num_subauth; ++i) {
125 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) 460 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
126 return 0; 461 if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
462 return 1;
463 else
464 return -1;
465 }
127 } 466 }
128 } 467 }
129 468
130 return 1; /* sids compare/match */ 469 return 0; /* sids compare/match */
131} 470}
132 471
133 472
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
382#ifdef CONFIG_CIFS_DEBUG2 721#ifdef CONFIG_CIFS_DEBUG2
383 dump_ace(ppace[i], end_of_acl); 722 dump_ace(ppace[i], end_of_acl);
384#endif 723#endif
385 if (compare_sids(&(ppace[i]->sid), pownersid)) 724 if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
386 access_flags_to_mode(ppace[i]->access_req, 725 access_flags_to_mode(ppace[i]->access_req,
387 ppace[i]->type, 726 ppace[i]->type,
388 &fattr->cf_mode, 727 &fattr->cf_mode,
389 &user_mask); 728 &user_mask);
390 if (compare_sids(&(ppace[i]->sid), pgrpsid)) 729 if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
391 access_flags_to_mode(ppace[i]->access_req, 730 access_flags_to_mode(ppace[i]->access_req,
392 ppace[i]->type, 731 ppace[i]->type,
393 &fattr->cf_mode, 732 &fattr->cf_mode,
394 &group_mask); 733 &group_mask);
395 if (compare_sids(&(ppace[i]->sid), &sid_everyone)) 734 if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
396 access_flags_to_mode(ppace[i]->access_req, 735 access_flags_to_mode(ppace[i]->access_req,
397 ppace[i]->type, 736 ppace[i]->type,
398 &fattr->cf_mode, 737 &fattr->cf_mode,
399 &other_mask); 738 &other_mask);
400 if (compare_sids(&(ppace[i]->sid), &sid_authusers)) 739 if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
401 access_flags_to_mode(ppace[i]->access_req, 740 access_flags_to_mode(ppace[i]->access_req,
402 ppace[i]->type, 741 ppace[i]->type,
403 &fattr->cf_mode, 742 &fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
475 814
476 815
477/* Convert CIFS ACL to POSIX form */ 816/* Convert CIFS ACL to POSIX form */
478static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, 817static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
479 struct cifs_fattr *fattr) 818 struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
480{ 819{
481 int rc; 820 int rc = 0;
482 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 821 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
483 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */ 822 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
484 char *end_of_acl = ((char *)pntsd) + acl_len; 823 char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
500 le32_to_cpu(pntsd->sacloffset), dacloffset); 839 le32_to_cpu(pntsd->sacloffset), dacloffset);
501/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 840/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
502 rc = parse_sid(owner_sid_ptr, end_of_acl); 841 rc = parse_sid(owner_sid_ptr, end_of_acl);
503 if (rc) 842 if (rc) {
843 cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
844 return rc;
845 }
846 rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
847 if (rc) {
848 cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
504 return rc; 849 return rc;
850 }
505 851
506 rc = parse_sid(group_sid_ptr, end_of_acl); 852 rc = parse_sid(group_sid_ptr, end_of_acl);
507 if (rc) 853 if (rc) {
854 cFYI(1, "%s: Error %d mapping Owner SID to gid", __func__, rc);
508 return rc; 855 return rc;
856 }
857 rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
858 if (rc) {
859 cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
860 return rc;
861 }
509 862
510 if (dacloffset) 863 if (dacloffset)
511 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 864 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
520 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, 873 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
521 sizeof(struct cifs_sid)); */ 874 sizeof(struct cifs_sid)); */
522 875
523 return 0; 876 return rc;
524} 877}
525 878
526 879
@@ -688,7 +1041,7 @@ out:
688} 1041}
689 1042
690/* Set an ACL on the server */ 1043/* Set an ACL on the server */
691static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, 1044int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
692 struct inode *inode, const char *path) 1045 struct inode *inode, const char *path)
693{ 1046{
694 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1047 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
727 rc = PTR_ERR(pntsd); 1080 rc = PTR_ERR(pntsd);
728 cERROR(1, "%s: error %d getting sec desc", __func__, rc); 1081 cERROR(1, "%s: error %d getting sec desc", __func__, rc);
729 } else { 1082 } else {
730 rc = parse_sec_desc(pntsd, acllen, fattr); 1083 rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
731 kfree(pntsd); 1084 kfree(pntsd);
732 if (rc) 1085 if (rc)
733 cERROR(1, "parse sec desc failed rc = %d", rc); 1086 cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
39#define ACCESS_ALLOWED 0 39#define ACCESS_ALLOWED 0
40#define ACCESS_DENIED 1 40#define ACCESS_DENIED 1
41 41
42#define SIDOWNER 1
43#define SIDGROUP 2
44#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
45
46#define SID_ID_MAPPED 0
47#define SID_ID_PENDING 1
48#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
49#define SID_MAP_RETRY (300 * HZ) /* wait 5 minutes for next attempt to map */
50
42struct cifs_ntsd { 51struct cifs_ntsd {
43 __le16 revision; /* revision level */ 52 __le16 revision; /* revision level */
44 __le16 type; 53 __le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
74 char sidname[SIDNAMELENGTH]; 83 char sidname[SIDNAMELENGTH];
75} __attribute__((packed)); 84} __attribute__((packed));
76 85
77extern int match_sid(struct cifs_sid *); 86struct cifs_sid_id {
87 unsigned int refcount; /* increment with spinlock, decrement without */
88 unsigned long id;
89 unsigned long time;
90 unsigned long state;
91 char *sidstr;
92 struct rb_node rbnode;
93 struct cifs_sid sid;
94};
95
96#ifdef __KERNEL__
97extern struct key_type cifs_idmap_key_type;
98extern const struct cred *root_cred;
99#endif /* KERNEL */
100
78extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); 101extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
79 102
80#endif /* _CIFSACL_H */ 103#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a51585f9852b..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -30,12 +30,13 @@
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31#include <linux/random.h> 31#include <linux/random.h>
32 32
33/* Calculate and return the CIFS signature based on the mac key and SMB PDU */ 33/*
34/* the 16 byte signature must be allocated by the caller */ 34 * Calculate and return the CIFS signature based on the mac key and SMB PDU.
35/* Note we only use the 1st eight bytes */ 35 * The 16 byte signature must be allocated by the caller. Note we only use the
36/* Note that the smb header signature field on input contains the 36 * 1st eight bytes and that the smb header signature field on input contains
37 sequence number before this function is called */ 37 * the sequence number before this function is called. Also, this function
38 38 * should be called with the server->srv_mutex held.
39 */
39static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 40static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
40 struct TCP_Server_Info *server, char *signature) 41 struct TCP_Server_Info *server, char *signature)
41{ 42{
@@ -59,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
59 server->session_key.response, server->session_key.len); 60 server->session_key.response, server->session_key.len);
60 61
61 crypto_shash_update(&server->secmech.sdescmd5->shash, 62 crypto_shash_update(&server->secmech.sdescmd5->shash,
62 cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 63 cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
63 64
64 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); 65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
65 66
@@ -209,8 +210,10 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
209 cpu_to_le32(expected_sequence_number); 210 cpu_to_le32(expected_sequence_number);
210 cifs_pdu->Signature.Sequence.Reserved = 0; 211 cifs_pdu->Signature.Sequence.Reserved = 0;
211 212
213 mutex_lock(&server->srv_mutex);
212 rc = cifs_calculate_signature(cifs_pdu, server, 214 rc = cifs_calculate_signature(cifs_pdu, server,
213 what_we_think_sig_should_be); 215 what_we_think_sig_should_be);
216 mutex_unlock(&server->srv_mutex);
214 217
215 if (rc) 218 if (rc)
216 return rc; 219 return rc;
@@ -265,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
265} 268}
266 269
267#ifdef CONFIG_CIFS_WEAK_PW_HASH 270#ifdef CONFIG_CIFS_WEAK_PW_HASH
268void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, 271int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
269 char *lnm_session_key) 272 char *lnm_session_key)
270{ 273{
271 int i; 274 int i;
275 int rc;
272 char password_with_pad[CIFS_ENCPWD_SIZE]; 276 char password_with_pad[CIFS_ENCPWD_SIZE];
273 277
274 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 278 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -279,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
279 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 283 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
280 memcpy(lnm_session_key, password_with_pad, 284 memcpy(lnm_session_key, password_with_pad,
281 CIFS_ENCPWD_SIZE); 285 CIFS_ENCPWD_SIZE);
282 return; 286 return 0;
283 } 287 }
284 288
285 /* calculate old style session key */ 289 /* calculate old style session key */
@@ -296,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
296 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) 300 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
297 password_with_pad[i] = toupper(password_with_pad[i]); 301 password_with_pad[i] = toupper(password_with_pad[i]);
298 302
299 SMBencrypt(password_with_pad, cryptkey, lnm_session_key); 303 rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
300 304
301 /* clear password before we return/free memory */ 305 return rc;
302 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
303} 306}
304#endif /* CIFS_WEAK_PW_HASH */ 307#endif /* CIFS_WEAK_PW_HASH */
305 308
@@ -469,15 +472,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
469 return rc; 472 return rc;
470 } 473 }
471 474
472 /* convert ses->userName to unicode and uppercase */ 475 /* convert ses->user_name to unicode and uppercase */
473 len = strlen(ses->userName); 476 len = strlen(ses->user_name);
474 user = kmalloc(2 + (len * 2), GFP_KERNEL); 477 user = kmalloc(2 + (len * 2), GFP_KERNEL);
475 if (user == NULL) { 478 if (user == NULL) {
476 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); 479 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
477 rc = -ENOMEM; 480 rc = -ENOMEM;
478 goto calc_exit_2; 481 goto calc_exit_2;
479 } 482 }
480 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 483 len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp);
481 UniStrupr(user); 484 UniStrupr(user);
482 485
483 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, 486 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f2970136d17d..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -53,7 +53,6 @@ int cifsFYI = 0;
53int cifsERROR = 1; 53int cifsERROR = 1;
54int traceSMB = 0; 54int traceSMB = 0;
55unsigned int oplockEnabled = 1; 55unsigned int oplockEnabled = 1;
56unsigned int experimEnabled = 0;
57unsigned int linuxExtEnabled = 1; 56unsigned int linuxExtEnabled = 1;
58unsigned int lookupCacheEnabled = 1; 57unsigned int lookupCacheEnabled = 1;
59unsigned int multiuser_mount = 0; 58unsigned int multiuser_mount = 0;
@@ -127,30 +126,24 @@ cifs_read_super(struct super_block *sb, void *data,
127 kfree(cifs_sb); 126 kfree(cifs_sb);
128 return rc; 127 return rc;
129 } 128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 130
131#ifdef CONFIG_CIFS_DFS_UPCALL 131 /*
132 /* copy mount params to sb for use in submounts */ 132 * Copy mount params to sb for use in submounts. Better to do
133 /* BB: should we move this after the mount so we 133 * the copy here and deal with the error before cleanup gets
134 * do not have to do the copy on failed mounts? 134 * complicated post-mount.
135 * BB: May be it is better to do simple copy before 135 */
136 * complex operation (mount), and in case of fail
137 * just exit instead of doing mount and attempting
138 * undo it if this copy fails?*/
139 if (data) { 136 if (data) {
140 int len = strlen(data); 137 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
141 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
142 if (cifs_sb->mountdata == NULL) { 138 if (cifs_sb->mountdata == NULL) {
143 bdi_destroy(&cifs_sb->bdi); 139 bdi_destroy(&cifs_sb->bdi);
144 kfree(sb->s_fs_info); 140 kfree(sb->s_fs_info);
145 sb->s_fs_info = NULL; 141 sb->s_fs_info = NULL;
146 return -ENOMEM; 142 return -ENOMEM;
147 } 143 }
148 strncpy(cifs_sb->mountdata, data, len + 1);
149 cifs_sb->mountdata[len] = '\0';
150 } 144 }
151#endif
152 145
153 rc = cifs_mount(sb, cifs_sb, data, devname); 146 rc = cifs_mount(sb, cifs_sb, devname);
154 147
155 if (rc) { 148 if (rc) {
156 if (!silent) 149 if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
163 sb->s_bdi = &cifs_sb->bdi; 156 sb->s_bdi = &cifs_sb->bdi;
164 sb->s_blocksize = CIFS_MAX_MSGSIZE; 157 sb->s_blocksize = CIFS_MAX_MSGSIZE;
165 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 158 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
166 inode = cifs_root_iget(sb, ROOT_I); 159 inode = cifs_root_iget(sb);
167 160
168 if (IS_ERR(inode)) { 161 if (IS_ERR(inode)) {
169 rc = PTR_ERR(inode); 162 rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
184 else 177 else
185 sb->s_d_op = &cifs_dentry_ops; 178 sb->s_d_op = &cifs_dentry_ops;
186 179
187#ifdef CONFIG_CIFS_EXPERIMENTAL 180#ifdef CIFS_NFSD_EXPORT
188 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
189 cFYI(1, "export ops supported"); 182 cFYI(1, "export ops supported");
190 sb->s_export_op = &cifs_export_ops; 183 sb->s_export_op = &cifs_export_ops;
191 } 184 }
192#endif /* EXPERIMENTAL */ 185#endif /* CIFS_NFSD_EXPORT */
193 186
194 return 0; 187 return 0;
195 188
@@ -202,12 +195,10 @@ out_no_root:
202 195
203out_mount_failed: 196out_mount_failed:
204 if (cifs_sb) { 197 if (cifs_sb) {
205#ifdef CONFIG_CIFS_DFS_UPCALL
206 if (cifs_sb->mountdata) { 198 if (cifs_sb->mountdata) {
207 kfree(cifs_sb->mountdata); 199 kfree(cifs_sb->mountdata);
208 cifs_sb->mountdata = NULL; 200 cifs_sb->mountdata = NULL;
209 } 201 }
210#endif
211 unload_nls(cifs_sb->local_nls); 202 unload_nls(cifs_sb->local_nls);
212 bdi_destroy(&cifs_sb->bdi); 203 bdi_destroy(&cifs_sb->bdi);
213 kfree(cifs_sb); 204 kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
231 rc = cifs_umount(sb, cifs_sb); 222 rc = cifs_umount(sb, cifs_sb);
232 if (rc) 223 if (rc)
233 cERROR(1, "cifs_umount failed with return code %d", rc); 224 cERROR(1, "cifs_umount failed with return code %d", rc);
234#ifdef CONFIG_CIFS_DFS_UPCALL
235 if (cifs_sb->mountdata) { 225 if (cifs_sb->mountdata) {
236 kfree(cifs_sb->mountdata); 226 kfree(cifs_sb->mountdata);
237 cifs_sb->mountdata = NULL; 227 cifs_sb->mountdata = NULL;
238 } 228 }
239#endif
240 229
241 unload_nls(cifs_sb->local_nls); 230 unload_nls(cifs_sb->local_nls);
242 bdi_destroy(&cifs_sb->bdi); 231 bdi_destroy(&cifs_sb->bdi);
@@ -409,8 +398,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
409 398
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 399 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
411 seq_printf(s, ",multiuser"); 400 seq_printf(s, ",multiuser");
412 else if (tcon->ses->userName) 401 else if (tcon->ses->user_name)
413 seq_printf(s, ",username=%s", tcon->ses->userName); 402 seq_printf(s, ",username=%s", tcon->ses->user_name);
414 403
415 if (tcon->ses->domainName) 404 if (tcon->ses->domainName)
416 seq_printf(s, ",domain=%s", tcon->ses->domainName); 405 seq_printf(s, ",domain=%s", tcon->ses->domainName);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
618{ 607{
619 /* origin == SEEK_END => we must revalidate the cached file length */ 608 /* origin == SEEK_END => we must revalidate the cached file length */
620 if (origin == SEEK_END) { 609 if (origin == SEEK_END) {
621 int retval; 610 int rc;
622 611 struct inode *inode = file->f_path.dentry->d_inode;
623 /* some applications poll for the file length in this strange 612
624 way so we must seek to end on non-oplocked files by 613 /*
625 setting the revalidate time to zero */ 614 * We need to be sure that all dirty pages are written and the
626 CIFS_I(file->f_path.dentry->d_inode)->time = 0; 615 * server has the newest file length.
627 616 */
628 retval = cifs_revalidate_file(file); 617 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
629 if (retval < 0) 618 inode->i_mapping->nrpages != 0) {
630 return (loff_t)retval; 619 rc = filemap_fdatawait(inode->i_mapping);
620 if (rc) {
621 mapping_set_error(inode->i_mapping, rc);
622 return rc;
623 }
624 }
625 /*
626 * Some applications poll for the file length in this strange
627 * way so we must seek to end on non-oplocked files by
628 * setting the revalidate time to zero.
629 */
630 CIFS_I(inode)->time = 0;
631
632 rc = cifs_revalidate_file_attr(file);
633 if (rc < 0)
634 return (loff_t)rc;
631 } 635 }
632 return generic_file_llseek_unlocked(file, offset, origin); 636 return generic_file_llseek_unlocked(file, offset, origin);
633} 637}
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
760}; 764};
761 765
762const struct file_operations cifs_file_direct_ops = { 766const struct file_operations cifs_file_direct_ops = {
763 /* no aio, no readv - 767 /* BB reevaluate whether they can be done with directio, no cache */
764 BB reevaluate whether they can be done with directio, no cache */ 768 .read = do_sync_read,
765 .read = cifs_user_read, 769 .write = do_sync_write,
766 .write = cifs_user_write, 770 .aio_read = cifs_user_readv,
771 .aio_write = cifs_user_writev,
767 .open = cifs_open, 772 .open = cifs_open,
768 .release = cifs_close, 773 .release = cifs_close,
769 .lock = cifs_lock, 774 .lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
815}; 820};
816 821
817const struct file_operations cifs_file_direct_nobrl_ops = { 822const struct file_operations cifs_file_direct_nobrl_ops = {
818 /* no mmap, no aio, no readv - 823 /* BB reevaluate whether they can be done with directio, no cache */
819 BB reevaluate whether they can be done with directio, no cache */ 824 .read = do_sync_read,
820 .read = cifs_user_read, 825 .write = do_sync_write,
821 .write = cifs_user_write, 826 .aio_read = cifs_user_readv,
827 .aio_write = cifs_user_writev,
822 .open = cifs_open, 828 .open = cifs_open,
823 .release = cifs_close, 829 .release = cifs_close,
824 .fsync = cifs_fsync, 830 .fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
981 int rc = 0; 987 int rc = 0;
982 cifs_proc_init(); 988 cifs_proc_init();
983 INIT_LIST_HEAD(&cifs_tcp_ses_list); 989 INIT_LIST_HEAD(&cifs_tcp_ses_list);
984#ifdef CONFIG_CIFS_EXPERIMENTAL 990#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
985 INIT_LIST_HEAD(&GlobalDnotifyReqList); 991 INIT_LIST_HEAD(&GlobalDnotifyReqList);
986 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); 992 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
987#endif 993#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
988/* 994/*
989 * Initialize Global counters 995 * Initialize Global counters
990 */ 996 */
@@ -1033,22 +1039,33 @@ init_cifs(void)
1033 if (rc) 1039 if (rc)
1034 goto out_destroy_mids; 1040 goto out_destroy_mids;
1035 1041
1036 rc = register_filesystem(&cifs_fs_type);
1037 if (rc)
1038 goto out_destroy_request_bufs;
1039#ifdef CONFIG_CIFS_UPCALL 1042#ifdef CONFIG_CIFS_UPCALL
1040 rc = register_key_type(&cifs_spnego_key_type); 1043 rc = register_key_type(&cifs_spnego_key_type);
1041 if (rc) 1044 if (rc)
1042 goto out_unregister_filesystem; 1045 goto out_destroy_request_bufs;
1043#endif 1046#endif /* CONFIG_CIFS_UPCALL */
1047
1048#ifdef CONFIG_CIFS_ACL
1049 rc = init_cifs_idmap();
1050 if (rc)
1051 goto out_register_key_type;
1052#endif /* CONFIG_CIFS_ACL */
1053
1054 rc = register_filesystem(&cifs_fs_type);
1055 if (rc)
1056 goto out_init_cifs_idmap;
1044 1057
1045 return 0; 1058 return 0;
1046 1059
1047#ifdef CONFIG_CIFS_UPCALL 1060out_init_cifs_idmap:
1048out_unregister_filesystem: 1061#ifdef CONFIG_CIFS_ACL
1049 unregister_filesystem(&cifs_fs_type); 1062 exit_cifs_idmap();
1063out_register_key_type:
1050#endif 1064#endif
1065#ifdef CONFIG_CIFS_UPCALL
1066 unregister_key_type(&cifs_spnego_key_type);
1051out_destroy_request_bufs: 1067out_destroy_request_bufs:
1068#endif
1052 cifs_destroy_request_bufs(); 1069 cifs_destroy_request_bufs();
1053out_destroy_mids: 1070out_destroy_mids:
1054 cifs_destroy_mids(); 1071 cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
1070#ifdef CONFIG_CIFS_DFS_UPCALL 1087#ifdef CONFIG_CIFS_DFS_UPCALL
1071 cifs_dfs_release_automount_timer(); 1088 cifs_dfs_release_automount_timer();
1072#endif 1089#endif
1090#ifdef CONFIG_CIFS_ACL
1091 cifs_destroy_idmaptrees();
1092 exit_cifs_idmap();
1093#endif
1073#ifdef CONFIG_CIFS_UPCALL 1094#ifdef CONFIG_CIFS_UPCALL
1074 unregister_key_type(&cifs_spnego_key_type); 1095 unregister_key_type(&cifs_spnego_key_type);
1075#endif 1096#endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
47 47
48/* Functions related to inodes */ 48/* Functions related to inodes */
49extern const struct inode_operations cifs_dir_inode_ops; 49extern const struct inode_operations cifs_dir_inode_ops;
50extern struct inode *cifs_root_iget(struct super_block *, unsigned long); 50extern struct inode *cifs_root_iget(struct super_block *);
51extern int cifs_create(struct inode *, struct dentry *, int, 51extern int cifs_create(struct inode *, struct dentry *, int,
52 struct nameidata *); 52 struct nameidata *);
53extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 53extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
59extern int cifs_rmdir(struct inode *, struct dentry *); 59extern int cifs_rmdir(struct inode *, struct dentry *);
60extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 60extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
61 struct dentry *); 61 struct dentry *);
62extern int cifs_revalidate_file_attr(struct file *filp);
63extern int cifs_revalidate_dentry_attr(struct dentry *);
62extern int cifs_revalidate_file(struct file *filp); 64extern int cifs_revalidate_file(struct file *filp);
63extern int cifs_revalidate_dentry(struct dentry *); 65extern int cifs_revalidate_dentry(struct dentry *);
64extern void cifs_invalidate_mapping(struct inode *inode); 66extern int cifs_invalidate_mapping(struct inode *inode);
65extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 67extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
66extern int cifs_setattr(struct dentry *, struct iattr *); 68extern int cifs_setattr(struct dentry *, struct iattr *);
67 69
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
80extern int cifs_open(struct inode *inode, struct file *file); 82extern int cifs_open(struct inode *inode, struct file *file);
81extern int cifs_close(struct inode *inode, struct file *file); 83extern int cifs_close(struct inode *inode, struct file *file);
82extern int cifs_closedir(struct inode *inode, struct file *file); 84extern int cifs_closedir(struct inode *inode, struct file *file);
83extern ssize_t cifs_user_read(struct file *file, char __user *read_data, 85extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
84 size_t read_size, loff_t *poffset); 86 unsigned long nr_segs, loff_t pos);
85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 87extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
86 unsigned long nr_segs, loff_t pos); 88 unsigned long nr_segs, loff_t pos);
87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, 89extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
88 size_t write_size, loff_t *poffset); 90 unsigned long nr_segs, loff_t pos);
89extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
90 unsigned long nr_segs, loff_t pos); 92 unsigned long nr_segs, loff_t pos);
91extern int cifs_lock(struct file *, int, struct file_lock *); 93extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
123extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 125extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
124extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 126extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125 127
126#ifdef CONFIG_CIFS_EXPERIMENTAL 128#ifdef CIFS_NFSD_EXPORT
127extern const struct export_operations cifs_export_ops; 129extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 130#endif /* CIFS_NFSD_EXPORT */
129 131
130#define CIFS_VERSION "1.71" 132#define CIFS_VERSION "1.72"
131#endif /* _CIFSFS_H */ 133#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 17afb0fbcaed..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -37,10 +37,9 @@
37 37
38#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) 38#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
39#define MAX_SERVER_SIZE 15 39#define MAX_SERVER_SIZE 15
40#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ 40#define MAX_SHARE_SIZE 80
41#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null 41#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */
42 termination then *2 for unicode versions */ 42#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
43#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
44 43
45#define CIFS_MIN_RCV_POOL 4 44#define CIFS_MIN_RCV_POOL 4
46 45
@@ -92,7 +91,8 @@ enum statusEnum {
92 CifsNew = 0, 91 CifsNew = 0,
93 CifsGood, 92 CifsGood,
94 CifsExiting, 93 CifsExiting,
95 CifsNeedReconnect 94 CifsNeedReconnect,
95 CifsNeedNegotiate
96}; 96};
97 97
98enum securityEnum { 98enum securityEnum {
@@ -274,7 +274,8 @@ struct cifsSesInfo {
274 int capabilities; 274 int capabilities;
275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
276 TCP names - will ipv6 and sctp addresses fit? */ 276 TCP names - will ipv6 and sctp addresses fit? */
277 char userName[MAX_USERNAME_SIZE + 1]; 277 char *user_name; /* must not be null except during init of sess
278 and after mount option parsing we fill it */
278 char *domainName; 279 char *domainName;
279 char *password; 280 char *password;
280 struct session_key auth_key; 281 struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
780 */ 781 */
781GLOBAL_EXTERN spinlock_t cifs_file_list_lock; 782GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
782 783
784#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
783/* Outstanding dir notify requests */ 785/* Outstanding dir notify requests */
784GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; 786GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
785/* DirNotify response queue */ 787/* DirNotify response queue */
786GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; 788GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
789#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
787 790
788/* 791/*
789 * Global transaction id (XID) information 792 * Global transaction id (XID) information
@@ -817,7 +820,6 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
817 have the uid/password or Kerberos credential 820 have the uid/password or Kerberos credential
818 or equivalent for current user */ 821 or equivalent for current user */
819GLOBAL_EXTERN unsigned int oplockEnabled; 822GLOBAL_EXTERN unsigned int oplockEnabled;
820GLOBAL_EXTERN unsigned int experimEnabled;
821GLOBAL_EXTERN unsigned int lookupCacheEnabled; 823GLOBAL_EXTERN unsigned int lookupCacheEnabled;
822GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent 824GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
823 with more secure ntlmssp2 challenge/resp */ 825 with more secure ntlmssp2 challenge/resp */
@@ -831,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
831/* reconnect after this many failed echo attempts */ 833/* reconnect after this many failed echo attempts */
832GLOBAL_EXTERN unsigned short echo_retries; 834GLOBAL_EXTERN unsigned short echo_retries;
833 835
836GLOBAL_EXTERN struct rb_root uidtree;
837GLOBAL_EXTERN struct rb_root gidtree;
838GLOBAL_EXTERN spinlock_t siduidlock;
839GLOBAL_EXTERN spinlock_t sidgidlock;
840
834void cifs_oplock_break(struct work_struct *work); 841void cifs_oplock_break(struct work_struct *work);
835void cifs_oplock_break_get(struct cifsFileInfo *cfile); 842void cifs_oplock_break_get(struct cifsFileInfo *cfile);
836void cifs_oplock_break_put(struct cifsFileInfo *cfile); 843void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */ 397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */
398 398
399struct smb_hdr { 399struct smb_hdr {
400 __u32 smb_buf_length; /* big endian on wire *//* BB length is only two 400 __be32 smb_buf_length; /* BB length is only two (rarely three) bytes,
401 or three bytes - with one or two byte type preceding it that are 401 with one or two byte "type" preceding it that will be
402 zero - we could mask the type byte off just in case BB */ 402 zero - we could mask the type byte off */
403 __u8 Protocol[4]; 403 __u8 Protocol[4];
404 __u8 Command; 404 __u8 Command;
405 union { 405 union {
@@ -428,43 +428,28 @@ struct smb_hdr {
428 __u8 WordCount; 428 __u8 WordCount;
429} __attribute__((packed)); 429} __attribute__((packed));
430 430
431/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */ 431/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
432#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \ 432static inline void *
433 (2 * (smb_var)->WordCount)) 433BCC(struct smb_hdr *smb)
434{
435 return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
436}
434 437
435/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 438/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
436#define pByteArea(smb_var) (BCC(smb_var) + 2) 439#define pByteArea(smb_var) (BCC(smb_var) + 2)
437 440
438/* get the converted ByteCount for a SMB packet and return it */
439static inline __u16
440get_bcc(struct smb_hdr *hdr)
441{
442 __u16 *bc_ptr = (__u16 *)BCC(hdr);
443
444 return get_unaligned(bc_ptr);
445}
446
447/* get the unconverted ByteCount for a SMB packet and return it */ 441/* get the unconverted ByteCount for a SMB packet and return it */
448static inline __u16 442static inline __u16
449get_bcc_le(struct smb_hdr *hdr) 443get_bcc(struct smb_hdr *hdr)
450{ 444{
451 __le16 *bc_ptr = (__le16 *)BCC(hdr); 445 __le16 *bc_ptr = (__le16 *)BCC(hdr);
452 446
453 return get_unaligned_le16(bc_ptr); 447 return get_unaligned_le16(bc_ptr);
454} 448}
455 449
456/* set the ByteCount for a SMB packet in host-byte order */
457static inline void
458put_bcc(__u16 count, struct smb_hdr *hdr)
459{
460 __u16 *bc_ptr = (__u16 *)BCC(hdr);
461
462 put_unaligned(count, bc_ptr);
463}
464
465/* set the ByteCount for a SMB packet in little-endian */ 450/* set the ByteCount for a SMB packet in little-endian */
466static inline void 451static inline void
467put_bcc_le(__u16 count, struct smb_hdr *hdr) 452put_bcc(__u16 count, struct smb_hdr *hdr)
468{ 453{
469 __le16 *bc_ptr = (__le16 *)BCC(hdr); 454 __le16 *bc_ptr = (__le16 *)BCC(hdr);
470 455
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do { \
53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \ 53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
54 __func__, curr_xid, (int)rc); \ 54 __func__, curr_xid, (int)rc); \
55} while (0) 55} while (0)
56extern int init_cifs_idmap(void);
57extern void exit_cifs_idmap(void);
58extern void cifs_destroy_idmaptrees(void);
56extern char *build_path_from_dentry(struct dentry *); 59extern char *build_path_from_dentry(struct dentry *);
57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, 60extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
58 struct cifsTconInfo *tcon); 61 struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
90extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 93extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
91extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 94extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
92extern unsigned int smbCalcSize(struct smb_hdr *ptr); 95extern unsigned int smbCalcSize(struct smb_hdr *ptr);
93extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
94extern int decode_negTokenInit(unsigned char *security_blob, int length, 96extern int decode_negTokenInit(unsigned char *security_blob, int length,
95 struct TCP_Server_Info *server); 97 struct TCP_Server_Info *server);
96extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 98extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
143extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64); 145extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
144extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 146extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
145 const char *, u32 *); 147 const char *, u32 *);
148extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
149 const char *);
146 150
147extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 151extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
148 const char *); 152 const char *);
149extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 153extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
150extern void cifs_dfs_release_automount_timer(void); 154extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
304 struct cifsTconInfo *tcon, 308 struct cifsTconInfo *tcon,
305 const unsigned char *searchName, char **syminfo, 309 const unsigned char *searchName, char **syminfo,
306 const struct nls_table *nls_codepage); 310 const struct nls_table *nls_codepage);
311#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
307extern int CIFSSMBQueryReparseLinkInfo(const int xid, 312extern int CIFSSMBQueryReparseLinkInfo(const int xid,
308 struct cifsTconInfo *tcon, 313 struct cifsTconInfo *tcon,
309 const unsigned char *searchName, 314 const unsigned char *searchName,
310 char *symlinkinfo, const int buflen, __u16 fid, 315 char *symlinkinfo, const int buflen, __u16 fid,
311 const struct nls_table *nls_codepage); 316 const struct nls_table *nls_codepage);
312 317#endif /* temporarily unused until cifs_symlink fixed */
313extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 318extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
314 const char *fileName, const int disposition, 319 const char *fileName, const int disposition,
315 const int access_flags, const int omode, 320 const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
348 const unsigned char *searchName, __u64 *inode_number, 353 const unsigned char *searchName, __u64 *inode_number,
349 const struct nls_table *nls_codepage, 354 const struct nls_table *nls_codepage,
350 int remap_special_chars); 355 int remap_special_chars);
351extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
352 const struct nls_table *cp, int mapChars);
353 356
354extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 357extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
355 const __u16 netfid, const __u64 len, 358 const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
383extern int calc_seckey(struct cifsSesInfo *); 386extern int calc_seckey(struct cifsSesInfo *);
384 387
385#ifdef CONFIG_CIFS_WEAK_PW_HASH 388#ifdef CONFIG_CIFS_WEAK_PW_HASH
386extern void calc_lanman_hash(const char *password, const char *cryptkey, 389extern int calc_lanman_hash(const char *password, const char *cryptkey,
387 bool encrypt, char *lnm_session_key); 390 bool encrypt, char *lnm_session_key);
388#endif /* CIFS_WEAK_PW_HASH */ 391#endif /* CIFS_WEAK_PW_HASH */
392#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
393extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
394 const int notify_subdirs, const __u16 netfid,
395 __u32 filter, struct file *file, int multishot,
396 const struct nls_table *nls_codepage);
397#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
389extern int CIFSSMBCopy(int xid, 398extern int CIFSSMBCopy(int xid,
390 struct cifsTconInfo *source_tcon, 399 struct cifsTconInfo *source_tcon,
391 const char *fromName, 400 const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
393 const char *toName, const int flags, 402 const char *toName, const int flags,
394 const struct nls_table *nls_codepage, 403 const struct nls_table *nls_codepage,
395 int remap_special_chars); 404 int remap_special_chars);
396extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
397 const int notify_subdirs, const __u16 netfid,
398 __u32 filter, struct file *file, int multishot,
399 const struct nls_table *nls_codepage);
400extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 405extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
401 const unsigned char *searchName, 406 const unsigned char *searchName,
402 const unsigned char *ea_name, char *EAData, 407 const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
427 struct cifs_sb_info *cifs_sb, int xid); 432 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int); 433extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 434extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 435extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24); 436 unsigned char *p24);
435#endif /* _CIFSPROTO_H */ 437#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 904aa47e3515..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -142,9 +142,9 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
142 */ 142 */
143 while (server->tcpStatus == CifsNeedReconnect) { 143 while (server->tcpStatus == CifsNeedReconnect) {
144 wait_event_interruptible_timeout(server->response_q, 144 wait_event_interruptible_timeout(server->response_q,
145 (server->tcpStatus == CifsGood), 10 * HZ); 145 (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
146 146
147 /* is TCP session is reestablished now ?*/ 147 /* are we still trying to reconnect? */
148 if (server->tcpStatus != CifsNeedReconnect) 148 if (server->tcpStatus != CifsNeedReconnect)
149 break; 149 break;
150 150
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) 339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
340 goto vt2_err; 340 goto vt2_err;
341 341
342 /* check that bcc is at least as big as parms + data */
343 /* check that bcc is less than negotiated smb buffer */
344 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); 342 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
345 if (total_size >= 512) 343 if (total_size >= 512)
346 goto vt2_err; 344 goto vt2_err;
347 345
346 /* check that bcc is at least as big as parms + data, and that it is
347 * less than negotiated smb buffer
348 */
348 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); 349 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
349 if (total_size > get_bcc(&pSMB->hdr) || 350 if (total_size > get_bcc(&pSMB->hdr) ||
350 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) 351 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,6 +358,13 @@ vt2_err:
357 return -EINVAL; 358 return -EINVAL;
358} 359}
359 360
361static inline void inc_rfc1001_len(void *pSMB, int count)
362{
363 struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
364
365 be32_add_cpu(&hdr->smb_buf_length, count);
366}
367
360int 368int
361CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) 369CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
362{ 370{
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
409 count += strlen(protocols[i].name) + 1; 417 count += strlen(protocols[i].name) + 1;
410 /* null at end of source and target buffers anyway */ 418 /* null at end of source and target buffers anyway */
411 } 419 }
412 pSMB->hdr.smb_buf_length += count; 420 inc_rfc1001_len(pSMB, count);
413 pSMB->ByteCount = cpu_to_le16(count); 421 pSMB->ByteCount = cpu_to_le16(count);
414 422
415 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 423 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
541 server->secType = RawNTLMSSP; 549 server->secType = RawNTLMSSP;
542 else if (secFlags & CIFSSEC_MAY_LANMAN) 550 else if (secFlags & CIFSSEC_MAY_LANMAN)
543 server->secType = LANMAN; 551 server->secType = LANMAN;
544/* #ifdef CONFIG_CIFS_EXPERIMENTAL
545 else if (secFlags & CIFSSEC_MAY_PLNTXT)
546 server->secType = ??
547#endif */
548 else { 552 else {
549 rc = -EOPNOTSUPP; 553 rc = -EOPNOTSUPP;
550 cERROR(1, "Invalid security type"); 554 cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
578 582
579 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 583 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
580 (server->capabilities & CAP_EXTENDED_SECURITY)) { 584 (server->capabilities & CAP_EXTENDED_SECURITY)) {
581 count = pSMBr->ByteCount; 585 count = get_bcc(&pSMBr->hdr);
582 if (count < 16) { 586 if (count < 16) {
583 rc = -EIO; 587 rc = -EIO;
584 goto neg_err_exit; 588 goto neg_err_exit;
@@ -729,12 +733,12 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
729 return rc; 733 return rc;
730 734
731 /* set up echo request */ 735 /* set up echo request */
732 smb->hdr.Tid = cpu_to_le16(0xffff); 736 smb->hdr.Tid = 0xffff;
733 smb->hdr.WordCount = 1; 737 smb->hdr.WordCount = 1;
734 put_unaligned_le16(1, &smb->EchoCount); 738 put_unaligned_le16(1, &smb->EchoCount);
735 put_bcc_le(1, &smb->hdr); 739 put_bcc(1, &smb->hdr);
736 smb->Data[0] = 'a'; 740 smb->Data[0] = 'a';
737 smb->hdr.smb_buf_length += 3; 741 inc_rfc1001_len(smb, 3);
738 742
739 rc = cifs_call_async(server, (struct smb_hdr *)smb, 743 rc = cifs_call_async(server, (struct smb_hdr *)smb,
740 cifs_echo_callback, server); 744 cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
852 pSMB->TotalParameterCount = pSMB->ParameterCount; 856 pSMB->TotalParameterCount = pSMB->ParameterCount;
853 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK); 857 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
854 pSMB->Reserved4 = 0; 858 pSMB->Reserved4 = 0;
855 pSMB->hdr.smb_buf_length += byte_count; 859 inc_rfc1001_len(pSMB, byte_count);
856 pSMB->ByteCount = cpu_to_le16(byte_count); 860 pSMB->ByteCount = cpu_to_le16(byte_count);
857 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 861 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
858 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 862 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
898 pSMB->SearchAttributes = 902 pSMB->SearchAttributes =
899 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); 903 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
900 pSMB->BufferFormat = 0x04; 904 pSMB->BufferFormat = 0x04;
901 pSMB->hdr.smb_buf_length += name_len + 1; 905 inc_rfc1001_len(pSMB, name_len + 1);
902 pSMB->ByteCount = cpu_to_le16(name_len + 1); 906 pSMB->ByteCount = cpu_to_le16(name_len + 1);
903 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 907 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
904 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 908 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
942 } 946 }
943 947
944 pSMB->BufferFormat = 0x04; 948 pSMB->BufferFormat = 0x04;
945 pSMB->hdr.smb_buf_length += name_len + 1; 949 inc_rfc1001_len(pSMB, name_len + 1);
946 pSMB->ByteCount = cpu_to_le16(name_len + 1); 950 pSMB->ByteCount = cpu_to_le16(name_len + 1);
947 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 951 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
948 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 952 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
985 } 989 }
986 990
987 pSMB->BufferFormat = 0x04; 991 pSMB->BufferFormat = 0x04;
988 pSMB->hdr.smb_buf_length += name_len + 1; 992 inc_rfc1001_len(pSMB, name_len + 1);
989 pSMB->ByteCount = cpu_to_le16(name_len + 1); 993 pSMB->ByteCount = cpu_to_le16(name_len + 1);
990 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 994 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
991 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 995 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
1063 pSMB->TotalParameterCount = pSMB->ParameterCount; 1067 pSMB->TotalParameterCount = pSMB->ParameterCount;
1064 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN); 1068 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
1065 pSMB->Reserved4 = 0; 1069 pSMB->Reserved4 = 0;
1066 pSMB->hdr.smb_buf_length += byte_count; 1070 inc_rfc1001_len(pSMB, byte_count);
1067 pSMB->ByteCount = cpu_to_le16(byte_count); 1071 pSMB->ByteCount = cpu_to_le16(byte_count);
1068 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1072 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1069 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1073 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
1075 cFYI(1, "copying inode info"); 1079 cFYI(1, "copying inode info");
1076 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1080 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1077 1081
1078 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { 1082 if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
1079 rc = -EIO; /* bad smb */ 1083 rc = -EIO; /* bad smb */
1080 goto psx_create_err; 1084 goto psx_create_err;
1081 } 1085 }
@@ -1096,7 +1100,7 @@ PsxCreat:
1096 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1100 pRetData->Type = cpu_to_le32(-1); /* unknown */
1097 cFYI(DBG2, "unknown type"); 1101 cFYI(DBG2, "unknown type");
1098 } else { 1102 } else {
1099 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1103 if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
1100 + sizeof(FILE_UNIX_BASIC_INFO)) { 1104 + sizeof(FILE_UNIX_BASIC_INFO)) {
1101 cERROR(1, "Open response data too small"); 1105 cERROR(1, "Open response data too small");
1102 pRetData->Type = cpu_to_le32(-1); 1106 pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
1228 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); 1232 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
1229 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition)); 1233 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
1230 count += name_len; 1234 count += name_len;
1231 pSMB->hdr.smb_buf_length += count; 1235 inc_rfc1001_len(pSMB, count);
1232 1236
1233 pSMB->ByteCount = cpu_to_le16(count); 1237 pSMB->ByteCount = cpu_to_le16(count);
1234 /* long_op set to 1 to allow for oplock break timeouts */ 1238 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
1341 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY; 1345 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
1342 1346
1343 count += name_len; 1347 count += name_len;
1344 pSMB->hdr.smb_buf_length += count; 1348 inc_rfc1001_len(pSMB, count);
1345 1349
1346 pSMB->ByteCount = cpu_to_le16(count); 1350 pSMB->ByteCount = cpu_to_le16(count);
1347 /* long_op set to 1 to allow for oplock break timeouts */ 1351 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1426 } 1430 }
1427 1431
1428 iov[0].iov_base = (char *)pSMB; 1432 iov[0].iov_base = (char *)pSMB;
1429 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1433 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1430 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1434 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1431 &resp_buf_type, CIFS_LOG_ERROR); 1435 &resp_buf_type, CIFS_LOG_ERROR);
1432 cifs_stats_inc(&tcon->num_reads); 1436 cifs_stats_inc(&tcon->num_reads);
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1560 1564
1561 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF); 1565 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
1562 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); 1566 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
1563 pSMB->hdr.smb_buf_length += byte_count; 1567 inc_rfc1001_len(pSMB, byte_count);
1564 1568
1565 if (wct == 14) 1569 if (wct == 14)
1566 pSMB->ByteCount = cpu_to_le16(byte_count); 1570 pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1644 1648
1645 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF); 1649 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
1646 pSMB->DataLengthHigh = cpu_to_le16(count >> 16); 1650 pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
1647 smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */ 1651 /* header + 1 byte pad */
1652 smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
1648 if (wct == 14) 1653 if (wct == 14)
1649 pSMB->hdr.smb_buf_length += count+1; 1654 inc_rfc1001_len(pSMB, count + 1);
1650 else /* wct == 12 */ 1655 else /* wct == 12 */
1651 pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ 1656 inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
1652 if (wct == 14) 1657 if (wct == 14)
1653 pSMB->ByteCount = cpu_to_le16(count + 1); 1658 pSMB->ByteCount = cpu_to_le16(count + 1);
1654 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ { 1659 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1748 /* oplock break */ 1753 /* oplock break */
1749 count = 0; 1754 count = 0;
1750 } 1755 }
1751 pSMB->hdr.smb_buf_length += count; 1756 inc_rfc1001_len(pSMB, count);
1752 pSMB->ByteCount = cpu_to_le16(count); 1757 pSMB->ByteCount = cpu_to_le16(count);
1753 1758
1754 if (waitFlag) { 1759 if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1839 pSMB->Fid = smb_file_id; 1844 pSMB->Fid = smb_file_id;
1840 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK); 1845 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
1841 pSMB->Reserved4 = 0; 1846 pSMB->Reserved4 = 0;
1842 pSMB->hdr.smb_buf_length += byte_count; 1847 inc_rfc1001_len(pSMB, byte_count);
1843 pSMB->ByteCount = cpu_to_le16(byte_count); 1848 pSMB->ByteCount = cpu_to_le16(byte_count);
1844 if (waitFlag) { 1849 if (waitFlag) {
1845 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, 1850 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1846 (struct smb_hdr *) pSMBr, &bytes_returned); 1851 (struct smb_hdr *) pSMBr, &bytes_returned);
1847 } else { 1852 } else {
1848 iov[0].iov_base = (char *)pSMB; 1853 iov[0].iov_base = (char *)pSMB;
1849 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1854 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1850 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1855 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1851 &resp_buf_type, timeout); 1856 &resp_buf_type, timeout);
1852 pSMB = NULL; /* request buf already freed by SendReceive2. Do 1857 pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1862 __u16 data_count; 1867 __u16 data_count;
1863 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1868 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1864 1869
1865 if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) { 1870 if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
1866 rc = -EIO; /* bad smb */ 1871 rc = -EIO; /* bad smb */
1867 goto plk_err_exit; 1872 goto plk_err_exit;
1868 } 1873 }
@@ -1884,10 +1889,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1884 __constant_cpu_to_le16(CIFS_WRLCK)) 1889 __constant_cpu_to_le16(CIFS_WRLCK))
1885 pLockData->fl_type = F_WRLCK; 1890 pLockData->fl_type = F_WRLCK;
1886 1891
1887 pLockData->fl_start = parm_data->start; 1892 pLockData->fl_start = le64_to_cpu(parm_data->start);
1888 pLockData->fl_end = parm_data->start + 1893 pLockData->fl_end = pLockData->fl_start +
1889 parm_data->length - 1; 1894 le64_to_cpu(parm_data->length) - 1;
1890 pLockData->fl_pid = parm_data->pid; 1895 pLockData->fl_pid = le32_to_cpu(parm_data->pid);
1891 } 1896 }
1892 } 1897 }
1893 1898
@@ -2012,7 +2017,7 @@ renameRetry:
2012 } 2017 }
2013 2018
2014 count = 1 /* 1st signature byte */ + name_len + name_len2; 2019 count = 1 /* 1st signature byte */ + name_len + name_len2;
2015 pSMB->hdr.smb_buf_length += count; 2020 inc_rfc1001_len(pSMB, count);
2016 pSMB->ByteCount = cpu_to_le16(count); 2021 pSMB->ByteCount = cpu_to_le16(count);
2017 2022
2018 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2023 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2092 pSMB->InformationLevel = 2097 pSMB->InformationLevel =
2093 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION); 2098 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
2094 pSMB->Reserved4 = 0; 2099 pSMB->Reserved4 = 0;
2095 pSMB->hdr.smb_buf_length += byte_count; 2100 inc_rfc1001_len(pSMB, byte_count);
2096 pSMB->ByteCount = cpu_to_le16(byte_count); 2101 pSMB->ByteCount = cpu_to_le16(byte_count);
2097 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, 2102 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
2098 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2103 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
2159 } 2164 }
2160 2165
2161 count = 1 /* 1st signature byte */ + name_len + name_len2; 2166 count = 1 /* 1st signature byte */ + name_len + name_len2;
2162 pSMB->hdr.smb_buf_length += count; 2167 inc_rfc1001_len(pSMB, count);
2163 pSMB->ByteCount = cpu_to_le16(count); 2168 pSMB->ByteCount = cpu_to_le16(count);
2164 2169
2165 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2170 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
2249 pSMB->DataOffset = cpu_to_le16(offset); 2254 pSMB->DataOffset = cpu_to_le16(offset);
2250 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK); 2255 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
2251 pSMB->Reserved4 = 0; 2256 pSMB->Reserved4 = 0;
2252 pSMB->hdr.smb_buf_length += byte_count; 2257 inc_rfc1001_len(pSMB, byte_count);
2253 pSMB->ByteCount = cpu_to_le16(byte_count); 2258 pSMB->ByteCount = cpu_to_le16(byte_count);
2254 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2259 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2255 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2260 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
2335 pSMB->DataOffset = cpu_to_le16(offset); 2340 pSMB->DataOffset = cpu_to_le16(offset);
2336 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK); 2341 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
2337 pSMB->Reserved4 = 0; 2342 pSMB->Reserved4 = 0;
2338 pSMB->hdr.smb_buf_length += byte_count; 2343 inc_rfc1001_len(pSMB, byte_count);
2339 pSMB->ByteCount = cpu_to_le16(byte_count); 2344 pSMB->ByteCount = cpu_to_le16(byte_count);
2340 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2345 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2341 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2346 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
2406 } 2411 }
2407 2412
2408 count = 1 /* string type byte */ + name_len + name_len2; 2413 count = 1 /* string type byte */ + name_len + name_len2;
2409 pSMB->hdr.smb_buf_length += count; 2414 inc_rfc1001_len(pSMB, count);
2410 pSMB->ByteCount = cpu_to_le16(count); 2415 pSMB->ByteCount = cpu_to_le16(count);
2411 2416
2412 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2417 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
2477 pSMB->ParameterCount = pSMB->TotalParameterCount; 2482 pSMB->ParameterCount = pSMB->TotalParameterCount;
2478 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK); 2483 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
2479 pSMB->Reserved4 = 0; 2484 pSMB->Reserved4 = 0;
2480 pSMB->hdr.smb_buf_length += byte_count; 2485 inc_rfc1001_len(pSMB, byte_count);
2481 pSMB->ByteCount = cpu_to_le16(byte_count); 2486 pSMB->ByteCount = cpu_to_le16(byte_count);
2482 2487
2483 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2488 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
2489 2494
2490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2495 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2491 /* BB also check enough total bytes returned */ 2496 /* BB also check enough total bytes returned */
2492 if (rc || (pSMBr->ByteCount < 2)) 2497 if (rc || get_bcc(&pSMBr->hdr) < 2)
2493 rc = -EIO; 2498 rc = -EIO;
2494 else { 2499 else {
2495 bool is_unicode; 2500 bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
2516 return rc; 2521 return rc;
2517} 2522}
2518 2523
2519#ifdef CONFIG_CIFS_EXPERIMENTAL 2524#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
2525/*
2526 * Recent Windows versions now create symlinks more frequently
2527 * and they use the "reparse point" mechanism below. We can of course
2528 * do symlinks nicely to Samba and other servers which support the
2529 * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
2530 * "MF" symlinks optionally, but for recent Windows we really need to
2531 * reenable the code below and fix the cifs_symlink callers to handle this.
2532 * In the interim this code has been moved to its own config option so
2533 * it is not compiled in by default until callers fixed up and more tested.
2534 */
2520int 2535int
2521CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2536CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2522 const unsigned char *searchName, 2537 const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2561 } else { /* decode response */ 2576 } else { /* decode response */
2562 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2577 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2563 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2578 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
2564 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) { 2579 if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
2565 /* BB also check enough total bytes returned */ 2580 /* BB also check enough total bytes returned */
2566 rc = -EIO; /* bad smb */ 2581 rc = -EIO; /* bad smb */
2567 goto qreparse_out; 2582 goto qreparse_out;
2568 } 2583 }
2569 if (data_count && (data_count < 2048)) { 2584 if (data_count && (data_count < 2048)) {
2570 char *end_of_smb = 2 /* sizeof byte count */ + 2585 char *end_of_smb = 2 /* sizeof byte count */ +
2571 pSMBr->ByteCount + (char *)&pSMBr->ByteCount; 2586 get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
2572 2587
2573 struct reparse_data *reparse_buf = 2588 struct reparse_data *reparse_buf =
2574 (struct reparse_data *) 2589 (struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
2618 2633
2619 return rc; 2634 return rc;
2620} 2635}
2621#endif /* CIFS_EXPERIMENTAL */ 2636#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
2622 2637
2623#ifdef CONFIG_CIFS_POSIX 2638#ifdef CONFIG_CIFS_POSIX
2624 2639
@@ -2814,7 +2829,7 @@ queryAclRetry:
2814 pSMB->ParameterCount = pSMB->TotalParameterCount; 2829 pSMB->ParameterCount = pSMB->TotalParameterCount;
2815 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL); 2830 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
2816 pSMB->Reserved4 = 0; 2831 pSMB->Reserved4 = 0;
2817 pSMB->hdr.smb_buf_length += byte_count; 2832 inc_rfc1001_len(pSMB, byte_count);
2818 pSMB->ByteCount = cpu_to_le16(byte_count); 2833 pSMB->ByteCount = cpu_to_le16(byte_count);
2819 2834
2820 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2835 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
2826 /* decode response */ 2841 /* decode response */
2827 2842
2828 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2843 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2829 if (rc || (pSMBr->ByteCount < 2))
2830 /* BB also check enough total bytes returned */ 2844 /* BB also check enough total bytes returned */
2845 if (rc || get_bcc(&pSMBr->hdr) < 2)
2831 rc = -EIO; /* bad smb */ 2846 rc = -EIO; /* bad smb */
2832 else { 2847 else {
2833 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 2848 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
2908 pSMB->ParameterCount = cpu_to_le16(params); 2923 pSMB->ParameterCount = cpu_to_le16(params);
2909 pSMB->TotalParameterCount = pSMB->ParameterCount; 2924 pSMB->TotalParameterCount = pSMB->ParameterCount;
2910 pSMB->Reserved4 = 0; 2925 pSMB->Reserved4 = 0;
2911 pSMB->hdr.smb_buf_length += byte_count; 2926 inc_rfc1001_len(pSMB, byte_count);
2912 pSMB->ByteCount = cpu_to_le16(byte_count); 2927 pSMB->ByteCount = cpu_to_le16(byte_count);
2913 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2928 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2914 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2929 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
2966 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); 2981 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
2967 pSMB->Pad = 0; 2982 pSMB->Pad = 0;
2968 pSMB->Fid = netfid; 2983 pSMB->Fid = netfid;
2969 pSMB->hdr.smb_buf_length += byte_count; 2984 inc_rfc1001_len(pSMB, byte_count);
2970 pSMB->t2.ByteCount = cpu_to_le16(byte_count); 2985 pSMB->t2.ByteCount = cpu_to_le16(byte_count);
2971 2986
2972 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2987 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
2976 } else { 2991 } else {
2977 /* decode response */ 2992 /* decode response */
2978 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2993 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2979 if (rc || (pSMBr->ByteCount < 2))
2980 /* BB also check enough total bytes returned */ 2994 /* BB also check enough total bytes returned */
2995 if (rc || get_bcc(&pSMBr->hdr) < 2)
2981 /* If rc should we check for EOPNOSUPP and 2996 /* If rc should we check for EOPNOSUPP and
2982 disable the srvino flag? or in caller? */ 2997 disable the srvino flag? or in caller? */
2983 rc = -EIO; /* bad smb */ 2998 rc = -EIO; /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3052 char *end_of_smb; 3067 char *end_of_smb;
3053 __u32 data_count, data_offset, parm_count, parm_offset; 3068 __u32 data_count, data_offset, parm_count, parm_offset;
3054 struct smb_com_ntransact_rsp *pSMBr; 3069 struct smb_com_ntransact_rsp *pSMBr;
3070 u16 bcc;
3055 3071
3056 *pdatalen = 0; 3072 *pdatalen = 0;
3057 *pparmlen = 0; 3073 *pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3061 3077
3062 pSMBr = (struct smb_com_ntransact_rsp *)buf; 3078 pSMBr = (struct smb_com_ntransact_rsp *)buf;
3063 3079
3064 /* ByteCount was converted from little endian in SendReceive */ 3080 bcc = get_bcc(&pSMBr->hdr);
3065 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + 3081 end_of_smb = 2 /* sizeof byte count */ + bcc +
3066 (char *)&pSMBr->ByteCount; 3082 (char *)&pSMBr->ByteCount;
3067 3083
3068 data_offset = le32_to_cpu(pSMBr->DataOffset); 3084 data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3088 *ppdata, data_count, (data_count + *ppdata), 3104 *ppdata, data_count, (data_count + *ppdata),
3089 end_of_smb, pSMBr); 3105 end_of_smb, pSMBr);
3090 return -EINVAL; 3106 return -EINVAL;
3091 } else if (parm_count + data_count > pSMBr->ByteCount) { 3107 } else if (parm_count + data_count > bcc) {
3092 cFYI(1, "parm count and data count larger than SMB"); 3108 cFYI(1, "parm count and data count larger than SMB");
3093 return -EINVAL; 3109 return -EINVAL;
3094 } 3110 }
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3124 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP | 3140 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
3125 CIFS_ACL_DACL); 3141 CIFS_ACL_DACL);
3126 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */ 3142 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
3127 pSMB->hdr.smb_buf_length += 11; 3143 inc_rfc1001_len(pSMB, 11);
3128 iov[0].iov_base = (char *)pSMB; 3144 iov[0].iov_base = (char *)pSMB;
3129 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 3145 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
3130 3146
3131 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 3147 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
3132 0); 3148 0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
3235 memcpy((char *) &pSMBr->hdr.Protocol + data_offset, 3251 memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
3236 (char *) pntsd, 3252 (char *) pntsd,
3237 acllen); 3253 acllen);
3238 pSMB->hdr.smb_buf_length += (byte_count + data_count); 3254 inc_rfc1001_len(pSMB, byte_count + data_count);
3239
3240 } else 3255 } else
3241 pSMB->hdr.smb_buf_length += byte_count; 3256 inc_rfc1001_len(pSMB, byte_count);
3242 3257
3243 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3258 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3244 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3259 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
3289 } 3304 }
3290 pSMB->BufferFormat = 0x04; 3305 pSMB->BufferFormat = 0x04;
3291 name_len++; /* account for buffer type byte */ 3306 name_len++; /* account for buffer type byte */
3292 pSMB->hdr.smb_buf_length += (__u16) name_len; 3307 inc_rfc1001_len(pSMB, (__u16)name_len);
3293 pSMB->ByteCount = cpu_to_le16(name_len); 3308 pSMB->ByteCount = cpu_to_le16(name_len);
3294 3309
3295 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3310 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
3364 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3379 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3365 pSMB->Pad = 0; 3380 pSMB->Pad = 0;
3366 pSMB->Fid = netfid; 3381 pSMB->Fid = netfid;
3367 pSMB->hdr.smb_buf_length += byte_count; 3382 inc_rfc1001_len(pSMB, byte_count);
3368 3383
3369 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3384 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3370 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3385 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
3375 3390
3376 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3391 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3377 rc = -EIO; 3392 rc = -EIO;
3378 else if (pSMBr->ByteCount < 40) 3393 else if (get_bcc(&pSMBr->hdr) < 40)
3379 rc = -EIO; /* bad smb */ 3394 rc = -EIO; /* bad smb */
3380 else if (pFindData) { 3395 else if (pFindData) {
3381 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3396 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
3451 else 3466 else
3452 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3467 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3453 pSMB->Reserved4 = 0; 3468 pSMB->Reserved4 = 0;
3454 pSMB->hdr.smb_buf_length += byte_count; 3469 inc_rfc1001_len(pSMB, byte_count);
3455 pSMB->ByteCount = cpu_to_le16(byte_count); 3470 pSMB->ByteCount = cpu_to_le16(byte_count);
3456 3471
3457 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3472 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
3463 3478
3464 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3479 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3465 rc = -EIO; 3480 rc = -EIO;
3466 else if (!legacy && (pSMBr->ByteCount < 40)) 3481 else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
3467 rc = -EIO; /* bad smb */ 3482 rc = -EIO; /* bad smb */
3468 else if (legacy && (pSMBr->ByteCount < 24)) 3483 else if (legacy && get_bcc(&pSMBr->hdr) < 24)
3469 rc = -EIO; /* 24 or 26 expected but we do not read 3484 rc = -EIO; /* 24 or 26 expected but we do not read
3470 last field */ 3485 last field */
3471 else if (pFindData) { 3486 else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
3532 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3547 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3533 pSMB->Pad = 0; 3548 pSMB->Pad = 0;
3534 pSMB->Fid = netfid; 3549 pSMB->Fid = netfid;
3535 pSMB->hdr.smb_buf_length += byte_count; 3550 inc_rfc1001_len(pSMB, byte_count);
3536 3551
3537 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3552 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3538 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3553 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
3541 } else { /* decode response */ 3556 } else { /* decode response */
3542 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3557 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3543 3558
3544 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3559 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3545 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3560 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3546 "Unix Extensions can be disabled on mount " 3561 "Unix Extensions can be disabled on mount "
3547 "by specifying the nosfu mount option."); 3562 "by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
3617 pSMB->ParameterCount = pSMB->TotalParameterCount; 3632 pSMB->ParameterCount = pSMB->TotalParameterCount;
3618 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3633 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3619 pSMB->Reserved4 = 0; 3634 pSMB->Reserved4 = 0;
3620 pSMB->hdr.smb_buf_length += byte_count; 3635 inc_rfc1001_len(pSMB, byte_count);
3621 pSMB->ByteCount = cpu_to_le16(byte_count); 3636 pSMB->ByteCount = cpu_to_le16(byte_count);
3622 3637
3623 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3638 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
3627 } else { /* decode response */ 3642 } else { /* decode response */
3628 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3643 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3629 3644
3630 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3645 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3631 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3646 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3632 "Unix Extensions can be disabled on mount " 3647 "Unix Extensions can be disabled on mount "
3633 "by specifying the nosfu mount option."); 3648 "by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
3731 3746
3732 /* BB what should we set StorageType to? Does it matter? BB */ 3747 /* BB what should we set StorageType to? Does it matter? BB */
3733 pSMB->SearchStorageType = 0; 3748 pSMB->SearchStorageType = 0;
3734 pSMB->hdr.smb_buf_length += byte_count; 3749 inc_rfc1001_len(pSMB, byte_count);
3735 pSMB->ByteCount = cpu_to_le16(byte_count); 3750 pSMB->ByteCount = cpu_to_le16(byte_count);
3736 3751
3737 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3752 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3860 byte_count = params + 1 /* pad */ ; 3875 byte_count = params + 1 /* pad */ ;
3861 pSMB->TotalParameterCount = cpu_to_le16(params); 3876 pSMB->TotalParameterCount = cpu_to_le16(params);
3862 pSMB->ParameterCount = pSMB->TotalParameterCount; 3877 pSMB->ParameterCount = pSMB->TotalParameterCount;
3863 pSMB->hdr.smb_buf_length += byte_count; 3878 inc_rfc1001_len(pSMB, byte_count);
3864 pSMB->ByteCount = cpu_to_le16(byte_count); 3879 pSMB->ByteCount = cpu_to_le16(byte_count);
3865 3880
3866 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3881 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
4022 pSMB->ParameterCount = pSMB->TotalParameterCount; 4037 pSMB->ParameterCount = pSMB->TotalParameterCount;
4023 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO); 4038 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
4024 pSMB->Reserved4 = 0; 4039 pSMB->Reserved4 = 0;
4025 pSMB->hdr.smb_buf_length += byte_count; 4040 inc_rfc1001_len(pSMB, byte_count);
4026 pSMB->ByteCount = cpu_to_le16(byte_count); 4041 pSMB->ByteCount = cpu_to_le16(byte_count);
4027 4042
4028 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4043 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
4032 } else { 4047 } else {
4033 /* decode response */ 4048 /* decode response */
4034 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4049 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4035 if (rc || (pSMBr->ByteCount < 2))
4036 /* BB also check enough total bytes returned */ 4050 /* BB also check enough total bytes returned */
4051 if (rc || get_bcc(&pSMBr->hdr) < 2)
4037 /* If rc should we check for EOPNOSUPP and 4052 /* If rc should we check for EOPNOSUPP and
4038 disable the srvino flag? or in caller? */ 4053 disable the srvino flag? or in caller? */
4039 rc = -EIO; /* bad smb */ 4054 rc = -EIO; /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
4246 pSMB->ParameterCount = cpu_to_le16(params); 4261 pSMB->ParameterCount = cpu_to_le16(params);
4247 pSMB->TotalParameterCount = pSMB->ParameterCount; 4262 pSMB->TotalParameterCount = pSMB->ParameterCount;
4248 pSMB->MaxReferralLevel = cpu_to_le16(3); 4263 pSMB->MaxReferralLevel = cpu_to_le16(3);
4249 pSMB->hdr.smb_buf_length += byte_count; 4264 inc_rfc1001_len(pSMB, byte_count);
4250 pSMB->ByteCount = cpu_to_le16(byte_count); 4265 pSMB->ByteCount = cpu_to_le16(byte_count);
4251 4266
4252 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4267 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
4258 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4273 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4259 4274
4260 /* BB Also check if enough total bytes returned? */ 4275 /* BB Also check if enough total bytes returned? */
4261 if (rc || (pSMBr->ByteCount < 17)) { 4276 if (rc || get_bcc(&pSMBr->hdr) < 17) {
4262 rc = -EIO; /* bad smb */ 4277 rc = -EIO; /* bad smb */
4263 goto GetDFSRefExit; 4278 goto GetDFSRefExit;
4264 } 4279 }
4265 4280
4266 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d", 4281 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4267 pSMBr->ByteCount, 4282 get_bcc(&pSMBr->hdr),
4268 le16_to_cpu(pSMBr->t2.DataOffset)); 4283 le16_to_cpu(pSMBr->t2.DataOffset));
4269 4284
4270 /* parse returned result into more usable form */ 4285 /* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
4320 pSMB->Reserved3 = 0; 4335 pSMB->Reserved3 = 0;
4321 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4336 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4322 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION); 4337 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
4323 pSMB->hdr.smb_buf_length += byte_count; 4338 inc_rfc1001_len(pSMB, byte_count);
4324 pSMB->ByteCount = cpu_to_le16(byte_count); 4339 pSMB->ByteCount = cpu_to_le16(byte_count);
4325 4340
4326 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4341 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
4330 } else { /* decode response */ 4345 } else { /* decode response */
4331 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4346 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4332 4347
4333 if (rc || (pSMBr->ByteCount < 18)) 4348 if (rc || get_bcc(&pSMBr->hdr) < 18)
4334 rc = -EIO; /* bad smb */ 4349 rc = -EIO; /* bad smb */
4335 else { 4350 else {
4336 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4351 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4337 cFYI(1, "qfsinf resp BCC: %d Offset %d", 4352 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4338 pSMBr->ByteCount, data_offset); 4353 get_bcc(&pSMBr->hdr), data_offset);
4339 4354
4340 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4355 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4341 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4356 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
4399 pSMB->Reserved3 = 0; 4414 pSMB->Reserved3 = 0;
4400 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4415 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4401 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO); 4416 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
4402 pSMB->hdr.smb_buf_length += byte_count; 4417 inc_rfc1001_len(pSMB, byte_count);
4403 pSMB->ByteCount = cpu_to_le16(byte_count); 4418 pSMB->ByteCount = cpu_to_le16(byte_count);
4404 4419
4405 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4420 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
4409 } else { /* decode response */ 4424 } else { /* decode response */
4410 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4425 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4411 4426
4412 if (rc || (pSMBr->ByteCount < 24)) 4427 if (rc || get_bcc(&pSMBr->hdr) < 24)
4413 rc = -EIO; /* bad smb */ 4428 rc = -EIO; /* bad smb */
4414 else { 4429 else {
4415 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4430 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
4479 pSMB->Reserved3 = 0; 4494 pSMB->Reserved3 = 0;
4480 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4495 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4481 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO); 4496 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
4482 pSMB->hdr.smb_buf_length += byte_count; 4497 inc_rfc1001_len(pSMB, byte_count);
4483 pSMB->ByteCount = cpu_to_le16(byte_count); 4498 pSMB->ByteCount = cpu_to_le16(byte_count);
4484 4499
4485 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4500 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
4489 } else { /* decode response */ 4504 } else { /* decode response */
4490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4505 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4491 4506
4492 if (rc || (pSMBr->ByteCount < 13)) { 4507 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4493 /* BB also check if enough bytes returned */ 4508 /* BB also check if enough bytes returned */
4494 rc = -EIO; /* bad smb */ 4509 rc = -EIO; /* bad smb */
4495 } else { 4510 } else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
4550 pSMB->Reserved3 = 0; 4565 pSMB->Reserved3 = 0;
4551 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4566 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4552 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO); 4567 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
4553 pSMB->hdr.smb_buf_length += byte_count; 4568 inc_rfc1001_len(pSMB, byte_count);
4554 pSMB->ByteCount = cpu_to_le16(byte_count); 4569 pSMB->ByteCount = cpu_to_le16(byte_count);
4555 4570
4556 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4571 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
4560 } else { /* decode response */ 4575 } else { /* decode response */
4561 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4576 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4562 4577
4563 if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO))) 4578 if (rc || get_bcc(&pSMBr->hdr) <
4579 sizeof(FILE_SYSTEM_DEVICE_INFO))
4564 rc = -EIO; /* bad smb */ 4580 rc = -EIO; /* bad smb */
4565 else { 4581 else {
4566 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4582 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
4619 pSMB->Reserved3 = 0; 4635 pSMB->Reserved3 = 0;
4620 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4636 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4621 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO); 4637 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
4622 pSMB->hdr.smb_buf_length += byte_count; 4638 inc_rfc1001_len(pSMB, byte_count);
4623 pSMB->ByteCount = cpu_to_le16(byte_count); 4639 pSMB->ByteCount = cpu_to_le16(byte_count);
4624 4640
4625 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4641 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
4629 } else { /* decode response */ 4645 } else { /* decode response */
4630 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4646 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4631 4647
4632 if (rc || (pSMBr->ByteCount < 13)) { 4648 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4633 rc = -EIO; /* bad smb */ 4649 rc = -EIO; /* bad smb */
4634 } else { 4650 } else {
4635 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4651 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
4702 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION); 4718 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
4703 pSMB->ClientUnixCap = cpu_to_le64(cap); 4719 pSMB->ClientUnixCap = cpu_to_le64(cap);
4704 4720
4705 pSMB->hdr.smb_buf_length += byte_count; 4721 inc_rfc1001_len(pSMB, byte_count);
4706 pSMB->ByteCount = cpu_to_le16(byte_count); 4722 pSMB->ByteCount = cpu_to_le16(byte_count);
4707 4723
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4724 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
4764 pSMB->Reserved3 = 0; 4780 pSMB->Reserved3 = 0;
4765 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4781 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4766 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO); 4782 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
4767 pSMB->hdr.smb_buf_length += byte_count; 4783 inc_rfc1001_len(pSMB, byte_count);
4768 pSMB->ByteCount = cpu_to_le16(byte_count); 4784 pSMB->ByteCount = cpu_to_le16(byte_count);
4769 4785
4770 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4786 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
4774 } else { /* decode response */ 4790 } else { /* decode response */
4775 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4791 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4776 4792
4777 if (rc || (pSMBr->ByteCount < 13)) { 4793 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4778 rc = -EIO; /* bad smb */ 4794 rc = -EIO; /* bad smb */
4779 } else { 4795 } else {
4780 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4796 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
4890 pSMB->ParameterCount = cpu_to_le16(params); 4906 pSMB->ParameterCount = cpu_to_le16(params);
4891 pSMB->TotalParameterCount = pSMB->ParameterCount; 4907 pSMB->TotalParameterCount = pSMB->ParameterCount;
4892 pSMB->Reserved4 = 0; 4908 pSMB->Reserved4 = 0;
4893 pSMB->hdr.smb_buf_length += byte_count; 4909 inc_rfc1001_len(pSMB, byte_count);
4894 parm_data->FileSize = cpu_to_le64(size); 4910 parm_data->FileSize = cpu_to_le64(size);
4895 pSMB->ByteCount = cpu_to_le16(byte_count); 4911 pSMB->ByteCount = cpu_to_le16(byte_count);
4896 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4912 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4969 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); 4985 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
4970 } 4986 }
4971 pSMB->Reserved4 = 0; 4987 pSMB->Reserved4 = 0;
4972 pSMB->hdr.smb_buf_length += byte_count; 4988 inc_rfc1001_len(pSMB, byte_count);
4973 pSMB->ByteCount = cpu_to_le16(byte_count); 4989 pSMB->ByteCount = cpu_to_le16(byte_count);
4974 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4990 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4975 if (rc) { 4991 if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5037 else 5053 else
5038 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5054 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5039 pSMB->Reserved4 = 0; 5055 pSMB->Reserved4 = 0;
5040 pSMB->hdr.smb_buf_length += byte_count; 5056 inc_rfc1001_len(pSMB, byte_count);
5041 pSMB->ByteCount = cpu_to_le16(byte_count); 5057 pSMB->ByteCount = cpu_to_le16(byte_count);
5042 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5058 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5043 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5059 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5096 pSMB->Fid = fid; 5112 pSMB->Fid = fid;
5097 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); 5113 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
5098 pSMB->Reserved4 = 0; 5114 pSMB->Reserved4 = 0;
5099 pSMB->hdr.smb_buf_length += byte_count; 5115 inc_rfc1001_len(pSMB, byte_count);
5100 pSMB->ByteCount = cpu_to_le16(byte_count); 5116 pSMB->ByteCount = cpu_to_le16(byte_count);
5101 *data_offset = delete_file ? 1 : 0; 5117 *data_offset = delete_file ? 1 : 0;
5102 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5118 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
5169 else 5185 else
5170 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5186 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5171 pSMB->Reserved4 = 0; 5187 pSMB->Reserved4 = 0;
5172 pSMB->hdr.smb_buf_length += byte_count; 5188 inc_rfc1001_len(pSMB, byte_count);
5173 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5189 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5174 pSMB->ByteCount = cpu_to_le16(byte_count); 5190 pSMB->ByteCount = cpu_to_le16(byte_count);
5175 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5191 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
5221 } 5237 }
5222 pSMB->attr = cpu_to_le16(dos_attrs); 5238 pSMB->attr = cpu_to_le16(dos_attrs);
5223 pSMB->BufferFormat = 0x04; 5239 pSMB->BufferFormat = 0x04;
5224 pSMB->hdr.smb_buf_length += name_len + 1; 5240 inc_rfc1001_len(pSMB, name_len + 1);
5225 pSMB->ByteCount = cpu_to_le16(name_len + 1); 5241 pSMB->ByteCount = cpu_to_le16(name_len + 1);
5226 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5242 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5227 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5243 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5247,7 +5263,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5247 * Samba server ignores set of file size to zero due to bugs in some 5263 * Samba server ignores set of file size to zero due to bugs in some
5248 * older clients, but we should be precise - we use SetFileSize to 5264 * older clients, but we should be precise - we use SetFileSize to
5249 * set file size and do not want to truncate file size to zero 5265 * set file size and do not want to truncate file size to zero
5250 * accidently as happened on one Samba server beta by putting 5266 * accidentally as happened on one Samba server beta by putting
5251 * zero instead of -1 here 5267 * zero instead of -1 here
5252 */ 5268 */
5253 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); 5269 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5326 pSMB->Fid = fid; 5342 pSMB->Fid = fid;
5327 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5343 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5328 pSMB->Reserved4 = 0; 5344 pSMB->Reserved4 = 0;
5329 pSMB->hdr.smb_buf_length += byte_count; 5345 inc_rfc1001_len(pSMB, byte_count);
5330 pSMB->ByteCount = cpu_to_le16(byte_count); 5346 pSMB->ByteCount = cpu_to_le16(byte_count);
5331 5347
5332 cifs_fill_unix_set_info(data_offset, args); 5348 cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
5402 pSMB->TotalDataCount = pSMB->DataCount; 5418 pSMB->TotalDataCount = pSMB->DataCount;
5403 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5419 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5404 pSMB->Reserved4 = 0; 5420 pSMB->Reserved4 = 0;
5405 pSMB->hdr.smb_buf_length += byte_count; 5421 inc_rfc1001_len(pSMB, byte_count);
5406 5422
5407 cifs_fill_unix_set_info(data_offset, args); 5423 cifs_fill_unix_set_info(data_offset, args);
5408 5424
@@ -5418,79 +5434,6 @@ setPermsRetry:
5418 return rc; 5434 return rc;
5419} 5435}
5420 5436
5421int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5422 const int notify_subdirs, const __u16 netfid,
5423 __u32 filter, struct file *pfile, int multishot,
5424 const struct nls_table *nls_codepage)
5425{
5426 int rc = 0;
5427 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5428 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5429 struct dir_notify_req *dnotify_req;
5430 int bytes_returned;
5431
5432 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5433 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5434 (void **) &pSMBr);
5435 if (rc)
5436 return rc;
5437
5438 pSMB->TotalParameterCount = 0 ;
5439 pSMB->TotalDataCount = 0;
5440 pSMB->MaxParameterCount = cpu_to_le32(2);
5441 /* BB find exact data count max from sess structure BB */
5442 pSMB->MaxDataCount = 0; /* same in little endian or be */
5443/* BB VERIFY verify which is correct for above BB */
5444 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5445 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5446
5447 pSMB->MaxSetupCount = 4;
5448 pSMB->Reserved = 0;
5449 pSMB->ParameterOffset = 0;
5450 pSMB->DataCount = 0;
5451 pSMB->DataOffset = 0;
5452 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5453 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5454 pSMB->ParameterCount = pSMB->TotalParameterCount;
5455 if (notify_subdirs)
5456 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5457 pSMB->Reserved2 = 0;
5458 pSMB->CompletionFilter = cpu_to_le32(filter);
5459 pSMB->Fid = netfid; /* file handle always le */
5460 pSMB->ByteCount = 0;
5461
5462 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5463 (struct smb_hdr *)pSMBr, &bytes_returned,
5464 CIFS_ASYNC_OP);
5465 if (rc) {
5466 cFYI(1, "Error in Notify = %d", rc);
5467 } else {
5468 /* Add file to outstanding requests */
5469 /* BB change to kmem cache alloc */
5470 dnotify_req = kmalloc(
5471 sizeof(struct dir_notify_req),
5472 GFP_KERNEL);
5473 if (dnotify_req) {
5474 dnotify_req->Pid = pSMB->hdr.Pid;
5475 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5476 dnotify_req->Mid = pSMB->hdr.Mid;
5477 dnotify_req->Tid = pSMB->hdr.Tid;
5478 dnotify_req->Uid = pSMB->hdr.Uid;
5479 dnotify_req->netfid = netfid;
5480 dnotify_req->pfile = pfile;
5481 dnotify_req->filter = filter;
5482 dnotify_req->multishot = multishot;
5483 spin_lock(&GlobalMid_Lock);
5484 list_add_tail(&dnotify_req->lhead,
5485 &GlobalDnotifyReqList);
5486 spin_unlock(&GlobalMid_Lock);
5487 } else
5488 rc = -ENOMEM;
5489 }
5490 cifs_buf_release(pSMB);
5491 return rc;
5492}
5493
5494#ifdef CONFIG_CIFS_XATTR 5437#ifdef CONFIG_CIFS_XATTR
5495/* 5438/*
5496 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common 5439 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
5560 pSMB->ParameterCount = pSMB->TotalParameterCount; 5503 pSMB->ParameterCount = pSMB->TotalParameterCount;
5561 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS); 5504 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
5562 pSMB->Reserved4 = 0; 5505 pSMB->Reserved4 = 0;
5563 pSMB->hdr.smb_buf_length += byte_count; 5506 inc_rfc1001_len(pSMB, byte_count);
5564 pSMB->ByteCount = cpu_to_le16(byte_count); 5507 pSMB->ByteCount = cpu_to_le16(byte_count);
5565 5508
5566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5509 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
5576 of these trans2 responses */ 5519 of these trans2 responses */
5577 5520
5578 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 5521 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
5579 if (rc || (pSMBr->ByteCount < 4)) { 5522 if (rc || get_bcc(&pSMBr->hdr) < 4) {
5580 rc = -EIO; /* bad smb */ 5523 rc = -EIO; /* bad smb */
5581 goto QAllEAsOut; 5524 goto QAllEAsOut;
5582 } 5525 }
@@ -5773,7 +5716,7 @@ SetEARetry:
5773 pSMB->ParameterCount = cpu_to_le16(params); 5716 pSMB->ParameterCount = cpu_to_le16(params);
5774 pSMB->TotalParameterCount = pSMB->ParameterCount; 5717 pSMB->TotalParameterCount = pSMB->ParameterCount;
5775 pSMB->Reserved4 = 0; 5718 pSMB->Reserved4 = 0;
5776 pSMB->hdr.smb_buf_length += byte_count; 5719 inc_rfc1001_len(pSMB, byte_count);
5777 pSMB->ByteCount = cpu_to_le16(byte_count); 5720 pSMB->ByteCount = cpu_to_le16(byte_count);
5778 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5721 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5779 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5722 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
5787 5730
5788 return rc; 5731 return rc;
5789} 5732}
5790
5791#endif 5733#endif
5734
5735#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
5736/*
5737 * Years ago the kernel added a "dnotify" function for Samba server,
5738 * to allow network clients (such as Windows) to display updated
5739 * lists of files in directory listings automatically when
5740 * files are added by one user when another user has the
5741 * same directory open on their desktop. The Linux cifs kernel
5742 * client hooked into the kernel side of this interface for
5743 * the same reason, but ironically when the VFS moved from
5744 * "dnotify" to "inotify" it became harder to plug in Linux
5745 * network file system clients (the most obvious use case
5746 * for notify interfaces is when multiple users can update
5747 * the contents of the same directory - exactly what network
5748 * file systems can do) although the server (Samba) could
5749 * still use it. For the short term we leave the worker
5750 * function ifdeffed out (below) until inotify is fixed
5751 * in the VFS to make it easier to plug in network file
5752 * system clients. If inotify turns out to be permanently
5753 * incompatible for network fs clients, we could instead simply
5754 * expose this config flag by adding a future cifs (and smb2) notify ioctl.
5755 */
5756int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5757 const int notify_subdirs, const __u16 netfid,
5758 __u32 filter, struct file *pfile, int multishot,
5759 const struct nls_table *nls_codepage)
5760{
5761 int rc = 0;
5762 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5763 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5764 struct dir_notify_req *dnotify_req;
5765 int bytes_returned;
5766
5767 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5768 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5769 (void **) &pSMBr);
5770 if (rc)
5771 return rc;
5772
5773 pSMB->TotalParameterCount = 0 ;
5774 pSMB->TotalDataCount = 0;
5775 pSMB->MaxParameterCount = cpu_to_le32(2);
5776 /* BB find exact data count max from sess structure BB */
5777 pSMB->MaxDataCount = 0; /* same in little endian or be */
5778/* BB VERIFY verify which is correct for above BB */
5779 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5780 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5781
5782 pSMB->MaxSetupCount = 4;
5783 pSMB->Reserved = 0;
5784 pSMB->ParameterOffset = 0;
5785 pSMB->DataCount = 0;
5786 pSMB->DataOffset = 0;
5787 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5788 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5789 pSMB->ParameterCount = pSMB->TotalParameterCount;
5790 if (notify_subdirs)
5791 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5792 pSMB->Reserved2 = 0;
5793 pSMB->CompletionFilter = cpu_to_le32(filter);
5794 pSMB->Fid = netfid; /* file handle always le */
5795 pSMB->ByteCount = 0;
5796
5797 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5798 (struct smb_hdr *)pSMBr, &bytes_returned,
5799 CIFS_ASYNC_OP);
5800 if (rc) {
5801 cFYI(1, "Error in Notify = %d", rc);
5802 } else {
5803 /* Add file to outstanding requests */
5804 /* BB change to kmem cache alloc */
5805 dnotify_req = kmalloc(
5806 sizeof(struct dir_notify_req),
5807 GFP_KERNEL);
5808 if (dnotify_req) {
5809 dnotify_req->Pid = pSMB->hdr.Pid;
5810 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5811 dnotify_req->Mid = pSMB->hdr.Mid;
5812 dnotify_req->Tid = pSMB->hdr.Tid;
5813 dnotify_req->Uid = pSMB->hdr.Uid;
5814 dnotify_req->netfid = netfid;
5815 dnotify_req->pfile = pfile;
5816 dnotify_req->filter = filter;
5817 dnotify_req->multishot = multishot;
5818 spin_lock(&GlobalMid_Lock);
5819 list_add_tail(&dnotify_req->lhead,
5820 &GlobalDnotifyReqList);
5821 spin_unlock(&GlobalMid_Lock);
5822 } else
5823 rc = -ENOMEM;
5824 }
5825 cifs_buf_release(pSMB);
5826 return rc;
5827}
5828#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d6c17ab593d..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
102 bool fsc:1; /* enable fscache */ 102 bool fsc:1; /* enable fscache */
103 bool mfsymlinks:1; /* use Minshall+French Symlinks */ 103 bool mfsymlinks:1; /* use Minshall+French Symlinks */
104 bool multiuser:1; 104 bool multiuser:1;
105 bool use_smb2:1; /* force smb2 use on mount instead of cifs */
105 unsigned int rsize; 106 unsigned int rsize;
106 unsigned int wsize; 107 unsigned int wsize;
107 bool sockopt_tcp_nodelay:1; 108 bool sockopt_tcp_nodelay:1;
@@ -199,8 +200,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
199 } 200 }
200 spin_unlock(&GlobalMid_Lock); 201 spin_unlock(&GlobalMid_Lock);
201 202
202 while ((server->tcpStatus != CifsExiting) && 203 while (server->tcpStatus == CifsNeedReconnect) {
203 (server->tcpStatus != CifsGood)) {
204 try_to_freeze(); 204 try_to_freeze();
205 205
206 /* we should try only the port we connected to before */ 206 /* we should try only the port we connected to before */
@@ -212,7 +212,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
212 atomic_inc(&tcpSesReconnectCount); 212 atomic_inc(&tcpSesReconnectCount);
213 spin_lock(&GlobalMid_Lock); 213 spin_lock(&GlobalMid_Lock);
214 if (server->tcpStatus != CifsExiting) 214 if (server->tcpStatus != CifsExiting)
215 server->tcpStatus = CifsGood; 215 server->tcpStatus = CifsNeedNegotiate;
216 spin_unlock(&GlobalMid_Lock); 216 spin_unlock(&GlobalMid_Lock);
217 } 217 }
218 } 218 }
@@ -248,24 +248,24 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
248 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); 248 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
249 data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); 249 data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
250 250
251 remaining = total_data_size - data_in_this_rsp; 251 if (total_data_size == data_in_this_rsp)
252
253 if (remaining == 0)
254 return 0; 252 return 0;
255 else if (remaining < 0) { 253 else if (total_data_size < data_in_this_rsp) {
256 cFYI(1, "total data %d smaller than data in frame %d", 254 cFYI(1, "total data %d smaller than data in frame %d",
257 total_data_size, data_in_this_rsp); 255 total_data_size, data_in_this_rsp);
258 return -EINVAL; 256 return -EINVAL;
259 } else {
260 cFYI(1, "missing %d bytes from transact2, check next response",
261 remaining);
262 if (total_data_size > maxBufSize) {
263 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
264 total_data_size, maxBufSize);
265 return -EINVAL;
266 }
267 return remaining;
268 } 257 }
258
259 remaining = total_data_size - data_in_this_rsp;
260
261 cFYI(1, "missing %d bytes from transact2, check next response",
262 remaining);
263 if (total_data_size > maxBufSize) {
264 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
265 total_data_size, maxBufSize);
266 return -EINVAL;
267 }
268 return remaining;
269} 269}
270 270
271static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) 271static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
@@ -275,7 +275,8 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
275 char *data_area_of_target; 275 char *data_area_of_target;
276 char *data_area_of_buf2; 276 char *data_area_of_buf2;
277 int remaining; 277 int remaining;
278 __u16 byte_count, total_data_size, total_in_buf, total_in_buf2; 278 unsigned int byte_count, total_in_buf;
279 __u16 total_data_size, total_in_buf2;
279 280
280 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); 281 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
281 282
@@ -288,7 +289,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
288 remaining = total_data_size - total_in_buf; 289 remaining = total_data_size - total_in_buf;
289 290
290 if (remaining < 0) 291 if (remaining < 0)
291 return -EINVAL; 292 return -EPROTO;
292 293
293 if (remaining == 0) /* nothing to do, ignore */ 294 if (remaining == 0) /* nothing to do, ignore */
294 return 0; 295 return 0;
@@ -309,19 +310,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
309 data_area_of_target += total_in_buf; 310 data_area_of_target += total_in_buf;
310 311
311 /* copy second buffer into end of first buffer */ 312 /* copy second buffer into end of first buffer */
312 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
313 total_in_buf += total_in_buf2; 313 total_in_buf += total_in_buf2;
314 /* is the result too big for the field? */
315 if (total_in_buf > USHRT_MAX)
316 return -EPROTO;
314 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); 317 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
315 byte_count = get_bcc_le(pTargetSMB);
316 byte_count += total_in_buf2;
317 put_bcc_le(byte_count, pTargetSMB);
318 318
319 byte_count = pTargetSMB->smb_buf_length; 319 /* fix up the BCC */
320 byte_count = get_bcc(pTargetSMB);
320 byte_count += total_in_buf2; 321 byte_count += total_in_buf2;
322 /* is the result too big for the field? */
323 if (byte_count > USHRT_MAX)
324 return -EPROTO;
325 put_bcc(byte_count, pTargetSMB);
321 326
322 /* BB also add check that we are not beyond maximum buffer size */ 327 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
328 byte_count += total_in_buf2;
329 /* don't allow buffer to overflow */
330 if (byte_count > CIFSMaxBufSize)
331 return -ENOBUFS;
332 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
323 333
324 pTargetSMB->smb_buf_length = byte_count; 334 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
325 335
326 if (remaining == total_in_buf2) { 336 if (remaining == total_in_buf2) {
327 cFYI(1, "found the last secondary response"); 337 cFYI(1, "found the last secondary response");
@@ -421,7 +431,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
421 pdu_length = 4; /* enough to get RFC1001 header */ 431 pdu_length = 4; /* enough to get RFC1001 header */
422 432
423incomplete_rcv: 433incomplete_rcv:
424 if (echo_retries > 0 && 434 if (echo_retries > 0 && server->tcpStatus == CifsGood &&
425 time_after(jiffies, server->lstrp + 435 time_after(jiffies, server->lstrp +
426 (echo_retries * SMB_ECHO_INTERVAL))) { 436 (echo_retries * SMB_ECHO_INTERVAL))) {
427 cERROR(1, "Server %s has not responded in %d seconds. " 437 cERROR(1, "Server %s has not responded in %d seconds. "
@@ -486,8 +496,7 @@ incomplete_rcv:
486 /* Note that FC 1001 length is big endian on the wire, 496 /* Note that FC 1001 length is big endian on the wire,
487 but we convert it here so it is always manipulated 497 but we convert it here so it is always manipulated
488 as host byte order */ 498 as host byte order */
489 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 499 pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
490 smb_buffer->smb_buf_length = pdu_length;
491 500
492 cFYI(1, "rfc1002 length 0x%x", pdu_length+4); 501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
493 502
@@ -608,59 +617,63 @@ incomplete_rcv:
608 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { 617 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
609 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 618 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
610 619
611 if ((mid_entry->mid == smb_buffer->Mid) && 620 if (mid_entry->mid != smb_buffer->Mid ||
612 (mid_entry->midState == MID_REQUEST_SUBMITTED) && 621 mid_entry->midState != MID_REQUEST_SUBMITTED ||
613 (mid_entry->command == smb_buffer->Command)) { 622 mid_entry->command != smb_buffer->Command) {
614 if (length == 0 && 623 mid_entry = NULL;
615 check2ndT2(smb_buffer, server->maxBuf) > 0) { 624 continue;
616 /* We have a multipart transact2 resp */ 625 }
617 isMultiRsp = true; 626
618 if (mid_entry->resp_buf) { 627 if (length == 0 &&
619 /* merge response - fix up 1st*/ 628 check2ndT2(smb_buffer, server->maxBuf) > 0) {
620 if (coalesce_t2(smb_buffer, 629 /* We have a multipart transact2 resp */
621 mid_entry->resp_buf)) { 630 isMultiRsp = true;
622 mid_entry->multiRsp = 631 if (mid_entry->resp_buf) {
623 true; 632 /* merge response - fix up 1st*/
624 break; 633 length = coalesce_t2(smb_buffer,
625 } else { 634 mid_entry->resp_buf);
626 /* all parts received */ 635 if (length > 0) {
627 mid_entry->multiEnd = 636 length = 0;
628 true; 637 mid_entry->multiRsp = true;
629 goto multi_t2_fnd; 638 break;
630 }
631 } else { 639 } else {
632 if (!isLargeBuf) { 640 /* all parts received or
633 cERROR(1, "1st trans2 resp needs bigbuf"); 641 * packet is malformed
634 /* BB maybe we can fix this up, switch 642 */
635 to already allocated large buffer? */ 643 mid_entry->multiEnd = true;
636 } else { 644 goto multi_t2_fnd;
637 /* Have first buffer */ 645 }
638 mid_entry->resp_buf = 646 } else {
639 smb_buffer; 647 if (!isLargeBuf) {
640 mid_entry->largeBuf = 648 /*
641 true; 649 * FIXME: switch to already
642 bigbuf = NULL; 650 * allocated largebuf?
643 } 651 */
652 cERROR(1, "1st trans2 resp "
653 "needs bigbuf");
654 } else {
655 /* Have first buffer */
656 mid_entry->resp_buf =
657 smb_buffer;
658 mid_entry->largeBuf = true;
659 bigbuf = NULL;
644 } 660 }
645 break;
646 } 661 }
647 mid_entry->resp_buf = smb_buffer; 662 break;
648 mid_entry->largeBuf = isLargeBuf; 663 }
664 mid_entry->resp_buf = smb_buffer;
665 mid_entry->largeBuf = isLargeBuf;
649multi_t2_fnd: 666multi_t2_fnd:
650 if (length == 0) 667 if (length == 0)
651 mid_entry->midState = 668 mid_entry->midState = MID_RESPONSE_RECEIVED;
652 MID_RESPONSE_RECEIVED; 669 else
653 else 670 mid_entry->midState = MID_RESPONSE_MALFORMED;
654 mid_entry->midState =
655 MID_RESPONSE_MALFORMED;
656#ifdef CONFIG_CIFS_STATS2 671#ifdef CONFIG_CIFS_STATS2
657 mid_entry->when_received = jiffies; 672 mid_entry->when_received = jiffies;
658#endif 673#endif
659 list_del_init(&mid_entry->qhead); 674 list_del_init(&mid_entry->qhead);
660 mid_entry->callback(mid_entry); 675 mid_entry->callback(mid_entry);
661 break; 676 break;
662 }
663 mid_entry = NULL;
664 } 677 }
665 spin_unlock(&GlobalMid_Lock); 678 spin_unlock(&GlobalMid_Lock);
666 679
@@ -722,7 +735,7 @@ multi_t2_fnd:
722 sock_release(csocket); 735 sock_release(csocket);
723 server->ssocket = NULL; 736 server->ssocket = NULL;
724 } 737 }
725 /* buffer usuallly freed in free_mid - need to free it here on exit */ 738 /* buffer usually freed in free_mid - need to free it here on exit */
726 cifs_buf_release(bigbuf); 739 cifs_buf_release(bigbuf);
727 if (smallbuf) /* no sense logging a debug message if NULL */ 740 if (smallbuf) /* no sense logging a debug message if NULL */
728 cifs_small_buf_release(smallbuf); 741 cifs_small_buf_release(smallbuf);
@@ -805,11 +818,11 @@ extract_hostname(const char *unc)
805} 818}
806 819
807static int 820static int
808cifs_parse_mount_options(char *options, const char *devname, 821cifs_parse_mount_options(const char *mountdata, const char *devname,
809 struct smb_vol *vol) 822 struct smb_vol *vol)
810{ 823{
811 char *value; 824 char *value, *data, *end;
812 char *data; 825 char *mountdata_copy, *options;
813 unsigned int temp_len, i, j; 826 unsigned int temp_len, i, j;
814 char separator[2]; 827 char separator[2];
815 short int override_uid = -1; 828 short int override_uid = -1;
@@ -849,9 +862,15 @@ cifs_parse_mount_options(char *options, const char *devname,
849 862
850 vol->actimeo = CIFS_DEF_ACTIMEO; 863 vol->actimeo = CIFS_DEF_ACTIMEO;
851 864
852 if (!options) 865 if (!mountdata)
853 return 1; 866 goto cifs_parse_mount_err;
867
868 mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
869 if (!mountdata_copy)
870 goto cifs_parse_mount_err;
854 871
872 options = mountdata_copy;
873 end = options + strlen(options);
855 if (strncmp(options, "sep=", 4) == 0) { 874 if (strncmp(options, "sep=", 4) == 0) {
856 if (options[4] != 0) { 875 if (options[4] != 0) {
857 separator[0] = options[4]; 876 separator[0] = options[4];
@@ -876,16 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
876 if (!value) { 895 if (!value) {
877 printk(KERN_WARNING 896 printk(KERN_WARNING
878 "CIFS: invalid or missing username\n"); 897 "CIFS: invalid or missing username\n");
879 return 1; /* needs_arg; */ 898 goto cifs_parse_mount_err;
880 } else if (!*value) { 899 } else if (!*value) {
881 /* null user, ie anonymous, authentication */ 900 /* null user, ie anonymous, authentication */
882 vol->nullauth = 1; 901 vol->nullauth = 1;
883 } 902 }
884 if (strnlen(value, 200) < 200) { 903 if (strnlen(value, MAX_USERNAME_SIZE) <
885 vol->username = value; 904 MAX_USERNAME_SIZE) {
905 vol->username = kstrdup(value, GFP_KERNEL);
906 if (!vol->username) {
907 printk(KERN_WARNING "CIFS: no memory "
908 "for username\n");
909 goto cifs_parse_mount_err;
910 }
886 } else { 911 } else {
887 printk(KERN_WARNING "CIFS: username too long\n"); 912 printk(KERN_WARNING "CIFS: username too long\n");
888 return 1; 913 goto cifs_parse_mount_err;
889 } 914 }
890 } else if (strnicmp(data, "pass", 4) == 0) { 915 } else if (strnicmp(data, "pass", 4) == 0) {
891 if (!value) { 916 if (!value) {
@@ -916,6 +941,7 @@ cifs_parse_mount_options(char *options, const char *devname,
916 the only illegal character in a password is null */ 941 the only illegal character in a password is null */
917 942
918 if ((value[temp_len] == 0) && 943 if ((value[temp_len] == 0) &&
944 (value + temp_len < end) &&
919 (value[temp_len+1] == separator[0])) { 945 (value[temp_len+1] == separator[0])) {
920 /* reinsert comma */ 946 /* reinsert comma */
921 value[temp_len] = separator[0]; 947 value[temp_len] = separator[0];
@@ -948,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
948 if (vol->password == NULL) { 974 if (vol->password == NULL) {
949 printk(KERN_WARNING "CIFS: no memory " 975 printk(KERN_WARNING "CIFS: no memory "
950 "for password\n"); 976 "for password\n");
951 return 1; 977 goto cifs_parse_mount_err;
952 } 978 }
953 for (i = 0, j = 0; i < temp_len; i++, j++) { 979 for (i = 0, j = 0; i < temp_len; i++, j++) {
954 vol->password[j] = value[i]; 980 vol->password[j] = value[i];
@@ -964,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
964 if (vol->password == NULL) { 990 if (vol->password == NULL) {
965 printk(KERN_WARNING "CIFS: no memory " 991 printk(KERN_WARNING "CIFS: no memory "
966 "for password\n"); 992 "for password\n");
967 return 1; 993 goto cifs_parse_mount_err;
968 } 994 }
969 strcpy(vol->password, value); 995 strcpy(vol->password, value);
970 } 996 }
@@ -974,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
974 vol->UNCip = NULL; 1000 vol->UNCip = NULL;
975 } else if (strnlen(value, INET6_ADDRSTRLEN) < 1001 } else if (strnlen(value, INET6_ADDRSTRLEN) <
976 INET6_ADDRSTRLEN) { 1002 INET6_ADDRSTRLEN) {
977 vol->UNCip = value; 1003 vol->UNCip = kstrdup(value, GFP_KERNEL);
1004 if (!vol->UNCip) {
1005 printk(KERN_WARNING "CIFS: no memory "
1006 "for UNC IP\n");
1007 goto cifs_parse_mount_err;
1008 }
978 } else { 1009 } else {
979 printk(KERN_WARNING "CIFS: ip address " 1010 printk(KERN_WARNING "CIFS: ip address "
980 "too long\n"); 1011 "too long\n");
981 return 1; 1012 goto cifs_parse_mount_err;
982 } 1013 }
983 } else if (strnicmp(data, "sec", 3) == 0) { 1014 } else if (strnicmp(data, "sec", 3) == 0) {
984 if (!value || !*value) { 1015 if (!value || !*value) {
@@ -991,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
991 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 1022 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
992 CIFSSEC_MAY_KRB5; */ 1023 CIFSSEC_MAY_KRB5; */
993 cERROR(1, "Krb5 cifs privacy not supported"); 1024 cERROR(1, "Krb5 cifs privacy not supported");
994 return 1; 1025 goto cifs_parse_mount_err;
995 } else if (strnicmp(value, "krb5", 4) == 0) { 1026 } else if (strnicmp(value, "krb5", 4) == 0) {
996 vol->secFlg |= CIFSSEC_MAY_KRB5; 1027 vol->secFlg |= CIFSSEC_MAY_KRB5;
997 } else if (strnicmp(value, "ntlmsspi", 8) == 0) { 1028 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1021,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
1021 vol->nullauth = 1; 1052 vol->nullauth = 1;
1022 } else { 1053 } else {
1023 cERROR(1, "bad security option: %s", value); 1054 cERROR(1, "bad security option: %s", value);
1024 return 1; 1055 goto cifs_parse_mount_err;
1056 }
1057 } else if (strnicmp(data, "vers", 3) == 0) {
1058 if (!value || !*value) {
1059 cERROR(1, "no protocol version specified"
1060 " after vers= mount option");
1061 } else if ((strnicmp(value, "cifs", 4) == 0) ||
1062 (strnicmp(value, "1", 1) == 0)) {
1063 /* this is the default */
1064 continue;
1065 } else if ((strnicmp(value, "smb2", 4) == 0) ||
1066 (strnicmp(value, "2", 1) == 0)) {
1067#ifdef CONFIG_CIFS_SMB2
1068 vol->use_smb2 = true;
1069#else
1070 cERROR(1, "smb2 support not enabled");
1071#endif /* CONFIG_CIFS_SMB2 */
1025 } 1072 }
1026 } else if ((strnicmp(data, "unc", 3) == 0) 1073 } else if ((strnicmp(data, "unc", 3) == 0)
1027 || (strnicmp(data, "target", 6) == 0) 1074 || (strnicmp(data, "target", 6) == 0)
@@ -1029,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1029 if (!value || !*value) { 1076 if (!value || !*value) {
1030 printk(KERN_WARNING "CIFS: invalid path to " 1077 printk(KERN_WARNING "CIFS: invalid path to "
1031 "network resource\n"); 1078 "network resource\n");
1032 return 1; /* needs_arg; */ 1079 goto cifs_parse_mount_err;
1033 } 1080 }
1034 if ((temp_len = strnlen(value, 300)) < 300) { 1081 if ((temp_len = strnlen(value, 300)) < 300) {
1035 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1082 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1036 if (vol->UNC == NULL) 1083 if (vol->UNC == NULL)
1037 return 1; 1084 goto cifs_parse_mount_err;
1038 strcpy(vol->UNC, value); 1085 strcpy(vol->UNC, value);
1039 if (strncmp(vol->UNC, "//", 2) == 0) { 1086 if (strncmp(vol->UNC, "//", 2) == 0) {
1040 vol->UNC[0] = '\\'; 1087 vol->UNC[0] = '\\';
@@ -1043,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
1043 printk(KERN_WARNING 1090 printk(KERN_WARNING
1044 "CIFS: UNC Path does not begin " 1091 "CIFS: UNC Path does not begin "
1045 "with // or \\\\ \n"); 1092 "with // or \\\\ \n");
1046 return 1; 1093 goto cifs_parse_mount_err;
1047 } 1094 }
1048 } else { 1095 } else {
1049 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1096 printk(KERN_WARNING "CIFS: UNC name too long\n");
1050 return 1; 1097 goto cifs_parse_mount_err;
1051 } 1098 }
1052 } else if ((strnicmp(data, "domain", 3) == 0) 1099 } else if ((strnicmp(data, "domain", 3) == 0)
1053 || (strnicmp(data, "workgroup", 5) == 0)) { 1100 || (strnicmp(data, "workgroup", 5) == 0)) {
1054 if (!value || !*value) { 1101 if (!value || !*value) {
1055 printk(KERN_WARNING "CIFS: invalid domain name\n"); 1102 printk(KERN_WARNING "CIFS: invalid domain name\n");
1056 return 1; /* needs_arg; */ 1103 goto cifs_parse_mount_err;
1057 } 1104 }
1058 /* BB are there cases in which a comma can be valid in 1105 /* BB are there cases in which a comma can be valid in
1059 a domain name and need special handling? */ 1106 a domain name and need special handling? */
1060 if (strnlen(value, 256) < 256) { 1107 if (strnlen(value, 256) < 256) {
1061 vol->domainname = value; 1108 vol->domainname = kstrdup(value, GFP_KERNEL);
1109 if (!vol->domainname) {
1110 printk(KERN_WARNING "CIFS: no memory "
1111 "for domainname\n");
1112 goto cifs_parse_mount_err;
1113 }
1062 cFYI(1, "Domain name set"); 1114 cFYI(1, "Domain name set");
1063 } else { 1115 } else {
1064 printk(KERN_WARNING "CIFS: domain name too " 1116 printk(KERN_WARNING "CIFS: domain name too "
1065 "long\n"); 1117 "long\n");
1066 return 1; 1118 goto cifs_parse_mount_err;
1067 } 1119 }
1068 } else if (strnicmp(data, "srcaddr", 7) == 0) { 1120 } else if (strnicmp(data, "srcaddr", 7) == 0) {
1069 vol->srcaddr.ss_family = AF_UNSPEC; 1121 vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1071,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1071 if (!value || !*value) { 1123 if (!value || !*value) {
1072 printk(KERN_WARNING "CIFS: srcaddr value" 1124 printk(KERN_WARNING "CIFS: srcaddr value"
1073 " not specified.\n"); 1125 " not specified.\n");
1074 return 1; /* needs_arg; */ 1126 goto cifs_parse_mount_err;
1075 } 1127 }
1076 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1128 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1077 value, strlen(value)); 1129 value, strlen(value));
@@ -1079,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1079 printk(KERN_WARNING "CIFS: Could not parse" 1131 printk(KERN_WARNING "CIFS: Could not parse"
1080 " srcaddr: %s\n", 1132 " srcaddr: %s\n",
1081 value); 1133 value);
1082 return 1; 1134 goto cifs_parse_mount_err;
1083 } 1135 }
1084 } else if (strnicmp(data, "prefixpath", 10) == 0) { 1136 } else if (strnicmp(data, "prefixpath", 10) == 0) {
1085 if (!value || !*value) { 1137 if (!value || !*value) {
1086 printk(KERN_WARNING 1138 printk(KERN_WARNING
1087 "CIFS: invalid path prefix\n"); 1139 "CIFS: invalid path prefix\n");
1088 return 1; /* needs_argument */ 1140 goto cifs_parse_mount_err;
1089 } 1141 }
1090 if ((temp_len = strnlen(value, 1024)) < 1024) { 1142 if ((temp_len = strnlen(value, 1024)) < 1024) {
1091 if (value[0] != '/') 1143 if (value[0] != '/')
1092 temp_len++; /* missing leading slash */ 1144 temp_len++; /* missing leading slash */
1093 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); 1145 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
1094 if (vol->prepath == NULL) 1146 if (vol->prepath == NULL)
1095 return 1; 1147 goto cifs_parse_mount_err;
1096 if (value[0] != '/') { 1148 if (value[0] != '/') {
1097 vol->prepath[0] = '/'; 1149 vol->prepath[0] = '/';
1098 strcpy(vol->prepath+1, value); 1150 strcpy(vol->prepath+1, value);
@@ -1101,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
1101 cFYI(1, "prefix path %s", vol->prepath); 1153 cFYI(1, "prefix path %s", vol->prepath);
1102 } else { 1154 } else {
1103 printk(KERN_WARNING "CIFS: prefix too long\n"); 1155 printk(KERN_WARNING "CIFS: prefix too long\n");
1104 return 1; 1156 goto cifs_parse_mount_err;
1105 } 1157 }
1106 } else if (strnicmp(data, "iocharset", 9) == 0) { 1158 } else if (strnicmp(data, "iocharset", 9) == 0) {
1107 if (!value || !*value) { 1159 if (!value || !*value) {
1108 printk(KERN_WARNING "CIFS: invalid iocharset " 1160 printk(KERN_WARNING "CIFS: invalid iocharset "
1109 "specified\n"); 1161 "specified\n");
1110 return 1; /* needs_arg; */ 1162 goto cifs_parse_mount_err;
1111 } 1163 }
1112 if (strnlen(value, 65) < 65) { 1164 if (strnlen(value, 65) < 65) {
1113 if (strnicmp(value, "default", 7)) 1165 if (strnicmp(value, "default", 7)) {
1114 vol->iocharset = value; 1166 vol->iocharset = kstrdup(value,
1167 GFP_KERNEL);
1168
1169 if (!vol->iocharset) {
1170 printk(KERN_WARNING "CIFS: no "
1171 "memory for"
1172 "charset\n");
1173 goto cifs_parse_mount_err;
1174 }
1175 }
1115 /* if iocharset not set then load_nls_default 1176 /* if iocharset not set then load_nls_default
1116 is used by caller */ 1177 is used by caller */
1117 cFYI(1, "iocharset set to %s", value); 1178 cFYI(1, "iocharset set to %s", value);
1118 } else { 1179 } else {
1119 printk(KERN_WARNING "CIFS: iocharset name " 1180 printk(KERN_WARNING "CIFS: iocharset name "
1120 "too long.\n"); 1181 "too long.\n");
1121 return 1; 1182 goto cifs_parse_mount_err;
1122 } 1183 }
1123 } else if (!strnicmp(data, "uid", 3) && value && *value) { 1184 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1124 vol->linux_uid = simple_strtoul(value, &value, 0); 1185 vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1231,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1231 if (vol->actimeo > CIFS_MAX_ACTIMEO) { 1292 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
1232 cERROR(1, "CIFS: attribute cache" 1293 cERROR(1, "CIFS: attribute cache"
1233 "timeout too large"); 1294 "timeout too large");
1234 return 1; 1295 goto cifs_parse_mount_err;
1235 } 1296 }
1236 } 1297 }
1237 } else if (strnicmp(data, "credentials", 4) == 0) { 1298 } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1375,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1375#ifndef CONFIG_CIFS_FSCACHE 1436#ifndef CONFIG_CIFS_FSCACHE
1376 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1437 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
1377 "kernel config option set"); 1438 "kernel config option set");
1378 return 1; 1439 goto cifs_parse_mount_err;
1379#endif 1440#endif
1380 vol->fsc = true; 1441 vol->fsc = true;
1381 } else if (strnicmp(data, "mfsymlinks", 10) == 0) { 1442 } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1390,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1390 if (devname == NULL) { 1451 if (devname == NULL) {
1391 printk(KERN_WARNING "CIFS: Missing UNC name for mount " 1452 printk(KERN_WARNING "CIFS: Missing UNC name for mount "
1392 "target\n"); 1453 "target\n");
1393 return 1; 1454 goto cifs_parse_mount_err;
1394 } 1455 }
1395 if ((temp_len = strnlen(devname, 300)) < 300) { 1456 if ((temp_len = strnlen(devname, 300)) < 300) {
1396 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1457 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1397 if (vol->UNC == NULL) 1458 if (vol->UNC == NULL)
1398 return 1; 1459 goto cifs_parse_mount_err;
1399 strcpy(vol->UNC, devname); 1460 strcpy(vol->UNC, devname);
1400 if (strncmp(vol->UNC, "//", 2) == 0) { 1461 if (strncmp(vol->UNC, "//", 2) == 0) {
1401 vol->UNC[0] = '\\'; 1462 vol->UNC[0] = '\\';
@@ -1403,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
1403 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { 1464 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
1404 printk(KERN_WARNING "CIFS: UNC Path does not " 1465 printk(KERN_WARNING "CIFS: UNC Path does not "
1405 "begin with // or \\\\ \n"); 1466 "begin with // or \\\\ \n");
1406 return 1; 1467 goto cifs_parse_mount_err;
1407 } 1468 }
1408 value = strpbrk(vol->UNC+2, "/\\"); 1469 value = strpbrk(vol->UNC+2, "/\\");
1409 if (value) 1470 if (value)
1410 *value = '\\'; 1471 *value = '\\';
1411 } else { 1472 } else {
1412 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1473 printk(KERN_WARNING "CIFS: UNC name too long\n");
1413 return 1; 1474 goto cifs_parse_mount_err;
1414 } 1475 }
1415 } 1476 }
1416 1477
1417 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { 1478 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
1418 cERROR(1, "Multiuser mounts currently require krb5 " 1479 cERROR(1, "Multiuser mounts currently require krb5 "
1419 "authentication!"); 1480 "authentication!");
1420 return 1; 1481 goto cifs_parse_mount_err;
1421 } 1482 }
1422 1483
1423 if (vol->UNCip == NULL) 1484 if (vol->UNCip == NULL)
@@ -1435,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1435 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1496 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1436 "specified with no gid= option.\n"); 1497 "specified with no gid= option.\n");
1437 1498
1499 kfree(mountdata_copy);
1438 return 0; 1500 return 0;
1501
1502cifs_parse_mount_err:
1503 kfree(mountdata_copy);
1504 return 1;
1439} 1505}
1440 1506
1441/** Returns true if srcaddr isn't specified and rhs isn't 1507/** Returns true if srcaddr isn't specified and rhs isn't
@@ -1472,7 +1538,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1472static bool 1538static bool
1473match_port(struct TCP_Server_Info *server, struct sockaddr *addr) 1539match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
1474{ 1540{
1475 unsigned short int port, *sport; 1541 __be16 port, *sport;
1476 1542
1477 switch (addr->sa_family) { 1543 switch (addr->sa_family) {
1478 case AF_INET: 1544 case AF_INET:
@@ -1572,7 +1638,7 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
1572 return false; 1638 return false;
1573 } 1639 }
1574 1640
1575 /* now check if signing mode is acceptible */ 1641 /* now check if signing mode is acceptable */
1576 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && 1642 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
1577 (server->secMode & SECMODE_SIGN_REQUIRED)) 1643 (server->secMode & SECMODE_SIGN_REQUIRED))
1578 return false; 1644 return false;
@@ -1765,6 +1831,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1765 module_put(THIS_MODULE); 1831 module_put(THIS_MODULE);
1766 goto out_err_crypto_release; 1832 goto out_err_crypto_release;
1767 } 1833 }
1834 tcp_ses->tcpStatus = CifsNeedNegotiate;
1768 1835
1769 /* thread spawned, put it on the list */ 1836 /* thread spawned, put it on the list */
1770 spin_lock(&cifs_tcp_ses_lock); 1837 spin_lock(&cifs_tcp_ses_lock);
@@ -1808,7 +1875,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1808 break; 1875 break;
1809 default: 1876 default:
1810 /* anything else takes username/password */ 1877 /* anything else takes username/password */
1811 if (strncmp(ses->userName, vol->username, 1878 if (ses->user_name == NULL)
1879 continue;
1880 if (strncmp(ses->user_name, vol->username,
1812 MAX_USERNAME_SIZE)) 1881 MAX_USERNAME_SIZE))
1813 continue; 1882 continue;
1814 if (strlen(vol->username) != 0 && 1883 if (strlen(vol->username) != 0 &&
@@ -1851,6 +1920,8 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1851 cifs_put_tcp_session(server); 1920 cifs_put_tcp_session(server);
1852} 1921}
1853 1922
1923static bool warned_on_ntlm; /* globals init to false automatically */
1924
1854static struct cifsSesInfo * 1925static struct cifsSesInfo *
1855cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) 1926cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1856{ 1927{
@@ -1906,9 +1977,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1906 else 1977 else
1907 sprintf(ses->serverName, "%pI4", &addr->sin_addr); 1978 sprintf(ses->serverName, "%pI4", &addr->sin_addr);
1908 1979
1909 if (volume_info->username) 1980 if (volume_info->username) {
1910 strncpy(ses->userName, volume_info->username, 1981 ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
1911 MAX_USERNAME_SIZE); 1982 if (!ses->user_name)
1983 goto get_ses_fail;
1984 }
1912 1985
1913 /* volume_info->password freed at unmount */ 1986 /* volume_info->password freed at unmount */
1914 if (volume_info->password) { 1987 if (volume_info->password) {
@@ -1923,6 +1996,15 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1923 } 1996 }
1924 ses->cred_uid = volume_info->cred_uid; 1997 ses->cred_uid = volume_info->cred_uid;
1925 ses->linux_uid = volume_info->linux_uid; 1998 ses->linux_uid = volume_info->linux_uid;
1999
2000 /* ntlmv2 is much stronger than ntlm security, and has been broadly
2001 supported for many years, time to update default security mechanism */
2002 if ((volume_info->secFlg == 0) && warned_on_ntlm == false) {
2003 warned_on_ntlm = true;
2004 cERROR(1, "default security mechanism requested. The default "
2005 "security mechanism will be upgraded from ntlm to "
2006 "ntlmv2 in kernel release 2.6.41");
2007 }
1926 ses->overrideSecFlg = volume_info->secFlg; 2008 ses->overrideSecFlg = volume_info->secFlg;
1927 2009
1928 mutex_lock(&ses->session_mutex); 2010 mutex_lock(&ses->session_mutex);
@@ -2249,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2249 smb_buf = (struct smb_hdr *)ses_init_buf; 2331 smb_buf = (struct smb_hdr *)ses_init_buf;
2250 2332
2251 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 2333 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2252 smb_buf->smb_buf_length = 0x81000044; 2334 smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
2253 rc = smb_send(server, smb_buf, 0x44); 2335 rc = smb_send(server, smb_buf, 0x44);
2254 kfree(ses_init_buf); 2336 kfree(ses_init_buf);
2255 /* 2337 /*
@@ -2276,7 +2358,7 @@ static int
2276generic_ip_connect(struct TCP_Server_Info *server) 2358generic_ip_connect(struct TCP_Server_Info *server)
2277{ 2359{
2278 int rc = 0; 2360 int rc = 0;
2279 unsigned short int sport; 2361 __be16 sport;
2280 int slen, sfamily; 2362 int slen, sfamily;
2281 struct socket *socket = server->ssocket; 2363 struct socket *socket = server->ssocket;
2282 struct sockaddr *saddr; 2364 struct sockaddr *saddr;
@@ -2361,7 +2443,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
2361static int 2443static int
2362ip_connect(struct TCP_Server_Info *server) 2444ip_connect(struct TCP_Server_Info *server)
2363{ 2445{
2364 unsigned short int *sport; 2446 __be16 *sport;
2365 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; 2447 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
2366 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; 2448 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
2367 2449
@@ -2642,6 +2724,11 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
2642 0 /* not legacy */, cifs_sb->local_nls, 2724 0 /* not legacy */, cifs_sb->local_nls,
2643 cifs_sb->mnt_cifs_flags & 2725 cifs_sb->mnt_cifs_flags &
2644 CIFS_MOUNT_MAP_SPECIAL_CHR); 2726 CIFS_MOUNT_MAP_SPECIAL_CHR);
2727
2728 if (rc == -EOPNOTSUPP || rc == -EINVAL)
2729 rc = SMBQueryInformation(xid, tcon, full_path, pfile_info,
2730 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
2731 CIFS_MOUNT_MAP_SPECIAL_CHR);
2645 kfree(pfile_info); 2732 kfree(pfile_info);
2646 return rc; 2733 return rc;
2647} 2734}
@@ -2655,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2655 return; 2742 return;
2656 2743
2657 volume_info = *pvolume_info; 2744 volume_info = *pvolume_info;
2745 kfree(volume_info->username);
2658 kzfree(volume_info->password); 2746 kzfree(volume_info->password);
2659 kfree(volume_info->UNC); 2747 kfree(volume_info->UNC);
2748 kfree(volume_info->UNCip);
2749 kfree(volume_info->domainname);
2750 kfree(volume_info->iocharset);
2660 kfree(volume_info->prepath); 2751 kfree(volume_info->prepath);
2661 kfree(volume_info); 2752 kfree(volume_info);
2662 *pvolume_info = NULL; 2753 *pvolume_info = NULL;
@@ -2693,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2693 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */ 2784 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2694 return full_path; 2785 return full_path;
2695} 2786}
2787
2788/*
2789 * Perform a dfs referral query for a share and (optionally) prefix
2790 *
2791 * If a referral is found, cifs_sb->mountdata will be (re-)allocated
2792 * to a string containing updated options for the submount. Otherwise it
2793 * will be left untouched.
2794 *
2795 * Returns the rc from get_dfs_path to the caller, which can be used to
2796 * determine whether there were referrals.
2797 */
2798static int
2799expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
2800 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
2801 int check_prefix)
2802{
2803 int rc;
2804 unsigned int num_referrals = 0;
2805 struct dfs_info3_param *referrals = NULL;
2806 char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
2807
2808 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2809 if (IS_ERR(full_path))
2810 return PTR_ERR(full_path);
2811
2812 /* For DFS paths, skip the first '\' of the UNC */
2813 ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
2814
2815 rc = get_dfs_path(xid, pSesInfo , ref_path, cifs_sb->local_nls,
2816 &num_referrals, &referrals,
2817 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2818
2819 if (!rc && num_referrals > 0) {
2820 char *fake_devname = NULL;
2821
2822 mdata = cifs_compose_mount_options(cifs_sb->mountdata,
2823 full_path + 1, referrals,
2824 &fake_devname);
2825
2826 free_dfs_info_array(referrals, num_referrals);
2827 kfree(fake_devname);
2828
2829 if (cifs_sb->mountdata != NULL)
2830 kfree(cifs_sb->mountdata);
2831
2832 if (IS_ERR(mdata)) {
2833 rc = PTR_ERR(mdata);
2834 mdata = NULL;
2835 }
2836 cifs_sb->mountdata = mdata;
2837 }
2838 kfree(full_path);
2839 return rc;
2840}
2696#endif 2841#endif
2697 2842
2698int 2843int
2699cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2844cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2700 char *mount_data_global, const char *devname) 2845 const char *devname)
2701{ 2846{
2702 int rc; 2847 int rc;
2703 int xid; 2848 int xid;
@@ -2706,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2706 struct cifsTconInfo *tcon; 2851 struct cifsTconInfo *tcon;
2707 struct TCP_Server_Info *srvTcp; 2852 struct TCP_Server_Info *srvTcp;
2708 char *full_path; 2853 char *full_path;
2709 char *mount_data = mount_data_global;
2710 struct tcon_link *tlink; 2854 struct tcon_link *tlink;
2711#ifdef CONFIG_CIFS_DFS_UPCALL 2855#ifdef CONFIG_CIFS_DFS_UPCALL
2712 struct dfs_info3_param *referrals = NULL;
2713 unsigned int num_referrals = 0;
2714 int referral_walks_count = 0; 2856 int referral_walks_count = 0;
2715try_mount_again: 2857try_mount_again:
2858 /* cleanup activities if we're chasing a referral */
2859 if (referral_walks_count) {
2860 if (tcon)
2861 cifs_put_tcon(tcon);
2862 else if (pSesInfo)
2863 cifs_put_smb_ses(pSesInfo);
2864
2865 cleanup_volume_info(&volume_info);
2866 FreeXid(xid);
2867 }
2716#endif 2868#endif
2717 rc = 0; 2869 rc = 0;
2718 tcon = NULL; 2870 tcon = NULL;
@@ -2729,7 +2881,8 @@ try_mount_again:
2729 goto out; 2881 goto out;
2730 } 2882 }
2731 2883
2732 if (cifs_parse_mount_options(mount_data, devname, volume_info)) { 2884 if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
2885 volume_info)) {
2733 rc = -EINVAL; 2886 rc = -EINVAL;
2734 goto out; 2887 goto out;
2735 } 2888 }
@@ -2825,8 +2978,26 @@ try_mount_again:
2825 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2978 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2826 2979
2827remote_path_check: 2980remote_path_check:
2981#ifdef CONFIG_CIFS_DFS_UPCALL
2982 /*
2983 * Perform an unconditional check for whether there are DFS
2984 * referrals for this path without prefix, to provide support
2985 * for DFS referrals from w2k8 servers which don't seem to respond
2986 * with PATH_NOT_COVERED to requests that include the prefix.
2987 * Chase the referral if found, otherwise continue normally.
2988 */
2989 if (referral_walks_count == 0) {
2990 int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
2991 cifs_sb, false);
2992 if (!refrc) {
2993 referral_walks_count++;
2994 goto try_mount_again;
2995 }
2996 }
2997#endif
2998
2828 /* check if a whole path (including prepath) is not remote */ 2999 /* check if a whole path (including prepath) is not remote */
2829 if (!rc && cifs_sb->prepathlen && tcon) { 3000 if (!rc && tcon) {
2830 /* build_path_to_root works only when we have a valid tcon */ 3001 /* build_path_to_root works only when we have a valid tcon */
2831 full_path = cifs_build_path_to_root(cifs_sb, tcon); 3002 full_path = cifs_build_path_to_root(cifs_sb, tcon);
2832 if (full_path == NULL) { 3003 if (full_path == NULL) {
@@ -2858,46 +3029,15 @@ remote_path_check:
2858 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 3029 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2859 convert_delimiter(cifs_sb->prepath, 3030 convert_delimiter(cifs_sb->prepath,
2860 CIFS_DIR_SEP(cifs_sb)); 3031 CIFS_DIR_SEP(cifs_sb));
2861 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2862 if (IS_ERR(full_path)) {
2863 rc = PTR_ERR(full_path);
2864 goto mount_fail_check;
2865 }
2866
2867 cFYI(1, "Getting referral for: %s", full_path);
2868 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2869 cifs_sb->local_nls, &num_referrals, &referrals,
2870 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2871 if (!rc && num_referrals > 0) {
2872 char *fake_devname = NULL;
2873 3032
2874 if (mount_data != mount_data_global) 3033 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
2875 kfree(mount_data); 3034 true);
2876
2877 mount_data = cifs_compose_mount_options(
2878 cifs_sb->mountdata, full_path + 1,
2879 referrals, &fake_devname);
2880
2881 free_dfs_info_array(referrals, num_referrals);
2882 kfree(fake_devname);
2883 kfree(full_path);
2884 3035
2885 if (IS_ERR(mount_data)) { 3036 if (!rc) {
2886 rc = PTR_ERR(mount_data);
2887 mount_data = NULL;
2888 goto mount_fail_check;
2889 }
2890
2891 if (tcon)
2892 cifs_put_tcon(tcon);
2893 else if (pSesInfo)
2894 cifs_put_smb_ses(pSesInfo);
2895
2896 cleanup_volume_info(&volume_info);
2897 referral_walks_count++; 3037 referral_walks_count++;
2898 FreeXid(xid);
2899 goto try_mount_again; 3038 goto try_mount_again;
2900 } 3039 }
3040 goto mount_fail_check;
2901#else /* No DFS support, return error on mount */ 3041#else /* No DFS support, return error on mount */
2902 rc = -EOPNOTSUPP; 3042 rc = -EOPNOTSUPP;
2903#endif 3043#endif
@@ -2930,10 +3070,8 @@ remote_path_check:
2930mount_fail_check: 3070mount_fail_check:
2931 /* on error free sesinfo and tcon struct if needed */ 3071 /* on error free sesinfo and tcon struct if needed */
2932 if (rc) { 3072 if (rc) {
2933 if (mount_data != mount_data_global)
2934 kfree(mount_data);
2935 /* If find_unc succeeded then rc == 0 so we can not end */ 3073 /* If find_unc succeeded then rc == 0 so we can not end */
2936 /* up accidently freeing someone elses tcon struct */ 3074 /* up accidentally freeing someone elses tcon struct */
2937 if (tcon) 3075 if (tcon)
2938 cifs_put_tcon(tcon); 3076 cifs_put_tcon(tcon);
2939 else if (pSesInfo) 3077 else if (pSesInfo)
@@ -3047,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3047 bcc_ptr += strlen("?????"); 3185 bcc_ptr += strlen("?????");
3048 bcc_ptr += 1; 3186 bcc_ptr += 1;
3049 count = bcc_ptr - &pSMB->Password[0]; 3187 count = bcc_ptr - &pSMB->Password[0];
3050 pSMB->hdr.smb_buf_length += count; 3188 pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
3189 pSMB->hdr.smb_buf_length) + count);
3051 pSMB->ByteCount = cpu_to_le16(count); 3190 pSMB->ByteCount = cpu_to_le16(count);
3052 3191
3053 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 3192 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3222,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3222 struct cifsSesInfo *ses; 3361 struct cifsSesInfo *ses;
3223 struct cifsTconInfo *tcon = NULL; 3362 struct cifsTconInfo *tcon = NULL;
3224 struct smb_vol *vol_info; 3363 struct smb_vol *vol_info;
3225 char username[MAX_USERNAME_SIZE + 1]; 3364 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
3365 /* We used to have this as MAX_USERNAME which is */
3366 /* way too big now (256 instead of 32) */
3226 3367
3227 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); 3368 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
3228 if (vol_info == NULL) { 3369 if (vol_info == NULL) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index dd5f22918c33..9ea65cf36714 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -189,7 +189,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
189 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); 189 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
190 /* EIO could indicate that (posix open) operation is not 190 /* EIO could indicate that (posix open) operation is not
191 supported, despite what server claimed in capability 191 supported, despite what server claimed in capability
192 negotation. EREMOTE indicates DFS junction, which is not 192 negotiation. EREMOTE indicates DFS junction, which is not
193 handled in posix open */ 193 handled in posix open */
194 194
195 if (rc == 0) { 195 if (rc == 0) {
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
45#include "cifs_debug.h" 45#include "cifs_debug.h"
46#include "cifsfs.h" 46#include "cifsfs.h"
47 47
48#ifdef CONFIG_CIFS_EXPERIMENTAL 48#ifdef CIFS_NFSD_EXPORT
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
63 .encode_fs = */ 63 .encode_fs = */
64}; 64};
65 65
66#endif /* EXPERIMENTAL */ 66#endif /* CIFS_NFSD_EXPORT */
67 67
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c27d236738fc..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -575,8 +575,10 @@ reopen_error_exit:
575 575
576int cifs_close(struct inode *inode, struct file *file) 576int cifs_close(struct inode *inode, struct file *file)
577{ 577{
578 cifsFileInfo_put(file->private_data); 578 if (file->private_data != NULL) {
579 file->private_data = NULL; 579 cifsFileInfo_put(file->private_data);
580 file->private_data = NULL;
581 }
580 582
581 /* return code from the ->release op is always ignored */ 583 /* return code from the ->release op is always ignored */
582 return 0; 584 return 0;
@@ -855,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
855 cifsi->server_eof = end_of_write; 857 cifsi->server_eof = end_of_write;
856} 858}
857 859
858ssize_t cifs_user_write(struct file *file, const char __user *write_data,
859 size_t write_size, loff_t *poffset)
860{
861 struct inode *inode = file->f_path.dentry->d_inode;
862 int rc = 0;
863 unsigned int bytes_written = 0;
864 unsigned int total_written;
865 struct cifs_sb_info *cifs_sb;
866 struct cifsTconInfo *pTcon;
867 int xid;
868 struct cifsFileInfo *open_file;
869 struct cifsInodeInfo *cifsi = CIFS_I(inode);
870
871 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
872
873 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
874 *poffset, file->f_path.dentry->d_name.name); */
875
876 if (file->private_data == NULL)
877 return -EBADF;
878
879 open_file = file->private_data;
880 pTcon = tlink_tcon(open_file->tlink);
881
882 rc = generic_write_checks(file, poffset, &write_size, 0);
883 if (rc)
884 return rc;
885
886 xid = GetXid();
887
888 for (total_written = 0; write_size > total_written;
889 total_written += bytes_written) {
890 rc = -EAGAIN;
891 while (rc == -EAGAIN) {
892 if (file->private_data == NULL) {
893 /* file has been closed on us */
894 FreeXid(xid);
895 /* if we have gotten here we have written some data
896 and blocked, and the file has been freed on us while
897 we blocked so return what we managed to write */
898 return total_written;
899 }
900 if (open_file->invalidHandle) {
901 /* we could deadlock if we called
902 filemap_fdatawait from here so tell
903 reopen_file not to flush data to server
904 now */
905 rc = cifs_reopen_file(open_file, false);
906 if (rc != 0)
907 break;
908 }
909
910 rc = CIFSSMBWrite(xid, pTcon,
911 open_file->netfid,
912 min_t(const int, cifs_sb->wsize,
913 write_size - total_written),
914 *poffset, &bytes_written,
915 NULL, write_data + total_written, 0);
916 }
917 if (rc || (bytes_written == 0)) {
918 if (total_written)
919 break;
920 else {
921 FreeXid(xid);
922 return rc;
923 }
924 } else {
925 cifs_update_eof(cifsi, *poffset, bytes_written);
926 *poffset += bytes_written;
927 }
928 }
929
930 cifs_stats_bytes_written(pTcon, total_written);
931
932/* Do not update local mtime - server will set its actual value on write
933 * inode->i_ctime = inode->i_mtime =
934 * current_fs_time(inode->i_sb);*/
935 if (total_written > 0) {
936 spin_lock(&inode->i_lock);
937 if (*poffset > inode->i_size)
938 i_size_write(inode, *poffset);
939 spin_unlock(&inode->i_lock);
940 }
941 mark_inode_dirty_sync(inode);
942
943 FreeXid(xid);
944 return total_written;
945}
946
947static ssize_t cifs_write(struct cifsFileInfo *open_file, 860static ssize_t cifs_write(struct cifsFileInfo *open_file,
948 const char *write_data, size_t write_size, 861 const char *write_data, size_t write_size,
949 loff_t *poffset) 862 loff_t *poffset)
@@ -970,6 +883,9 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
970 total_written += bytes_written) { 883 total_written += bytes_written) {
971 rc = -EAGAIN; 884 rc = -EAGAIN;
972 while (rc == -EAGAIN) { 885 while (rc == -EAGAIN) {
886 struct kvec iov[2];
887 unsigned int len;
888
973 if (open_file->invalidHandle) { 889 if (open_file->invalidHandle) {
974 /* we could deadlock if we called 890 /* we could deadlock if we called
975 filemap_fdatawait from here so tell 891 filemap_fdatawait from here so tell
@@ -979,31 +895,14 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
979 if (rc != 0) 895 if (rc != 0)
980 break; 896 break;
981 } 897 }
982 if (experimEnabled || (pTcon->ses->server && 898
983 ((pTcon->ses->server->secMode & 899 len = min((size_t)cifs_sb->wsize,
984 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 900 write_size - total_written);
985 == 0))) { 901 /* iov[0] is reserved for smb header */
986 struct kvec iov[2]; 902 iov[1].iov_base = (char *)write_data + total_written;
987 unsigned int len; 903 iov[1].iov_len = len;
988 904 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len,
989 len = min((size_t)cifs_sb->wsize, 905 *poffset, &bytes_written, iov, 1, 0);
990 write_size - total_written);
991 /* iov[0] is reserved for smb header */
992 iov[1].iov_base = (char *)write_data +
993 total_written;
994 iov[1].iov_len = len;
995 rc = CIFSSMBWrite2(xid, pTcon,
996 open_file->netfid, len,
997 *poffset, &bytes_written,
998 iov, 1, 0);
999 } else
1000 rc = CIFSSMBWrite(xid, pTcon,
1001 open_file->netfid,
1002 min_t(const int, cifs_sb->wsize,
1003 write_size - total_written),
1004 *poffset, &bytes_written,
1005 write_data + total_written,
1006 NULL, 0);
1007 } 906 }
1008 if (rc || (bytes_written == 0)) { 907 if (rc || (bytes_written == 0)) {
1009 if (total_written) 908 if (total_written)
@@ -1240,12 +1139,6 @@ static int cifs_writepages(struct address_space *mapping,
1240 } 1139 }
1241 1140
1242 tcon = tlink_tcon(open_file->tlink); 1141 tcon = tlink_tcon(open_file->tlink);
1243 if (!experimEnabled && tcon->ses->server->secMode &
1244 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1245 cifsFileInfo_put(open_file);
1246 kfree(iov);
1247 return generic_writepages(mapping, wbc);
1248 }
1249 cifsFileInfo_put(open_file); 1142 cifsFileInfo_put(open_file);
1250 1143
1251 xid = GetXid(); 1144 xid = GetXid();
@@ -1438,9 +1331,10 @@ retry_write:
1438 return rc; 1331 return rc;
1439} 1332}
1440 1333
1441static int cifs_writepage(struct page *page, struct writeback_control *wbc) 1334static int
1335cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1442{ 1336{
1443 int rc = -EFAULT; 1337 int rc;
1444 int xid; 1338 int xid;
1445 1339
1446 xid = GetXid(); 1340 xid = GetXid();
@@ -1460,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1460 * to fail to update with the state of the page correctly. 1354 * to fail to update with the state of the page correctly.
1461 */ 1355 */
1462 set_page_writeback(page); 1356 set_page_writeback(page);
1357retry_write:
1463 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE); 1358 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1464 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */ 1359 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1465 unlock_page(page); 1360 goto retry_write;
1361 else if (rc == -EAGAIN)
1362 redirty_page_for_writepage(wbc, page);
1363 else if (rc != 0)
1364 SetPageError(page);
1365 else
1366 SetPageUptodate(page);
1466 end_page_writeback(page); 1367 end_page_writeback(page);
1467 page_cache_release(page); 1368 page_cache_release(page);
1468 FreeXid(xid); 1369 FreeXid(xid);
1469 return rc; 1370 return rc;
1470} 1371}
1471 1372
1373static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1374{
1375 int rc = cifs_writepage_locked(page, wbc);
1376 unlock_page(page);
1377 return rc;
1378}
1379
1472static int cifs_write_end(struct file *file, struct address_space *mapping, 1380static int cifs_write_end(struct file *file, struct address_space *mapping,
1473 loff_t pos, unsigned len, unsigned copied, 1381 loff_t pos, unsigned len, unsigned copied,
1474 struct page *page, void *fsdata) 1382 struct page *page, void *fsdata)
@@ -1537,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
1537 cFYI(1, "Sync file - name: %s datasync: 0x%x", 1445 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1538 file->f_path.dentry->d_name.name, datasync); 1446 file->f_path.dentry->d_name.name, datasync);
1539 1447
1540 if (!CIFS_I(inode)->clientCanCacheRead) 1448 if (!CIFS_I(inode)->clientCanCacheRead) {
1541 cifs_invalidate_mapping(inode); 1449 rc = cifs_invalidate_mapping(inode);
1450 if (rc) {
1451 cFYI(1, "rc: %d during invalidate phase", rc);
1452 rc = 0; /* don't care about it in fsync */
1453 }
1454 }
1542 1455
1543 tcon = tlink_tcon(smbfile->tlink); 1456 tcon = tlink_tcon(smbfile->tlink);
1544 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) 1457 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1744,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1744 return total_written; 1657 return total_written;
1745} 1658}
1746 1659
1747static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, 1660ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1748 unsigned long nr_segs, loff_t pos) 1661 unsigned long nr_segs, loff_t pos)
1749{ 1662{
1750 ssize_t written; 1663 ssize_t written;
@@ -1867,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1867 return total_read; 1780 return total_read;
1868} 1781}
1869 1782
1870ssize_t cifs_user_read(struct file *file, char __user *read_data, 1783ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1871 size_t read_size, loff_t *poffset)
1872{
1873 struct iovec iov;
1874 iov.iov_base = read_data;
1875 iov.iov_len = read_size;
1876
1877 return cifs_iovec_read(file, &iov, 1, poffset);
1878}
1879
1880static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1881 unsigned long nr_segs, loff_t pos) 1784 unsigned long nr_segs, loff_t pos)
1882{ 1785{
1883 ssize_t read; 1786 ssize_t read;
@@ -1980,6 +1883,24 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1980 return total_read; 1883 return total_read;
1981} 1884}
1982 1885
1886/*
1887 * If the page is mmap'ed into a process' page tables, then we need to make
1888 * sure that it doesn't change while being written back.
1889 */
1890static int
1891cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1892{
1893 struct page *page = vmf->page;
1894
1895 lock_page(page);
1896 return VM_FAULT_LOCKED;
1897}
1898
1899static struct vm_operations_struct cifs_file_vm_ops = {
1900 .fault = filemap_fault,
1901 .page_mkwrite = cifs_page_mkwrite,
1902};
1903
1983int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 1904int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1984{ 1905{
1985 int rc, xid; 1906 int rc, xid;
@@ -1987,10 +1908,15 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1987 1908
1988 xid = GetXid(); 1909 xid = GetXid();
1989 1910
1990 if (!CIFS_I(inode)->clientCanCacheRead) 1911 if (!CIFS_I(inode)->clientCanCacheRead) {
1991 cifs_invalidate_mapping(inode); 1912 rc = cifs_invalidate_mapping(inode);
1913 if (rc)
1914 return rc;
1915 }
1992 1916
1993 rc = generic_file_mmap(file, vma); 1917 rc = generic_file_mmap(file, vma);
1918 if (rc == 0)
1919 vma->vm_ops = &cifs_file_vm_ops;
1994 FreeXid(xid); 1920 FreeXid(xid);
1995 return rc; 1921 return rc;
1996} 1922}
@@ -2007,6 +1933,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2007 return rc; 1933 return rc;
2008 } 1934 }
2009 rc = generic_file_mmap(file, vma); 1935 rc = generic_file_mmap(file, vma);
1936 if (rc == 0)
1937 vma->vm_ops = &cifs_file_vm_ops;
2010 FreeXid(xid); 1938 FreeXid(xid);
2011 return rc; 1939 return rc;
2012} 1940}
@@ -2411,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
2411 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); 2339 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2412} 2340}
2413 2341
2342static int cifs_launder_page(struct page *page)
2343{
2344 int rc = 0;
2345 loff_t range_start = page_offset(page);
2346 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2347 struct writeback_control wbc = {
2348 .sync_mode = WB_SYNC_ALL,
2349 .nr_to_write = 0,
2350 .range_start = range_start,
2351 .range_end = range_end,
2352 };
2353
2354 cFYI(1, "Launder page: %p", page);
2355
2356 if (clear_page_dirty_for_io(page))
2357 rc = cifs_writepage_locked(page, &wbc);
2358
2359 cifs_fscache_invalidate_page(page, page->mapping->host);
2360 return rc;
2361}
2362
2414void cifs_oplock_break(struct work_struct *work) 2363void cifs_oplock_break(struct work_struct *work)
2415{ 2364{
2416 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 2365 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2482,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
2482 .set_page_dirty = __set_page_dirty_nobuffers, 2431 .set_page_dirty = __set_page_dirty_nobuffers,
2483 .releasepage = cifs_release_page, 2432 .releasepage = cifs_release_page,
2484 .invalidatepage = cifs_invalidate_page, 2433 .invalidatepage = cifs_invalidate_page,
2485 /* .direct_IO = */ 2434 .launder_page = cifs_launder_page,
2486}; 2435};
2487 2436
2488/* 2437/*
@@ -2499,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2499 .set_page_dirty = __set_page_dirty_nobuffers, 2448 .set_page_dirty = __set_page_dirty_nobuffers,
2500 .releasepage = cifs_release_page, 2449 .releasepage = cifs_release_page,
2501 .invalidatepage = cifs_invalidate_page, 2450 .invalidatepage = cifs_invalidate_page,
2502 /* .direct_IO = */ 2451 .launder_page = cifs_launder_page,
2503}; 2452};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
878} 878}
879 879
880/* gets root inode */ 880/* gets root inode */
881struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) 881struct inode *cifs_root_iget(struct super_block *sb)
882{ 882{
883 int xid; 883 int xid;
884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
1683/* 1683/*
1684 * Zap the cache. Called when invalid_mapping flag is set. 1684 * Zap the cache. Called when invalid_mapping flag is set.
1685 */ 1685 */
1686void 1686int
1687cifs_invalidate_mapping(struct inode *inode) 1687cifs_invalidate_mapping(struct inode *inode)
1688{ 1688{
1689 int rc; 1689 int rc = 0;
1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1691 1691
1692 cifs_i->invalid_mapping = false; 1692 cifs_i->invalid_mapping = false;
1693 1693
1694 /* write back any cached data */
1695 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1694 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1696 rc = filemap_write_and_wait(inode->i_mapping); 1695 rc = invalidate_inode_pages2(inode->i_mapping);
1697 mapping_set_error(inode->i_mapping, rc); 1696 if (rc) {
1697 cERROR(1, "%s: could not invalidate inode %p", __func__,
1698 inode);
1699 cifs_i->invalid_mapping = true;
1700 }
1698 } 1701 }
1699 invalidate_remote_inode(inode); 1702
1700 cifs_fscache_reset_inode_cookie(inode); 1703 cifs_fscache_reset_inode_cookie(inode);
1704 return rc;
1701} 1705}
1702 1706
1703int cifs_revalidate_file(struct file *filp) 1707int cifs_revalidate_file_attr(struct file *filp)
1704{ 1708{
1705 int rc = 0; 1709 int rc = 0;
1706 struct inode *inode = filp->f_path.dentry->d_inode; 1710 struct inode *inode = filp->f_path.dentry->d_inode;
1707 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 1711 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
1708 1712
1709 if (!cifs_inode_needs_reval(inode)) 1713 if (!cifs_inode_needs_reval(inode))
1710 goto check_inval; 1714 return rc;
1711 1715
1712 if (tlink_tcon(cfile->tlink)->unix_ext) 1716 if (tlink_tcon(cfile->tlink)->unix_ext)
1713 rc = cifs_get_file_info_unix(filp); 1717 rc = cifs_get_file_info_unix(filp);
1714 else 1718 else
1715 rc = cifs_get_file_info(filp); 1719 rc = cifs_get_file_info(filp);
1716 1720
1717check_inval:
1718 if (CIFS_I(inode)->invalid_mapping)
1719 cifs_invalidate_mapping(inode);
1720
1721 return rc; 1721 return rc;
1722} 1722}
1723 1723
1724/* revalidate a dentry's inode attributes */ 1724int cifs_revalidate_dentry_attr(struct dentry *dentry)
1725int cifs_revalidate_dentry(struct dentry *dentry)
1726{ 1725{
1727 int xid; 1726 int xid;
1728 int rc = 0; 1727 int rc = 0;
1729 char *full_path = NULL;
1730 struct inode *inode = dentry->d_inode; 1728 struct inode *inode = dentry->d_inode;
1731 struct super_block *sb = dentry->d_sb; 1729 struct super_block *sb = dentry->d_sb;
1730 char *full_path = NULL;
1732 1731
1733 if (inode == NULL) 1732 if (inode == NULL)
1734 return -ENOENT; 1733 return -ENOENT;
1735 1734
1736 xid = GetXid();
1737
1738 if (!cifs_inode_needs_reval(inode)) 1735 if (!cifs_inode_needs_reval(inode))
1739 goto check_inval; 1736 return rc;
1737
1738 xid = GetXid();
1740 1739
1741 /* can not safely grab the rename sem here if rename calls revalidate 1740 /* can not safely grab the rename sem here if rename calls revalidate
1742 since that would deadlock */ 1741 since that would deadlock */
1743 full_path = build_path_from_dentry(dentry); 1742 full_path = build_path_from_dentry(dentry);
1744 if (full_path == NULL) { 1743 if (full_path == NULL) {
1745 rc = -ENOMEM; 1744 rc = -ENOMEM;
1746 goto check_inval; 1745 goto out;
1747 } 1746 }
1748 1747
1749 cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1748 cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
1750 "jiffies %ld", full_path, inode, inode->i_count.counter, 1749 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
1751 dentry, dentry->d_time, jiffies); 1750 dentry, dentry->d_time, jiffies);
1752 1751
1753 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) 1752 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
1756 rc = cifs_get_inode_info(&inode, full_path, NULL, sb, 1755 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
1757 xid, NULL); 1756 xid, NULL);
1758 1757
1759check_inval: 1758out:
1760 if (CIFS_I(inode)->invalid_mapping)
1761 cifs_invalidate_mapping(inode);
1762
1763 kfree(full_path); 1759 kfree(full_path);
1764 FreeXid(xid); 1760 FreeXid(xid);
1765 return rc; 1761 return rc;
1766} 1762}
1767 1763
1764int cifs_revalidate_file(struct file *filp)
1765{
1766 int rc;
1767 struct inode *inode = filp->f_path.dentry->d_inode;
1768
1769 rc = cifs_revalidate_file_attr(filp);
1770 if (rc)
1771 return rc;
1772
1773 if (CIFS_I(inode)->invalid_mapping)
1774 rc = cifs_invalidate_mapping(inode);
1775 return rc;
1776}
1777
1778/* revalidate a dentry's inode attributes */
1779int cifs_revalidate_dentry(struct dentry *dentry)
1780{
1781 int rc;
1782 struct inode *inode = dentry->d_inode;
1783
1784 rc = cifs_revalidate_dentry_attr(dentry);
1785 if (rc)
1786 return rc;
1787
1788 if (CIFS_I(inode)->invalid_mapping)
1789 rc = cifs_invalidate_mapping(inode);
1790 return rc;
1791}
1792
1768int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1793int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1769 struct kstat *stat) 1794 struct kstat *stat)
1770{ 1795{
1771 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); 1796 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
1772 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 1797 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
1773 int err = cifs_revalidate_dentry(dentry); 1798 struct inode *inode = dentry->d_inode;
1774 1799 int rc;
1775 if (!err) {
1776 generic_fillattr(dentry->d_inode, stat);
1777 stat->blksize = CIFS_MAX_MSGSIZE;
1778 stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
1779 1800
1780 /* 1801 /*
1781 * If on a multiuser mount without unix extensions, and the 1802 * We need to be sure that all dirty pages are written and the server
1782 * admin hasn't overridden them, set the ownership to the 1803 * has actual ctime, mtime and file length.
1783 * fsuid/fsgid of the current process. 1804 */
1784 */ 1805 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
1785 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && 1806 inode->i_mapping->nrpages != 0) {
1786 !tcon->unix_ext) { 1807 rc = filemap_fdatawait(inode->i_mapping);
1787 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) 1808 if (rc) {
1788 stat->uid = current_fsuid(); 1809 mapping_set_error(inode->i_mapping, rc);
1789 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) 1810 return rc;
1790 stat->gid = current_fsgid();
1791 } 1811 }
1792 } 1812 }
1793 return err; 1813
1814 rc = cifs_revalidate_dentry_attr(dentry);
1815 if (rc)
1816 return rc;
1817
1818 generic_fillattr(inode, stat);
1819 stat->blksize = CIFS_MAX_MSGSIZE;
1820 stat->ino = CIFS_I(inode)->uniqueid;
1821
1822 /*
1823 * If on a multiuser mount without unix extensions, and the admin hasn't
1824 * overridden them, set the ownership to the fsuid/fsgid of the current
1825 * process.
1826 */
1827 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
1828 !tcon->unix_ext) {
1829 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
1830 stat->uid = current_fsuid();
1831 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
1832 stat->gid = current_fsgid();
1833 }
1834 return rc;
1794} 1835}
1795 1836
1796static int cifs_truncate_page(struct address_space *mapping, loff_t from) 1837static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e8804d373404..ce417a9764a3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -239,7 +239,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
239 if (rc != 0) 239 if (rc != 0)
240 return rc; 240 return rc;
241 241
242 if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { 242 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
243 CIFSSMBClose(xid, tcon, netfid); 243 CIFSSMBClose(xid, tcon, netfid);
244 /* it's not a symlink */ 244 /* it's not a symlink */
245 return -EINVAL; 245 return -EINVAL;
@@ -316,7 +316,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
316 if (rc != 0) 316 if (rc != 0)
317 goto out; 317 goto out;
318 318
319 if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { 319 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
320 CIFSSMBClose(xid, pTcon, netfid); 320 CIFSSMBClose(xid, pTcon, netfid);
321 /* it's not a symlink */ 321 /* it's not a symlink */
322 goto out; 322 goto out;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a930a752a78..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -100,6 +100,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
100 memset(buf_to_free->password, 0, strlen(buf_to_free->password)); 100 memset(buf_to_free->password, 0, strlen(buf_to_free->password));
101 kfree(buf_to_free->password); 101 kfree(buf_to_free->password);
102 } 102 }
103 kfree(buf_to_free->user_name);
103 kfree(buf_to_free->domainName); 104 kfree(buf_to_free->domainName);
104 kfree(buf_to_free); 105 kfree(buf_to_free);
105} 106}
@@ -303,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
303 304
304 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
305 306
306 buffer->smb_buf_length = 307 buffer->smb_buf_length = cpu_to_be32(
307 (2 * word_count) + sizeof(struct smb_hdr) - 308 (2 * word_count) + sizeof(struct smb_hdr) -
308 4 /* RFC 1001 length field does not count */ + 309 4 /* RFC 1001 length field does not count */ +
309 2 /* for bcc field itself */ ; 310 2 /* for bcc field itself */) ;
310 /* Note that this is the only network field that has to be converted
311 to big endian and it is done just before we send it */
312 311
313 buffer->Protocol[0] = 0xFF; 312 buffer->Protocol[0] = 0xFF;
314 buffer->Protocol[1] = 'S'; 313 buffer->Protocol[1] = 'S';
@@ -423,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
423int 422int
424checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) 423checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
425{ 424{
426 __u32 len = smb->smb_buf_length; 425 __u32 len = be32_to_cpu(smb->smb_buf_length);
427 __u32 clc_len; /* calculated length */ 426 __u32 clc_len; /* calculated length */
428 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len); 427 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
429 428
@@ -463,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
463 462
464 if (check_smb_hdr(smb, mid)) 463 if (check_smb_hdr(smb, mid))
465 return 1; 464 return 1;
466 clc_len = smbCalcSize_LE(smb); 465 clc_len = smbCalcSize(smb);
467 466
468 if (4 + len != length) { 467 if (4 + len != length) {
469 cERROR(1, "Length read does not match RFC1001 length %d", 468 cERROR(1, "Length read does not match RFC1001 length %d",
@@ -520,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
520 (struct smb_com_transaction_change_notify_rsp *)buf; 519 (struct smb_com_transaction_change_notify_rsp *)buf;
521 struct file_notify_information *pnotify; 520 struct file_notify_information *pnotify;
522 __u32 data_offset = 0; 521 __u32 data_offset = 0;
523 if (pSMBr->ByteCount > sizeof(struct file_notify_information)) { 522 if (get_bcc(buf) > sizeof(struct file_notify_information)) {
524 data_offset = le32_to_cpu(pSMBr->DataOffset); 523 data_offset = le32_to_cpu(pSMBr->DataOffset);
525 524
526 pnotify = (struct file_notify_information *) 525 pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
919 2 /* size of the bcc field */ + get_bcc(ptr)); 919 2 /* size of the bcc field */ + get_bcc(ptr));
920} 920}
921 921
922unsigned int
923smbCalcSize_LE(struct smb_hdr *ptr)
924{
925 return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
926 2 /* size of the bcc field */ + get_bcc_le(ptr));
927}
928
929/* The following are taken from fs/ntfs/util.c */ 922/* The following are taken from fs/ntfs/util.c */
930 923
931#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) 924#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 16765703131b..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -219,12 +219,12 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
219 bcc_ptr++; 219 bcc_ptr++;
220 } */ 220 } */
221 /* copy user */ 221 /* copy user */
222 if (ses->userName == NULL) { 222 if (ses->user_name == NULL) {
223 /* null user mount */ 223 /* null user mount */
224 *bcc_ptr = 0; 224 *bcc_ptr = 0;
225 *(bcc_ptr+1) = 0; 225 *(bcc_ptr+1) = 0;
226 } else { 226 } else {
227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name,
228 MAX_USERNAME_SIZE, nls_cp); 228 MAX_USERNAME_SIZE, nls_cp);
229 } 229 }
230 bcc_ptr += 2 * bytes_ret; 230 bcc_ptr += 2 * bytes_ret;
@@ -244,12 +244,11 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
244 /* copy user */ 244 /* copy user */
245 /* BB what about null user mounts - check that we do this BB */ 245 /* BB what about null user mounts - check that we do this BB */
246 /* copy user */ 246 /* copy user */
247 if (ses->userName == NULL) { 247 if (ses->user_name != NULL)
248 /* BB what about null user mounts - check that we do this BB */ 248 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
249 } else { 249 /* else null user mount */
250 strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE); 250
251 } 251 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
252 bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
253 *bcc_ptr = 0; 252 *bcc_ptr = 0;
254 bcc_ptr++; /* account for null termination */ 253 bcc_ptr++; /* account for null termination */
255 254
@@ -277,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
277} 276}
278 277
279static void 278static void
280decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, 279decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
281 const struct nls_table *nls_cp) 280 const struct nls_table *nls_cp)
282{ 281{
283 int len; 282 int len;
@@ -285,19 +284,6 @@ decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
285 284
286 cFYI(1, "bleft %d", bleft); 285 cFYI(1, "bleft %d", bleft);
287 286
288 /*
289 * Windows servers do not always double null terminate their final
290 * Unicode string. Check to see if there are an uneven number of bytes
291 * left. If so, then add an extra NULL pad byte to the end of the
292 * response.
293 *
294 * See section 2.7.2 in "Implementing CIFS" for details
295 */
296 if (bleft % 2) {
297 data[bleft] = 0;
298 ++bleft;
299 }
300
301 kfree(ses->serverOS); 287 kfree(ses->serverOS);
302 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 288 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
303 cFYI(1, "serverOS=%s", ses->serverOS); 289 cFYI(1, "serverOS=%s", ses->serverOS);
@@ -405,8 +391,8 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 391 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
406 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 392 we must set the MIC field of the AUTHENTICATE_MESSAGE */
407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); 393 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 394 tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 395 tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
410 if (tilen) { 396 if (tilen) {
411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); 397 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
412 if (!ses->auth_key.response) { 398 if (!ses->auth_key.response) {
@@ -523,14 +509,14 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
523 tmp += len; 509 tmp += len;
524 } 510 }
525 511
526 if (ses->userName == NULL) { 512 if (ses->user_name == NULL) {
527 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 513 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
528 sec_blob->UserName.Length = 0; 514 sec_blob->UserName.Length = 0;
529 sec_blob->UserName.MaximumLength = 0; 515 sec_blob->UserName.MaximumLength = 0;
530 tmp += 2; 516 tmp += 2;
531 } else { 517 } else {
532 int len; 518 int len;
533 len = cifs_strtoUCS((__le16 *)tmp, ses->userName, 519 len = cifs_strtoUCS((__le16 *)tmp, ses->user_name,
534 MAX_USERNAME_SIZE, nls_cp); 520 MAX_USERNAME_SIZE, nls_cp);
535 len *= 2; /* unicode is 2 bytes each */ 521 len *= 2; /* unicode is 2 bytes each */
536 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 522 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -635,7 +621,7 @@ ssetup_ntlmssp_authenticate:
635 and rest of bcc area. This allows us to avoid 621 and rest of bcc area. This allows us to avoid
636 a large buffer 17K allocation */ 622 a large buffer 17K allocation */
637 iov[0].iov_base = (char *)pSMB; 623 iov[0].iov_base = (char *)pSMB;
638 iov[0].iov_len = smb_buf->smb_buf_length + 4; 624 iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
639 625
640 /* setting this here allows the code at the end of the function 626 /* setting this here allows the code at the end of the function
641 to free the request buffer if there's an error */ 627 to free the request buffer if there's an error */
@@ -670,7 +656,7 @@ ssetup_ntlmssp_authenticate:
670 * to use challenge/response method (i.e. Password bit is 1). 656 * to use challenge/response method (i.e. Password bit is 1).
671 */ 657 */
672 658
673 calc_lanman_hash(ses->password, ses->server->cryptkey, 659 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
674 ses->server->secMode & SECMODE_PW_ENCRYPT ? 660 ses->server->secMode & SECMODE_PW_ENCRYPT ?
675 true : false, lnm_session_key); 661 true : false, lnm_session_key);
676 662
@@ -873,9 +859,10 @@ ssetup_ntlmssp_authenticate:
873 iov[2].iov_len = (long) bcc_ptr - (long) str_area; 859 iov[2].iov_len = (long) bcc_ptr - (long) str_area;
874 860
875 count = iov[1].iov_len + iov[2].iov_len; 861 count = iov[1].iov_len + iov[2].iov_len;
876 smb_buf->smb_buf_length += count; 862 smb_buf->smb_buf_length =
863 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
877 864
878 put_bcc_le(count, smb_buf); 865 put_bcc(count, smb_buf);
879 866
880 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, 867 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
881 CIFS_LOG_ERROR); 868 CIFS_LOG_ERROR);
@@ -930,7 +917,9 @@ ssetup_ntlmssp_authenticate:
930 } 917 }
931 918
932 /* BB check if Unicode and decode strings */ 919 /* BB check if Unicode and decode strings */
933 if (smb_buf->Flags2 & SMBFLG2_UNICODE) { 920 if (bytes_remaining == 0) {
921 /* no string area to decode, do nothing */
922 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
934 /* unicode string area must be word-aligned */ 923 /* unicode string area must be word-aligned */
935 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { 924 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
936 ++bcc_ptr; 925 ++bcc_ptr;
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4
5 a partial implementation of DES designed for use in the
6 SMB authentication protocol
7
8 Copyright (C) Andrew Tridgell 1998
9 Modified by Steve French (sfrench@us.ibm.com) 2002,2004
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26/* NOTES:
27
28 This code makes no attempt to be fast! In fact, it is a very
29 slow implementation
30
31 This code is NOT a complete DES implementation. It implements only
32 the minimum necessary for SMB authentication, as used by all SMB
33 products (including every copy of Microsoft Windows95 ever sold)
34
35 In particular, it can only do a unchained forward DES pass. This
36 means it is not possible to use this code for encryption/decryption
37 of data, instead it is only useful as a "hash" algorithm.
38
39 There is no entry point into this code that allows normal DES operation.
40
41 I believe this means that this code does not come under ITAR
42 regulations but this is NOT a legal opinion. If you are concerned
43 about the applicability of ITAR regulations to this code then you
44 should confirm it for yourself (and maybe let me know if you come
45 up with a different answer to the one above)
46*/
47#include <linux/slab.h>
48#define uchar unsigned char
49
50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
51 1, 58, 50, 42, 34, 26, 18,
52 10, 2, 59, 51, 43, 35, 27,
53 19, 11, 3, 60, 52, 44, 36,
54 63, 55, 47, 39, 31, 23, 15,
55 7, 62, 54, 46, 38, 30, 22,
56 14, 6, 61, 53, 45, 37, 29,
57 21, 13, 5, 28, 20, 12, 4
58};
59
60static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
61 3, 28, 15, 6, 21, 10,
62 23, 19, 12, 4, 26, 8,
63 16, 7, 27, 20, 13, 2,
64 41, 52, 31, 37, 47, 55,
65 30, 40, 51, 45, 33, 48,
66 44, 49, 39, 56, 34, 53,
67 46, 42, 50, 36, 29, 32
68};
69
70static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
71 60, 52, 44, 36, 28, 20, 12, 4,
72 62, 54, 46, 38, 30, 22, 14, 6,
73 64, 56, 48, 40, 32, 24, 16, 8,
74 57, 49, 41, 33, 25, 17, 9, 1,
75 59, 51, 43, 35, 27, 19, 11, 3,
76 61, 53, 45, 37, 29, 21, 13, 5,
77 63, 55, 47, 39, 31, 23, 15, 7
78};
79
80static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
81 4, 5, 6, 7, 8, 9,
82 8, 9, 10, 11, 12, 13,
83 12, 13, 14, 15, 16, 17,
84 16, 17, 18, 19, 20, 21,
85 20, 21, 22, 23, 24, 25,
86 24, 25, 26, 27, 28, 29,
87 28, 29, 30, 31, 32, 1
88};
89
90static uchar perm5[32] = { 16, 7, 20, 21,
91 29, 12, 28, 17,
92 1, 15, 23, 26,
93 5, 18, 31, 10,
94 2, 8, 24, 14,
95 32, 27, 3, 9,
96 19, 13, 30, 6,
97 22, 11, 4, 25
98};
99
100static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
101 39, 7, 47, 15, 55, 23, 63, 31,
102 38, 6, 46, 14, 54, 22, 62, 30,
103 37, 5, 45, 13, 53, 21, 61, 29,
104 36, 4, 44, 12, 52, 20, 60, 28,
105 35, 3, 43, 11, 51, 19, 59, 27,
106 34, 2, 42, 10, 50, 18, 58, 26,
107 33, 1, 41, 9, 49, 17, 57, 25
108};
109
110static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
111
112static uchar sbox[8][4][16] = {
113 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
114 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
115 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
116 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
117
118 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
119 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
120 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
121 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
122
123 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
124 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
125 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
126 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
127
128 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
129 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
130 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
131 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
132
133 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
134 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
135 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
136 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
137
138 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
139 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
140 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
141 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
142
143 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
144 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
145 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
146 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
147
148 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
149 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
150 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
151 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
152};
153
154static void
155permute(char *out, char *in, uchar *p, int n)
156{
157 int i;
158 for (i = 0; i < n; i++)
159 out[i] = in[p[i] - 1];
160}
161
162static void
163lshift(char *d, int count, int n)
164{
165 char out[64];
166 int i;
167 for (i = 0; i < n; i++)
168 out[i] = d[(i + count) % n];
169 for (i = 0; i < n; i++)
170 d[i] = out[i];
171}
172
173static void
174concat(char *out, char *in1, char *in2, int l1, int l2)
175{
176 while (l1--)
177 *out++ = *in1++;
178 while (l2--)
179 *out++ = *in2++;
180}
181
182static void
183xor(char *out, char *in1, char *in2, int n)
184{
185 int i;
186 for (i = 0; i < n; i++)
187 out[i] = in1[i] ^ in2[i];
188}
189
190static void
191dohash(char *out, char *in, char *key, int forw)
192{
193 int i, j, k;
194 char *pk1;
195 char c[28];
196 char d[28];
197 char *cd;
198 char (*ki)[48];
199 char *pd1;
200 char l[32], r[32];
201 char *rl;
202
203 /* Have to reduce stack usage */
204 pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
205 if (pk1 == NULL)
206 return;
207
208 ki = kmalloc(16*48, GFP_KERNEL);
209 if (ki == NULL) {
210 kfree(pk1);
211 return;
212 }
213
214 cd = pk1 + 56;
215 pd1 = cd + 56;
216 rl = pd1 + 64;
217
218 permute(pk1, key, perm1, 56);
219
220 for (i = 0; i < 28; i++)
221 c[i] = pk1[i];
222 for (i = 0; i < 28; i++)
223 d[i] = pk1[i + 28];
224
225 for (i = 0; i < 16; i++) {
226 lshift(c, sc[i], 28);
227 lshift(d, sc[i], 28);
228
229 concat(cd, c, d, 28, 28);
230 permute(ki[i], cd, perm2, 48);
231 }
232
233 permute(pd1, in, perm3, 64);
234
235 for (j = 0; j < 32; j++) {
236 l[j] = pd1[j];
237 r[j] = pd1[j + 32];
238 }
239
240 for (i = 0; i < 16; i++) {
241 char *er; /* er[48] */
242 char *erk; /* erk[48] */
243 char b[8][6];
244 char *cb; /* cb[32] */
245 char *pcb; /* pcb[32] */
246 char *r2; /* r2[32] */
247
248 er = kmalloc(48+48+32+32+32, GFP_KERNEL);
249 if (er == NULL) {
250 kfree(pk1);
251 kfree(ki);
252 return;
253 }
254 erk = er+48;
255 cb = erk+48;
256 pcb = cb+32;
257 r2 = pcb+32;
258
259 permute(er, r, perm4, 48);
260
261 xor(erk, er, ki[forw ? i : 15 - i], 48);
262
263 for (j = 0; j < 8; j++)
264 for (k = 0; k < 6; k++)
265 b[j][k] = erk[j * 6 + k];
266
267 for (j = 0; j < 8; j++) {
268 int m, n;
269 m = (b[j][0] << 1) | b[j][5];
270
271 n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
272 1) | b[j][4];
273
274 for (k = 0; k < 4; k++)
275 b[j][k] =
276 (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
277 }
278
279 for (j = 0; j < 8; j++)
280 for (k = 0; k < 4; k++)
281 cb[j * 4 + k] = b[j][k];
282 permute(pcb, cb, perm5, 32);
283
284 xor(r2, l, pcb, 32);
285
286 for (j = 0; j < 32; j++)
287 l[j] = r[j];
288
289 for (j = 0; j < 32; j++)
290 r[j] = r2[j];
291
292 kfree(er);
293 }
294
295 concat(rl, r, l, 32, 32);
296
297 permute(out, rl, perm6, 64);
298 kfree(pk1);
299 kfree(ki);
300}
301
302static void
303str_to_key(unsigned char *str, unsigned char *key)
304{
305 int i;
306
307 key[0] = str[0] >> 1;
308 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
309 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
310 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
311 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
312 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
313 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
314 key[7] = str[6] & 0x7F;
315 for (i = 0; i < 8; i++)
316 key[i] = (key[i] << 1);
317}
318
319static void
320smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
321 int forw)
322{
323 int i;
324 char *outb; /* outb[64] */
325 char *inb; /* inb[64] */
326 char *keyb; /* keyb[64] */
327 unsigned char key2[8];
328
329 outb = kmalloc(64 * 3, GFP_KERNEL);
330 if (outb == NULL)
331 return;
332
333 inb = outb + 64;
334 keyb = inb + 64;
335
336 str_to_key(key, key2);
337
338 for (i = 0; i < 64; i++) {
339 inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
340 keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
341 outb[i] = 0;
342 }
343
344 dohash(outb, inb, keyb, forw);
345
346 for (i = 0; i < 8; i++)
347 out[i] = 0;
348
349 for (i = 0; i < 64; i++) {
350 if (outb[i])
351 out[i / 8] |= (1 << (7 - (i % 8)));
352 }
353 kfree(outb);
354}
355
356void
357E_P16(unsigned char *p14, unsigned char *p16)
358{
359 unsigned char sp8[8] =
360 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
361 smbhash(p16, sp8, p14, 1);
362 smbhash(p16 + 8, sp8, p14 + 7, 1);
363}
364
365void
366E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
367{
368 smbhash(p24, c8, p21, 1);
369 smbhash(p24 + 8, c8, p21 + 7, 1);
370 smbhash(p24 + 16, c8, p21 + 14, 1);
371}
372
373#if 0 /* currently unused */
374static void
375D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
376{
377 smbhash(out, in, p14, 0);
378 smbhash(out + 8, in + 8, p14 + 7, 0);
379}
380
381static void
382E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
383{
384 smbhash(out, in, p14, 1);
385 smbhash(out + 8, in + 8, p14 + 7, 1);
386}
387/* these routines are currently unneeded, but may be
388 needed later */
389void
390cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
391{
392 unsigned char buf[8];
393
394 smbhash(buf, in, key, 1);
395 smbhash(out, buf, key + 9, 1);
396}
397
398void
399cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
400{
401 unsigned char buf[8];
402 static unsigned char key2[8];
403
404 smbhash(buf, in, key, 1);
405 key2[0] = key[7];
406 smbhash(out, buf, key2, 1);
407}
408
409void
410cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
411{
412 static unsigned char key2[8];
413
414 smbhash(out, in, key, forw);
415 key2[0] = key[7];
416 smbhash(out + 8, in + 8, key2, forw);
417}
418#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) 47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) 48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
49 49
50static void
51str_to_key(unsigned char *str, unsigned char *key)
52{
53 int i;
54
55 key[0] = str[0] >> 1;
56 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
57 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
58 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
59 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
60 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
61 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
62 key[7] = str[6] & 0x7F;
63 for (i = 0; i < 8; i++)
64 key[i] = (key[i] << 1);
65}
66
67static int
68smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
69{
70 int rc;
71 unsigned char key2[8];
72 struct crypto_blkcipher *tfm_des;
73 struct scatterlist sgin, sgout;
74 struct blkcipher_desc desc;
75
76 str_to_key(key, key2);
77
78 tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
79 if (IS_ERR(tfm_des)) {
80 rc = PTR_ERR(tfm_des);
81 cERROR(1, "could not allocate des crypto API\n");
82 goto smbhash_err;
83 }
84
85 desc.tfm = tfm_des;
86
87 crypto_blkcipher_setkey(tfm_des, key2, 8);
88
89 sg_init_one(&sgin, in, 8);
90 sg_init_one(&sgout, out, 8);
91
92 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
93 if (rc) {
94 cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
95 crypto_free_blkcipher(tfm_des);
96 goto smbhash_err;
97 }
98
99smbhash_err:
100 return rc;
101}
102
103static int
104E_P16(unsigned char *p14, unsigned char *p16)
105{
106 int rc;
107 unsigned char sp8[8] =
108 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
109
110 rc = smbhash(p16, sp8, p14);
111 if (rc)
112 return rc;
113 rc = smbhash(p16 + 8, sp8, p14 + 7);
114 return rc;
115}
116
117static int
118E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
119{
120 int rc;
121
122 rc = smbhash(p24, c8, p21);
123 if (rc)
124 return rc;
125 rc = smbhash(p24 + 8, c8, p21 + 7);
126 if (rc)
127 return rc;
128 rc = smbhash(p24 + 16, c8, p21 + 14);
129 return rc;
130}
131
50/* produce a md4 message digest from data of length n bytes */ 132/* produce a md4 message digest from data of length n bytes */
51int 133int
52mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) 134mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
87 return rc; 169 return rc;
88} 170}
89 171
90/* Does the des encryption from the NT or LM MD4 hash. */
91static void
92SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
93 unsigned char p24[24])
94{
95 unsigned char p21[21];
96
97 memset(p21, '\0', 21);
98
99 memcpy(p21, passwd, 16);
100 E_P24(p21, c8, p24);
101}
102
103/* 172/*
104 This implements the X/Open SMB password encryption 173 This implements the X/Open SMB password encryption
105 It takes a password, a 8 byte "crypt key" and puts 24 bytes of 174 It takes a password, a 8 byte "crypt key" and puts 24 bytes of
106 encrypted password into p24 */ 175 encrypted password into p24 */
107/* Note that password must be uppercased and null terminated */ 176/* Note that password must be uppercased and null terminated */
108void 177int
109SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24) 178SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
110{ 179{
111 unsigned char p14[15], p21[21]; 180 int rc;
181 unsigned char p14[14], p16[16], p21[21];
112 182
113 memset(p21, '\0', 21);
114 memset(p14, '\0', 14); 183 memset(p14, '\0', 14);
115 strncpy((char *) p14, (char *) passwd, 14); 184 memset(p16, '\0', 16);
185 memset(p21, '\0', 21);
116 186
117/* strupper((char *)p14); *//* BB at least uppercase the easy range */ 187 memcpy(p14, passwd, 14);
118 E_P16(p14, p21); 188 rc = E_P16(p14, p16);
189 if (rc)
190 return rc;
119 191
120 SMBOWFencrypt(p21, c8, p24); 192 memcpy(p21, p16, 16);
193 rc = E_P24(p21, c8, p24);
121 194
122 memset(p14, 0, 15); 195 return rc;
123 memset(p21, 0, 21);
124} 196}
125 197
126/* Routines for Windows NT MD4 Hash functions. */ 198/* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
279SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 351SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
280{ 352{
281 int rc; 353 int rc;
282 unsigned char p21[21]; 354 unsigned char p16[16], p21[21];
283 355
356 memset(p16, '\0', 16);
284 memset(p21, '\0', 21); 357 memset(p21, '\0', 21);
285 358
286 rc = E_md4hash(passwd, p21); 359 rc = E_md4hash(passwd, p16);
287 if (rc) { 360 if (rc) {
288 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); 361 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
289 return rc; 362 return rc;
290 } 363 }
291 SMBOWFencrypt(p21, c8, p24); 364 memcpy(p21, p16, 16);
365 rc = E_P24(p21, c8, p24);
292 return rc; 366 return rc;
293} 367}
294 368
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
129 unsigned int len = iov[0].iov_len; 129 unsigned int len = iov[0].iov_len;
130 unsigned int total_len; 130 unsigned int total_len;
131 int first_vec = 0; 131 int first_vec = 0;
132 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 132 unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
133 struct socket *ssocket = server->ssocket; 133 struct socket *ssocket = server->ssocket;
134 134
135 if (ssocket == NULL) 135 if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
144 else 144 else
145 smb_msg.msg_flags = MSG_NOSIGNAL; 145 smb_msg.msg_flags = MSG_NOSIGNAL;
146 146
147 /* smb header is converted in header_assemble. bcc and rest of SMB word
148 area, and byte area if necessary, is converted to littleendian in
149 cifssmb.c and RFC1001 len is converted to bigendian in smb_send
150 Flags2 is converted in SendReceive */
151
152
153 total_len = 0; 147 total_len = 0;
154 for (i = 0; i < n_vec; i++) 148 for (i = 0; i < n_vec; i++)
155 total_len += iov[i].iov_len; 149 total_len += iov[i].iov_len;
156 150
157 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
158 cFYI(1, "Sending smb: total_len %d", total_len); 151 cFYI(1, "Sending smb: total_len %d", total_len);
159 dump_smb(smb_buffer, len); 152 dump_smb(smb_buffer, len);
160 153
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
243 236
244 /* Don't want to modify the buffer as a 237 /* Don't want to modify the buffer as a
245 side effect of this call. */ 238 side effect of this call. */
246 smb_buffer->smb_buf_length = smb_buf_length; 239 smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
247 240
248 return rc; 241 return rc;
249} 242}
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
387#ifdef CONFIG_CIFS_STATS2 380#ifdef CONFIG_CIFS_STATS2
388 atomic_inc(&server->inSend); 381 atomic_inc(&server->inSend);
389#endif 382#endif
390 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 383 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
391#ifdef CONFIG_CIFS_STATS2 384#ifdef CONFIG_CIFS_STATS2
392 atomic_dec(&server->inSend); 385 atomic_dec(&server->inSend);
393 mid->when_sent = jiffies; 386 mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
422 int resp_buf_type; 415 int resp_buf_type;
423 416
424 iov[0].iov_base = (char *)in_buf; 417 iov[0].iov_base = (char *)in_buf;
425 iov[0].iov_len = in_buf->smb_buf_length + 4; 418 iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
426 flags |= CIFS_NO_RESP; 419 flags |= CIFS_NO_RESP;
427 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 420 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
428 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc); 421 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
488 int rc = 0; 481 int rc = 0;
489 482
490 /* -4 for RFC1001 length and +2 for BCC field */ 483 /* -4 for RFC1001 length and +2 for BCC field */
491 in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; 484 in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
492 in_buf->Command = SMB_COM_NT_CANCEL; 485 in_buf->Command = SMB_COM_NT_CANCEL;
493 in_buf->WordCount = 0; 486 in_buf->WordCount = 0;
494 put_bcc_le(0, in_buf); 487 put_bcc(0, in_buf);
495 488
496 mutex_lock(&server->srv_mutex); 489 mutex_lock(&server->srv_mutex);
497 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); 490 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
499 mutex_unlock(&server->srv_mutex); 492 mutex_unlock(&server->srv_mutex);
500 return rc; 493 return rc;
501 } 494 }
502 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 495 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
503 mutex_unlock(&server->srv_mutex); 496 mutex_unlock(&server->srv_mutex);
504 497
505 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", 498 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
612 return rc; 605 return rc;
613 } 606 }
614 607
615 receive_len = midQ->resp_buf->smb_buf_length; 608 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
616 609
617 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 610 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
618 cERROR(1, "Frame too large received. Length: %d Xid: %d", 611 cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
651 rc = map_smb_to_linux_error(midQ->resp_buf, 644 rc = map_smb_to_linux_error(midQ->resp_buf,
652 flags & CIFS_LOG_ERROR); 645 flags & CIFS_LOG_ERROR);
653 646
654 /* convert ByteCount if necessary */
655 if (receive_len >= sizeof(struct smb_hdr) - 4
656 /* do not count RFC1001 header */ +
657 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
658 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
659 if ((flags & CIFS_NO_RESP) == 0) 647 if ((flags & CIFS_NO_RESP) == 0)
660 midQ->resp_buf = NULL; /* mark it so buf will 648 midQ->resp_buf = NULL; /* mark it so buf will
661 not be freed by 649 not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
698 to the same server. We may make this configurable later or 686 to the same server. We may make this configurable later or
699 use ses->maxReq */ 687 use ses->maxReq */
700 688
701 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 689 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
690 MAX_CIFS_HDR_SIZE - 4) {
702 cERROR(1, "Illegal length, greater than maximum frame, %d", 691 cERROR(1, "Illegal length, greater than maximum frame, %d",
703 in_buf->smb_buf_length); 692 be32_to_cpu(in_buf->smb_buf_length));
704 return -EIO; 693 return -EIO;
705 } 694 }
706 695
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
733#ifdef CONFIG_CIFS_STATS2 722#ifdef CONFIG_CIFS_STATS2
734 atomic_inc(&ses->server->inSend); 723 atomic_inc(&ses->server->inSend);
735#endif 724#endif
736 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 725 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
737#ifdef CONFIG_CIFS_STATS2 726#ifdef CONFIG_CIFS_STATS2
738 atomic_dec(&ses->server->inSend); 727 atomic_dec(&ses->server->inSend);
739 midQ->when_sent = jiffies; 728 midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
768 return rc; 757 return rc;
769 } 758 }
770 759
771 receive_len = midQ->resp_buf->smb_buf_length; 760 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
772 761
773 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 762 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
774 cERROR(1, "Frame too large received. Length: %d Xid: %d", 763 cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
781 770
782 if (midQ->resp_buf && out_buf 771 if (midQ->resp_buf && out_buf
783 && (midQ->midState == MID_RESPONSE_RECEIVED)) { 772 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
784 out_buf->smb_buf_length = receive_len; 773 out_buf->smb_buf_length = cpu_to_be32(receive_len);
785 memcpy((char *)out_buf + 4, 774 memcpy((char *)out_buf + 4,
786 (char *)midQ->resp_buf + 4, 775 (char *)midQ->resp_buf + 4,
787 receive_len); 776 receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
800 } 789 }
801 } 790 }
802 791
803 *pbytes_returned = out_buf->smb_buf_length; 792 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
804 793
805 /* BB special case reconnect tid and uid here? */ 794 /* BB special case reconnect tid and uid here? */
806 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 795 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
807
808 /* convert ByteCount if necessary */
809 if (receive_len >= sizeof(struct smb_hdr) - 4
810 /* do not count RFC1001 header */ +
811 (2 * out_buf->WordCount) + 2 /* bcc */ )
812 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
813 } else { 796 } else {
814 rc = -EIO; 797 rc = -EIO;
815 cERROR(1, "Bad MID state?"); 798 cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
877 to the same server. We may make this configurable later or 860 to the same server. We may make this configurable later or
878 use ses->maxReq */ 861 use ses->maxReq */
879 862
880 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 863 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
864 MAX_CIFS_HDR_SIZE - 4) {
881 cERROR(1, "Illegal length, greater than maximum frame, %d", 865 cERROR(1, "Illegal length, greater than maximum frame, %d",
882 in_buf->smb_buf_length); 866 be32_to_cpu(in_buf->smb_buf_length));
883 return -EIO; 867 return -EIO;
884 } 868 }
885 869
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
910#ifdef CONFIG_CIFS_STATS2 894#ifdef CONFIG_CIFS_STATS2
911 atomic_inc(&ses->server->inSend); 895 atomic_inc(&ses->server->inSend);
912#endif 896#endif
913 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 897 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
914#ifdef CONFIG_CIFS_STATS2 898#ifdef CONFIG_CIFS_STATS2
915 atomic_dec(&ses->server->inSend); 899 atomic_dec(&ses->server->inSend);
916 midQ->when_sent = jiffies; 900 midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
977 if (rc != 0) 961 if (rc != 0)
978 return rc; 962 return rc;
979 963
980 receive_len = midQ->resp_buf->smb_buf_length; 964 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
981 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 965 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
982 cERROR(1, "Frame too large received. Length: %d Xid: %d", 966 cERROR(1, "Frame too large received. Length: %d Xid: %d",
983 receive_len, xid); 967 receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
993 goto out; 977 goto out;
994 } 978 }
995 979
996 out_buf->smb_buf_length = receive_len; 980 out_buf->smb_buf_length = cpu_to_be32(receive_len);
997 memcpy((char *)out_buf + 4, 981 memcpy((char *)out_buf + 4,
998 (char *)midQ->resp_buf + 4, 982 (char *)midQ->resp_buf + 4,
999 receive_len); 983 receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
1012 } 996 }
1013 } 997 }
1014 998
1015 *pbytes_returned = out_buf->smb_buf_length; 999 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
1016 1000
1017 /* BB special case reconnect tid and uid here? */ 1001 /* BB special case reconnect tid and uid here? */
1018 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 1002 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
1019 1003
1020 /* convert ByteCount if necessary */
1021 if (receive_len >= sizeof(struct smb_hdr) - 4
1022 /* do not count RFC1001 header */ +
1023 (2 * out_buf->WordCount) + 2 /* bcc */ )
1024 put_bcc(get_bcc_le(out_buf), out_buf);
1025
1026out: 1004out:
1027 delete_mid(midQ); 1005 delete_mid(midQ);
1028 if (rstart && rc == -EACCES) 1006 if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
112 struct cifsTconInfo *pTcon; 112 struct cifsTconInfo *pTcon;
113 struct super_block *sb; 113 struct super_block *sb;
114 char *full_path; 114 char *full_path;
115 struct cifs_ntsd *pacl;
115 116
116 if (direntry == NULL) 117 if (direntry == NULL)
117 return -EIO; 118 return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
166 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 167 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
167 (__u16)value_size, cifs_sb->local_nls, 168 (__u16)value_size, cifs_sb->local_nls,
168 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 169 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
170 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
171 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
172 pacl = kmalloc(value_size, GFP_KERNEL);
173 if (!pacl) {
174 cFYI(1, "%s: Can't allocate memory for ACL",
175 __func__);
176 rc = -ENOMEM;
177 } else {
178#ifdef CONFIG_CIFS_ACL
179 memcpy(pacl, ea_value, value_size);
180 rc = set_cifs_acl(pacl, value_size,
181 direntry->d_inode, full_path);
182 if (rc == 0) /* force revalidate of the inode */
183 CIFS_I(direntry->d_inode)->time = 0;
184 kfree(pacl);
185#else
186 cFYI(1, "Set CIFS ACL not supported yet");
187#endif /* CONFIG_CIFS_ACL */
188 }
169 } else { 189 } else {
170 int temp; 190 int temp;
171 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, 191 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 90ff3cb10de3..9a37a9b6de3a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);
53static void configfs_d_iput(struct dentry * dentry, 53static void configfs_d_iput(struct dentry * dentry,
54 struct inode * inode) 54 struct inode * inode)
55{ 55{
56 struct configfs_dirent * sd = dentry->d_fsdata; 56 struct configfs_dirent *sd = dentry->d_fsdata;
57 57
58 if (sd) { 58 if (sd) {
59 BUG_ON(sd->s_dentry != dentry); 59 BUG_ON(sd->s_dentry != dentry);
60 /* Coordinate with configfs_readdir */
61 spin_lock(&configfs_dirent_lock);
60 sd->s_dentry = NULL; 62 sd->s_dentry = NULL;
63 spin_unlock(&configfs_dirent_lock);
61 configfs_put(sd); 64 configfs_put(sd);
62 } 65 }
63 iput(inode); 66 iput(inode);
@@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,
689 sd = child->d_fsdata; 692 sd = child->d_fsdata;
690 sd->s_type |= CONFIGFS_USET_DEFAULT; 693 sd->s_type |= CONFIGFS_USET_DEFAULT;
691 } else { 694 } else {
692 d_delete(child); 695 BUG_ON(child->d_inode);
696 d_drop(child);
693 dput(child); 697 dput(child);
694 } 698 }
695 } 699 }
@@ -990,7 +994,7 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
990 * This describes these functions and their helpers. 994 * This describes these functions and their helpers.
991 * 995 *
992 * Allow another kernel system to depend on a config_item. If this 996 * Allow another kernel system to depend on a config_item. If this
993 * happens, the item cannot go away until the dependant can live without 997 * happens, the item cannot go away until the dependent can live without
994 * it. The idea is to give client modules as simple an interface as 998 * it. The idea is to give client modules as simple an interface as
995 * possible. When a system asks them to depend on an item, they just 999 * possible. When a system asks them to depend on an item, they just
996 * call configfs_depend_item(). If the item is live and the client 1000 * call configfs_depend_item(). If the item is live and the client
@@ -1545,7 +1549,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1545 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1549 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1546 struct configfs_dirent *cursor = filp->private_data; 1550 struct configfs_dirent *cursor = filp->private_data;
1547 struct list_head *p, *q = &cursor->s_sibling; 1551 struct list_head *p, *q = &cursor->s_sibling;
1548 ino_t ino; 1552 ino_t ino = 0;
1549 int i = filp->f_pos; 1553 int i = filp->f_pos;
1550 1554
1551 switch (i) { 1555 switch (i) {
@@ -1573,6 +1577,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1573 struct configfs_dirent *next; 1577 struct configfs_dirent *next;
1574 const char * name; 1578 const char * name;
1575 int len; 1579 int len;
1580 struct inode *inode = NULL;
1576 1581
1577 next = list_entry(p, struct configfs_dirent, 1582 next = list_entry(p, struct configfs_dirent,
1578 s_sibling); 1583 s_sibling);
@@ -1581,9 +1586,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1581 1586
1582 name = configfs_get_name(next); 1587 name = configfs_get_name(next);
1583 len = strlen(name); 1588 len = strlen(name);
1584 if (next->s_dentry) 1589
1585 ino = next->s_dentry->d_inode->i_ino; 1590 /*
1586 else 1591 * We'll have a dentry and an inode for
1592 * PINNED items and for open attribute
1593 * files. We lock here to prevent a race
1594 * with configfs_d_iput() clearing
1595 * s_dentry before calling iput().
1596 *
1597 * Why do we go to the trouble? If
1598 * someone has an attribute file open,
1599 * the inode number should match until
1600 * they close it. Beyond that, we don't
1601 * care.
1602 */
1603 spin_lock(&configfs_dirent_lock);
1604 dentry = next->s_dentry;
1605 if (dentry)
1606 inode = dentry->d_inode;
1607 if (inode)
1608 ino = inode->i_ino;
1609 spin_unlock(&configfs_dirent_lock);
1610 if (!inode)
1587 ino = iunique(configfs_sb, 2); 1611 ino = iunique(configfs_sb, 2);
1588 1612
1589 if (filldir(dirent, name, len, filp->f_pos, ino, 1613 if (filldir(dirent, name, len, filp->f_pos, ino,
@@ -1683,7 +1707,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1683 err = configfs_attach_group(sd->s_element, &group->cg_item, 1707 err = configfs_attach_group(sd->s_element, &group->cg_item,
1684 dentry); 1708 dentry);
1685 if (err) { 1709 if (err) {
1686 d_delete(dentry); 1710 BUG_ON(dentry->d_inode);
1711 d_drop(dentry);
1687 dput(dentry); 1712 dput(dentry);
1688 } else { 1713 } else {
1689 spin_lock(&configfs_dirent_lock); 1714 spin_lock(&configfs_dirent_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index ad25c4cec7d5..18b2a1f10ed8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h>
38#include "internal.h" 39#include "internal.h"
39 40
40/* 41/*
@@ -99,12 +100,9 @@ static struct kmem_cache *dentry_cache __read_mostly;
99static unsigned int d_hash_mask __read_mostly; 100static unsigned int d_hash_mask __read_mostly;
100static unsigned int d_hash_shift __read_mostly; 101static unsigned int d_hash_shift __read_mostly;
101 102
102struct dcache_hash_bucket { 103static struct hlist_bl_head *dentry_hashtable __read_mostly;
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106 104
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, 105static inline struct hlist_bl_head *d_hash(struct dentry *parent,
108 unsigned long hash) 106 unsigned long hash)
109{ 107{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 108 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -112,16 +110,6 @@ static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
112 return dentry_hashtable + (hash & D_HASHMASK); 110 return dentry_hashtable + (hash & D_HASHMASK);
113} 111}
114 112
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
124
125/* Statistics gathering. */ 113/* Statistics gathering. */
126struct dentry_stat_t dentry_stat = { 114struct dentry_stat_t dentry_stat = {
127 .age_limit = 45, 115 .age_limit = 45,
@@ -167,8 +155,8 @@ static void d_free(struct dentry *dentry)
167 if (dentry->d_op && dentry->d_op->d_release) 155 if (dentry->d_op && dentry->d_op->d_release)
168 dentry->d_op->d_release(dentry); 156 dentry->d_op->d_release(dentry);
169 157
170 /* if dentry was never inserted into hash, immediate free is OK */ 158 /* if dentry was never visible to RCU, immediate free is OK */
171 if (hlist_bl_unhashed(&dentry->d_hash)) 159 if (!(dentry->d_flags & DCACHE_RCUACCESS))
172 __d_free(&dentry->d_u.d_rcu); 160 __d_free(&dentry->d_u.d_rcu);
173 else 161 else
174 call_rcu(&dentry->d_u.d_rcu, __d_free); 162 call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -330,28 +318,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
330 */ 318 */
331void __d_drop(struct dentry *dentry) 319void __d_drop(struct dentry *dentry)
332{ 320{
333 if (!(dentry->d_flags & DCACHE_UNHASHED)) { 321 if (!d_unhashed(dentry)) {
334 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { 322 struct hlist_bl_head *b;
335 bit_spin_lock(0, 323 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
336 (unsigned long *)&dentry->d_sb->s_anon.first); 324 b = &dentry->d_sb->s_anon;
337 dentry->d_flags |= DCACHE_UNHASHED; 325 else
338 hlist_bl_del_init(&dentry->d_hash);
339 __bit_spin_unlock(0,
340 (unsigned long *)&dentry->d_sb->s_anon.first);
341 } else {
342 struct dcache_hash_bucket *b;
343 b = d_hash(dentry->d_parent, dentry->d_name.hash); 326 b = d_hash(dentry->d_parent, dentry->d_name.hash);
344 spin_lock_bucket(b); 327
345 /* 328 hlist_bl_lock(b);
346 * We may not actually need to put DCACHE_UNHASHED 329 __hlist_bl_del(&dentry->d_hash);
347 * manipulations under the hash lock, but follow 330 dentry->d_hash.pprev = NULL;
348 * the principle of least surprise. 331 hlist_bl_unlock(b);
349 */ 332
350 dentry->d_flags |= DCACHE_UNHASHED; 333 dentry_rcuwalk_barrier(dentry);
351 hlist_bl_del_rcu(&dentry->d_hash);
352 spin_unlock_bucket(b);
353 dentry_rcuwalk_barrier(dentry);
354 }
355 } 334 }
356} 335}
357EXPORT_SYMBOL(__d_drop); 336EXPORT_SYMBOL(__d_drop);
@@ -1304,7 +1283,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1304 dname[name->len] = 0; 1283 dname[name->len] = 0;
1305 1284
1306 dentry->d_count = 1; 1285 dentry->d_count = 1;
1307 dentry->d_flags = DCACHE_UNHASHED; 1286 dentry->d_flags = 0;
1308 spin_lock_init(&dentry->d_lock); 1287 spin_lock_init(&dentry->d_lock);
1309 seqcount_init(&dentry->d_seq); 1288 seqcount_init(&dentry->d_seq);
1310 dentry->d_inode = NULL; 1289 dentry->d_inode = NULL;
@@ -1606,10 +1585,9 @@ struct dentry *d_obtain_alias(struct inode *inode)
1606 tmp->d_inode = inode; 1585 tmp->d_inode = inode;
1607 tmp->d_flags |= DCACHE_DISCONNECTED; 1586 tmp->d_flags |= DCACHE_DISCONNECTED;
1608 list_add(&tmp->d_alias, &inode->i_dentry); 1587 list_add(&tmp->d_alias, &inode->i_dentry);
1609 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1588 hlist_bl_lock(&tmp->d_sb->s_anon);
1610 tmp->d_flags &= ~DCACHE_UNHASHED;
1611 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1589 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1612 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1590 hlist_bl_unlock(&tmp->d_sb->s_anon);
1613 spin_unlock(&tmp->d_lock); 1591 spin_unlock(&tmp->d_lock);
1614 spin_unlock(&inode->i_lock); 1592 spin_unlock(&inode->i_lock);
1615 security_d_instantiate(tmp, inode); 1593 security_d_instantiate(tmp, inode);
@@ -1789,7 +1767,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1789 unsigned int len = name->len; 1767 unsigned int len = name->len;
1790 unsigned int hash = name->hash; 1768 unsigned int hash = name->hash;
1791 const unsigned char *str = name->name; 1769 const unsigned char *str = name->name;
1792 struct dcache_hash_bucket *b = d_hash(parent, hash); 1770 struct hlist_bl_head *b = d_hash(parent, hash);
1793 struct hlist_bl_node *node; 1771 struct hlist_bl_node *node;
1794 struct dentry *dentry; 1772 struct dentry *dentry;
1795 1773
@@ -1813,7 +1791,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1813 * 1791 *
1814 * See Documentation/filesystems/path-lookup.txt for more details. 1792 * See Documentation/filesystems/path-lookup.txt for more details.
1815 */ 1793 */
1816 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1794 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1817 struct inode *i; 1795 struct inode *i;
1818 const char *tname; 1796 const char *tname;
1819 int tlen; 1797 int tlen;
@@ -1908,7 +1886,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1908 unsigned int len = name->len; 1886 unsigned int len = name->len;
1909 unsigned int hash = name->hash; 1887 unsigned int hash = name->hash;
1910 const unsigned char *str = name->name; 1888 const unsigned char *str = name->name;
1911 struct dcache_hash_bucket *b = d_hash(parent, hash); 1889 struct hlist_bl_head *b = d_hash(parent, hash);
1912 struct hlist_bl_node *node; 1890 struct hlist_bl_node *node;
1913 struct dentry *found = NULL; 1891 struct dentry *found = NULL;
1914 struct dentry *dentry; 1892 struct dentry *dentry;
@@ -1935,7 +1913,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1935 */ 1913 */
1936 rcu_read_lock(); 1914 rcu_read_lock();
1937 1915
1938 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1916 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1939 const char *tname; 1917 const char *tname;
1940 int tlen; 1918 int tlen;
1941 1919
@@ -2086,13 +2064,13 @@ again:
2086} 2064}
2087EXPORT_SYMBOL(d_delete); 2065EXPORT_SYMBOL(d_delete);
2088 2066
2089static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) 2067static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2090{ 2068{
2091 BUG_ON(!d_unhashed(entry)); 2069 BUG_ON(!d_unhashed(entry));
2092 spin_lock_bucket(b); 2070 hlist_bl_lock(b);
2093 entry->d_flags &= ~DCACHE_UNHASHED; 2071 entry->d_flags |= DCACHE_RCUACCESS;
2094 hlist_bl_add_head_rcu(&entry->d_hash, &b->head); 2072 hlist_bl_add_head_rcu(&entry->d_hash, b);
2095 spin_unlock_bucket(b); 2073 hlist_bl_unlock(b);
2096} 2074}
2097 2075
2098static void _d_rehash(struct dentry * entry) 2076static void _d_rehash(struct dentry * entry)
@@ -2131,7 +2109,7 @@ EXPORT_SYMBOL(d_rehash);
2131 */ 2109 */
2132void dentry_update_name_case(struct dentry *dentry, struct qstr *name) 2110void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2133{ 2111{
2134 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2112 BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2135 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ 2113 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2136 2114
2137 spin_lock(&dentry->d_lock); 2115 spin_lock(&dentry->d_lock);
@@ -3025,7 +3003,7 @@ static void __init dcache_init_early(void)
3025 3003
3026 dentry_hashtable = 3004 dentry_hashtable =
3027 alloc_large_system_hash("Dentry cache", 3005 alloc_large_system_hash("Dentry cache",
3028 sizeof(struct dcache_hash_bucket), 3006 sizeof(struct hlist_bl_head),
3029 dhash_entries, 3007 dhash_entries,
3030 13, 3008 13,
3031 HASH_EARLY, 3009 HASH_EARLY,
@@ -3034,7 +3012,7 @@ static void __init dcache_init_early(void)
3034 0); 3012 0);
3035 3013
3036 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3014 for (loop = 0; loop < (1 << d_hash_shift); loop++)
3037 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3015 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3038} 3016}
3039 3017
3040static void __init dcache_init(void) 3018static void __init dcache_init(void)
@@ -3057,7 +3035,7 @@ static void __init dcache_init(void)
3057 3035
3058 dentry_hashtable = 3036 dentry_hashtable =
3059 alloc_large_system_hash("Dentry cache", 3037 alloc_large_system_hash("Dentry cache",
3060 sizeof(struct dcache_hash_bucket), 3038 sizeof(struct hlist_bl_head),
3061 dhash_entries, 3039 dhash_entries,
3062 13, 3040 13,
3063 0, 3041 0,
@@ -3066,7 +3044,7 @@ static void __init dcache_init(void)
3066 0); 3044 0);
3067 3045
3068 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3046 for (loop = 0; loop < (1 << d_hash_shift); loop++)
3069 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3047 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3070} 3048}
3071 3049
3072/* SLAB cache for __getname() consumers */ 3050/* SLAB cache for __getname() consumers */
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
428 size_t count, loff_t *ppos) 428 size_t count, loff_t *ppos)
429{ 429{
430 char buf[32]; 430 char buf[32];
431 int buf_size; 431 size_t buf_size;
432 bool bv;
432 u32 *val = file->private_data; 433 u32 *val = file->private_data;
433 434
434 buf_size = min(count, (sizeof(buf)-1)); 435 buf_size = min(count, (sizeof(buf)-1));
435 if (copy_from_user(buf, user_buf, buf_size)) 436 if (copy_from_user(buf, user_buf, buf_size))
436 return -EFAULT; 437 return -EFAULT;
437 438
438 switch (buf[0]) { 439 if (strtobool(buf, &bv) == 0)
439 case 'y': 440 *val = bv;
440 case 'Y': 441
441 case '1':
442 *val = 1;
443 break;
444 case 'n':
445 case 'N':
446 case '0':
447 *val = 0;
448 break;
449 }
450
451 return count; 442 return count;
452} 443}
453 444
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 04b8c449303f..56d6bfcc1e48 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -519,7 +519,7 @@ static void toss_rsb(struct kref *kref)
519 } 519 }
520} 520}
521 521
522/* When all references to the rsb are gone it's transfered to 522/* When all references to the rsb are gone it's transferred to
523 the tossed list for later disposal. */ 523 the tossed list for later disposal. */
524 524
525static void put_rsb(struct dlm_rsb *r) 525static void put_rsb(struct dlm_rsb *r)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index bffa1e73b9a9..5e2c71f05e46 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -810,7 +810,7 @@ static int tcp_accept_from_sock(struct connection *con)
810 810
811 /* 811 /*
812 * Add it to the active queue in case we got data 812 * Add it to the active queue in case we got data
813 * beween processing the accept adding the socket 813 * between processing the accept adding the socket
814 * to the read_sockets list 814 * to the read_sockets list
815 */ 815 */
816 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) 816 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index eda43f362616..14638235f7b2 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -304,7 +304,7 @@ static void set_master_lkbs(struct dlm_rsb *r)
304} 304}
305 305
306/* 306/*
307 * Propogate the new master nodeid to locks 307 * Propagate the new master nodeid to locks
308 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. 308 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
309 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which 309 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
310 * rsb's to consider. 310 * rsb's to consider.
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index d2a70a4561f9..b8d5c8091024 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1452,6 +1452,25 @@ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1452 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; 1452 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1453} 1453}
1454 1454
1455void ecryptfs_i_size_init(const char *page_virt, struct inode *inode)
1456{
1457 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
1458 struct ecryptfs_crypt_stat *crypt_stat;
1459 u64 file_size;
1460
1461 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
1462 mount_crypt_stat =
1463 &ecryptfs_superblock_to_private(inode->i_sb)->mount_crypt_stat;
1464 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
1465 file_size = i_size_read(ecryptfs_inode_to_lower(inode));
1466 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
1467 file_size += crypt_stat->metadata_size;
1468 } else
1469 file_size = get_unaligned_be64(page_virt);
1470 i_size_write(inode, (loff_t)file_size);
1471 crypt_stat->flags |= ECRYPTFS_I_SIZE_INITIALIZED;
1472}
1473
1455/** 1474/**
1456 * ecryptfs_read_headers_virt 1475 * ecryptfs_read_headers_virt
1457 * @page_virt: The virtual address into which to read the headers 1476 * @page_virt: The virtual address into which to read the headers
@@ -1482,6 +1501,8 @@ static int ecryptfs_read_headers_virt(char *page_virt,
1482 rc = -EINVAL; 1501 rc = -EINVAL;
1483 goto out; 1502 goto out;
1484 } 1503 }
1504 if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
1505 ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
1485 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1506 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1486 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset), 1507 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
1487 &bytes_read); 1508 &bytes_read);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bd3cafd0949d..e70282775e2c 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
269#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800 269#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800
270#define ECRYPTFS_ENCFN_USE_FEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_FEK 0x00001000
271#define ECRYPTFS_UNLINK_SIGS 0x00002000 271#define ECRYPTFS_UNLINK_SIGS 0x00002000
272#define ECRYPTFS_I_SIZE_INITIALIZED 0x00004000
272 u32 flags; 273 u32 flags;
273 unsigned int file_version; 274 unsigned int file_version;
274 size_t iv_bytes; 275 size_t iv_bytes;
@@ -295,6 +296,8 @@ struct ecryptfs_crypt_stat {
295struct ecryptfs_inode_info { 296struct ecryptfs_inode_info {
296 struct inode vfs_inode; 297 struct inode vfs_inode;
297 struct inode *wii_inode; 298 struct inode *wii_inode;
299 struct mutex lower_file_mutex;
300 atomic_t lower_file_count;
298 struct file *lower_file; 301 struct file *lower_file;
299 struct ecryptfs_crypt_stat crypt_stat; 302 struct ecryptfs_crypt_stat crypt_stat;
300}; 303};
@@ -626,6 +629,7 @@ struct ecryptfs_open_req {
626int ecryptfs_interpose(struct dentry *hidden_dentry, 629int ecryptfs_interpose(struct dentry *hidden_dentry,
627 struct dentry *this_dentry, struct super_block *sb, 630 struct dentry *this_dentry, struct super_block *sb,
628 u32 flags); 631 u32 flags);
632void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
629int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
630 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
631 struct inode *ecryptfs_dir_inode); 635 struct inode *ecryptfs_dir_inode);
@@ -757,7 +761,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
757 struct dentry *lower_dentry, 761 struct dentry *lower_dentry,
758 struct vfsmount *lower_mnt, 762 struct vfsmount *lower_mnt,
759 const struct cred *cred); 763 const struct cred *cred);
760int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); 764int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry);
765void ecryptfs_put_lower_file(struct inode *inode);
761int 766int
762ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, 767ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
763 size_t *packet_size, 768 size_t *packet_size,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index cedc913d11ba..566e5472f78c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -191,10 +191,10 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 194 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
195 if (rc) { 195 if (rc) {
196 printk(KERN_ERR "%s: Error attempting to initialize " 196 printk(KERN_ERR "%s: Error attempting to initialize "
197 "the persistent file for the dentry with name " 197 "the lower file for the dentry with name "
198 "[%s]; rc = [%d]\n", __func__, 198 "[%s]; rc = [%d]\n", __func__,
199 ecryptfs_dentry->d_name.name, rc); 199 ecryptfs_dentry->d_name.name, rc);
200 goto out_free; 200 goto out_free;
@@ -202,9 +202,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
202 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) 202 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
203 == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { 203 == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
204 rc = -EPERM; 204 rc = -EPERM;
205 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " 205 printk(KERN_WARNING "%s: Lower file is RO; eCryptfs "
206 "file must hence be opened RO\n", __func__); 206 "file must hence be opened RO\n", __func__);
207 goto out_free; 207 goto out_put;
208 } 208 }
209 ecryptfs_set_file_lower( 209 ecryptfs_set_file_lower(
210 file, ecryptfs_inode_to_private(inode)->lower_file); 210 file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -232,10 +232,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
232 "Plaintext passthrough mode is not " 232 "Plaintext passthrough mode is not "
233 "enabled; returning -EIO\n"); 233 "enabled; returning -EIO\n");
234 mutex_unlock(&crypt_stat->cs_mutex); 234 mutex_unlock(&crypt_stat->cs_mutex);
235 goto out_free; 235 goto out_put;
236 } 236 }
237 rc = 0; 237 rc = 0;
238 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 238 crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
239 | ECRYPTFS_ENCRYPTED);
239 mutex_unlock(&crypt_stat->cs_mutex); 240 mutex_unlock(&crypt_stat->cs_mutex);
240 goto out; 241 goto out;
241 } 242 }
@@ -245,6 +246,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
245 "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, 246 "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
246 (unsigned long long)i_size_read(inode)); 247 (unsigned long long)i_size_read(inode));
247 goto out; 248 goto out;
249out_put:
250 ecryptfs_put_lower_file(inode);
248out_free: 251out_free:
249 kmem_cache_free(ecryptfs_file_info_cache, 252 kmem_cache_free(ecryptfs_file_info_cache,
250 ecryptfs_file_to_private(file)); 253 ecryptfs_file_to_private(file));
@@ -254,17 +257,13 @@ out:
254 257
255static int ecryptfs_flush(struct file *file, fl_owner_t td) 258static int ecryptfs_flush(struct file *file, fl_owner_t td)
256{ 259{
257 int rc = 0; 260 return file->f_mode & FMODE_WRITE
258 struct file *lower_file = NULL; 261 ? filemap_write_and_wait(file->f_mapping) : 0;
259
260 lower_file = ecryptfs_file_to_lower(file);
261 if (lower_file->f_op && lower_file->f_op->flush)
262 rc = lower_file->f_op->flush(lower_file, td);
263 return rc;
264} 262}
265 263
266static int ecryptfs_release(struct inode *inode, struct file *file) 264static int ecryptfs_release(struct inode *inode, struct file *file)
267{ 265{
266 ecryptfs_put_lower_file(inode);
268 kmem_cache_free(ecryptfs_file_info_cache, 267 kmem_cache_free(ecryptfs_file_info_cache,
269 ecryptfs_file_to_private(file)); 268 ecryptfs_file_to_private(file));
270 return 0; 269 return 0;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f99051b7adab..4d4cc6a90cd5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -168,19 +168,18 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
168 "context; rc = [%d]\n", rc); 168 "context; rc = [%d]\n", rc);
169 goto out; 169 goto out;
170 } 170 }
171 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 171 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
172 if (rc) { 172 if (rc) {
173 printk(KERN_ERR "%s: Error attempting to initialize " 173 printk(KERN_ERR "%s: Error attempting to initialize "
174 "the persistent file for the dentry with name " 174 "the lower file for the dentry with name "
175 "[%s]; rc = [%d]\n", __func__, 175 "[%s]; rc = [%d]\n", __func__,
176 ecryptfs_dentry->d_name.name, rc); 176 ecryptfs_dentry->d_name.name, rc);
177 goto out; 177 goto out;
178 } 178 }
179 rc = ecryptfs_write_metadata(ecryptfs_dentry); 179 rc = ecryptfs_write_metadata(ecryptfs_dentry);
180 if (rc) { 180 if (rc)
181 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 181 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
182 goto out; 182 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
183 }
184out: 183out:
185 return rc; 184 return rc;
186} 185}
@@ -226,11 +225,9 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
226 struct dentry *lower_dir_dentry; 225 struct dentry *lower_dir_dentry;
227 struct vfsmount *lower_mnt; 226 struct vfsmount *lower_mnt;
228 struct inode *lower_inode; 227 struct inode *lower_inode;
229 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
230 struct ecryptfs_crypt_stat *crypt_stat; 228 struct ecryptfs_crypt_stat *crypt_stat;
231 char *page_virt = NULL; 229 char *page_virt = NULL;
232 u64 file_size; 230 int put_lower = 0, rc = 0;
233 int rc = 0;
234 231
235 lower_dir_dentry = lower_dentry->d_parent; 232 lower_dir_dentry = lower_dentry->d_parent;
236 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( 233 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
@@ -277,14 +274,15 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
277 rc = -ENOMEM; 274 rc = -ENOMEM;
278 goto out; 275 goto out;
279 } 276 }
280 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 277 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
281 if (rc) { 278 if (rc) {
282 printk(KERN_ERR "%s: Error attempting to initialize " 279 printk(KERN_ERR "%s: Error attempting to initialize "
283 "the persistent file for the dentry with name " 280 "the lower file for the dentry with name "
284 "[%s]; rc = [%d]\n", __func__, 281 "[%s]; rc = [%d]\n", __func__,
285 ecryptfs_dentry->d_name.name, rc); 282 ecryptfs_dentry->d_name.name, rc);
286 goto out_free_kmem; 283 goto out_free_kmem;
287 } 284 }
285 put_lower = 1;
288 crypt_stat = &ecryptfs_inode_to_private( 286 crypt_stat = &ecryptfs_inode_to_private(
289 ecryptfs_dentry->d_inode)->crypt_stat; 287 ecryptfs_dentry->d_inode)->crypt_stat;
290 /* TODO: lock for crypt_stat comparison */ 288 /* TODO: lock for crypt_stat comparison */
@@ -302,18 +300,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
302 } 300 }
303 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; 301 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
304 } 302 }
305 mount_crypt_stat = &ecryptfs_superblock_to_private( 303 ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
306 ecryptfs_dentry->d_sb)->mount_crypt_stat;
307 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
308 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
309 file_size = (crypt_stat->metadata_size
310 + i_size_read(lower_dentry->d_inode));
311 else
312 file_size = i_size_read(lower_dentry->d_inode);
313 } else {
314 file_size = get_unaligned_be64(page_virt);
315 }
316 i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size);
317out_free_kmem: 304out_free_kmem:
318 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 305 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
319 goto out; 306 goto out;
@@ -322,6 +309,8 @@ out_put:
322 mntput(lower_mnt); 309 mntput(lower_mnt);
323 d_drop(ecryptfs_dentry); 310 d_drop(ecryptfs_dentry);
324out: 311out:
312 if (put_lower)
313 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
325 return rc; 314 return rc;
326} 315}
327 316
@@ -538,8 +527,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
538 dget(lower_dentry); 527 dget(lower_dentry);
539 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); 528 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
540 dput(lower_dentry); 529 dput(lower_dentry);
541 if (!rc)
542 d_delete(lower_dentry);
543 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 530 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
544 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 531 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
545 unlock_dir(lower_dir_dentry); 532 unlock_dir(lower_dir_dentry);
@@ -610,8 +597,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
610 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); 597 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
611out_lock: 598out_lock:
612 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 599 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
613 dput(lower_new_dentry->d_parent); 600 dput(lower_new_dir_dentry);
614 dput(lower_old_dentry->d_parent); 601 dput(lower_old_dir_dentry);
615 dput(lower_new_dentry); 602 dput(lower_new_dentry);
616 dput(lower_old_dentry); 603 dput(lower_old_dentry);
617 return rc; 604 return rc;
@@ -759,8 +746,11 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
759 746
760 if (unlikely((ia->ia_size == i_size))) { 747 if (unlikely((ia->ia_size == i_size))) {
761 lower_ia->ia_valid &= ~ATTR_SIZE; 748 lower_ia->ia_valid &= ~ATTR_SIZE;
762 goto out; 749 return 0;
763 } 750 }
751 rc = ecryptfs_get_lower_file(dentry);
752 if (rc)
753 return rc;
764 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 754 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
765 /* Switch on growing or shrinking file */ 755 /* Switch on growing or shrinking file */
766 if (ia->ia_size > i_size) { 756 if (ia->ia_size > i_size) {
@@ -838,6 +828,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
838 lower_ia->ia_valid &= ~ATTR_SIZE; 828 lower_ia->ia_valid &= ~ATTR_SIZE;
839 } 829 }
840out: 830out:
831 ecryptfs_put_lower_file(inode);
841 return rc; 832 return rc;
842} 833}
843 834
@@ -913,7 +904,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
913 904
914 mount_crypt_stat = &ecryptfs_superblock_to_private( 905 mount_crypt_stat = &ecryptfs_superblock_to_private(
915 dentry->d_sb)->mount_crypt_stat; 906 dentry->d_sb)->mount_crypt_stat;
907 rc = ecryptfs_get_lower_file(dentry);
908 if (rc) {
909 mutex_unlock(&crypt_stat->cs_mutex);
910 goto out;
911 }
916 rc = ecryptfs_read_metadata(dentry); 912 rc = ecryptfs_read_metadata(dentry);
913 ecryptfs_put_lower_file(inode);
917 if (rc) { 914 if (rc) {
918 if (!(mount_crypt_stat->flags 915 if (!(mount_crypt_stat->flags
919 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { 916 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
@@ -927,10 +924,17 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
927 goto out; 924 goto out;
928 } 925 }
929 rc = 0; 926 rc = 0;
930 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 927 crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
928 | ECRYPTFS_ENCRYPTED);
931 } 929 }
932 } 930 }
933 mutex_unlock(&crypt_stat->cs_mutex); 931 mutex_unlock(&crypt_stat->cs_mutex);
932 if (S_ISREG(inode->i_mode)) {
933 rc = filemap_write_and_wait(inode->i_mapping);
934 if (rc)
935 goto out;
936 fsstack_copy_attr_all(inode, lower_inode);
937 }
934 memcpy(&lower_ia, ia, sizeof(lower_ia)); 938 memcpy(&lower_ia, ia, sizeof(lower_ia));
935 if (ia->ia_valid & ATTR_FILE) 939 if (ia->ia_valid & ATTR_FILE)
936 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); 940 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0851ab6980f5..69f994a7d524 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -44,7 +44,7 @@ static struct task_struct *ecryptfs_kthread;
44 * @ignored: ignored 44 * @ignored: ignored
45 * 45 *
46 * The eCryptfs kernel thread that has the responsibility of getting 46 * The eCryptfs kernel thread that has the responsibility of getting
47 * the lower persistent file with RW permissions. 47 * the lower file with RW permissions.
48 * 48 *
49 * Returns zero on success; non-zero otherwise 49 * Returns zero on success; non-zero otherwise
50 */ 50 */
@@ -141,8 +141,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
141 int rc = 0; 141 int rc = 0;
142 142
143 /* Corresponding dput() and mntput() are done when the 143 /* Corresponding dput() and mntput() are done when the
144 * persistent file is fput() when the eCryptfs inode is 144 * lower file is fput() when all eCryptfs files for the inode are
145 * destroyed. */ 145 * released. */
146 dget(lower_dentry); 146 dget(lower_dentry);
147 mntget(lower_mnt); 147 mntget(lower_mnt);
148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; 148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c27c0ecf90bc..89b93389af8e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -96,7 +96,7 @@ void __ecryptfs_printk(const char *fmt, ...)
96} 96}
97 97
98/** 98/**
99 * ecryptfs_init_persistent_file 99 * ecryptfs_init_lower_file
100 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with 100 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
101 * the lower dentry and the lower mount set 101 * the lower dentry and the lower mount set
102 * 102 *
@@ -104,42 +104,70 @@ void __ecryptfs_printk(const char *fmt, ...)
104 * inode. All I/O operations to the lower inode occur through that 104 * inode. All I/O operations to the lower inode occur through that
105 * file. When the first eCryptfs dentry that interposes with the first 105 * file. When the first eCryptfs dentry that interposes with the first
106 * lower dentry for that inode is created, this function creates the 106 * lower dentry for that inode is created, this function creates the
107 * persistent file struct and associates it with the eCryptfs 107 * lower file struct and associates it with the eCryptfs
108 * inode. When the eCryptfs inode is destroyed, the file is closed. 108 * inode. When all eCryptfs files associated with the inode are released, the
109 * file is closed.
109 * 110 *
110 * The persistent file will be opened with read/write permissions, if 111 * The lower file will be opened with read/write permissions, if
111 * possible. Otherwise, it is opened read-only. 112 * possible. Otherwise, it is opened read-only.
112 * 113 *
113 * This function does nothing if a lower persistent file is already 114 * This function does nothing if a lower file is already
114 * associated with the eCryptfs inode. 115 * associated with the eCryptfs inode.
115 * 116 *
116 * Returns zero on success; non-zero otherwise 117 * Returns zero on success; non-zero otherwise
117 */ 118 */
118int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 119static int ecryptfs_init_lower_file(struct dentry *dentry,
120 struct file **lower_file)
119{ 121{
120 const struct cred *cred = current_cred(); 122 const struct cred *cred = current_cred();
121 struct ecryptfs_inode_info *inode_info = 123 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
122 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 124 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
123 int rc = 0; 125 int rc;
124 126
125 if (!inode_info->lower_file) { 127 rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt,
126 struct dentry *lower_dentry; 128 cred);
127 struct vfsmount *lower_mnt = 129 if (rc) {
128 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 130 printk(KERN_ERR "Error opening lower file "
131 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
132 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
133 (*lower_file) = NULL;
134 }
135 return rc;
136}
129 137
130 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 138int ecryptfs_get_lower_file(struct dentry *dentry)
131 rc = ecryptfs_privileged_open(&inode_info->lower_file, 139{
132 lower_dentry, lower_mnt, cred); 140 struct ecryptfs_inode_info *inode_info =
133 if (rc) { 141 ecryptfs_inode_to_private(dentry->d_inode);
134 printk(KERN_ERR "Error opening lower persistent file " 142 int count, rc = 0;
135 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 143
136 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 144 mutex_lock(&inode_info->lower_file_mutex);
137 inode_info->lower_file = NULL; 145 count = atomic_inc_return(&inode_info->lower_file_count);
138 } 146 if (WARN_ON_ONCE(count < 1))
147 rc = -EINVAL;
148 else if (count == 1) {
149 rc = ecryptfs_init_lower_file(dentry,
150 &inode_info->lower_file);
151 if (rc)
152 atomic_set(&inode_info->lower_file_count, 0);
139 } 153 }
154 mutex_unlock(&inode_info->lower_file_mutex);
140 return rc; 155 return rc;
141} 156}
142 157
158void ecryptfs_put_lower_file(struct inode *inode)
159{
160 struct ecryptfs_inode_info *inode_info;
161
162 inode_info = ecryptfs_inode_to_private(inode);
163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
164 &inode_info->lower_file_mutex)) {
165 fput(inode_info->lower_file);
166 inode_info->lower_file = NULL;
167 mutex_unlock(&inode_info->lower_file_mutex);
168 }
169}
170
143static struct inode *ecryptfs_get_inode(struct inode *lower_inode, 171static struct inode *ecryptfs_get_inode(struct inode *lower_inode,
144 struct super_block *sb) 172 struct super_block *sb)
145{ 173{
@@ -276,7 +304,7 @@ static void ecryptfs_init_mount_crypt_stat(
276/** 304/**
277 * ecryptfs_parse_options 305 * ecryptfs_parse_options
278 * @sb: The ecryptfs super block 306 * @sb: The ecryptfs super block
279 * @options: The options pased to the kernel 307 * @options: The options passed to the kernel
280 * 308 *
281 * Parse mount options: 309 * Parse mount options:
282 * debug=N - ecryptfs_verbosity level for debug output 310 * debug=N - ecryptfs_verbosity level for debug output
@@ -840,7 +868,7 @@ static int __init ecryptfs_init(void)
840 } 868 }
841 rc = ecryptfs_init_messaging(); 869 rc = ecryptfs_init_messaging();
842 if (rc) { 870 if (rc) {
843 printk(KERN_ERR "Failure occured while attempting to " 871 printk(KERN_ERR "Failure occurred while attempting to "
844 "initialize the communications channel to " 872 "initialize the communications channel to "
845 "ecryptfsd\n"); 873 "ecryptfsd\n");
846 goto out_destroy_kthread; 874 goto out_destroy_kthread;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index bacc882e1ae4..245b517bf1b6 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -55,6 +55,8 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
55 if (unlikely(!inode_info)) 55 if (unlikely(!inode_info))
56 goto out; 56 goto out;
57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat); 57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
58 mutex_init(&inode_info->lower_file_mutex);
59 atomic_set(&inode_info->lower_file_count, 0);
58 inode_info->lower_file = NULL; 60 inode_info->lower_file = NULL;
59 inode = &inode_info->vfs_inode; 61 inode = &inode_info->vfs_inode;
60out: 62out:
@@ -77,8 +79,7 @@ static void ecryptfs_i_callback(struct rcu_head *head)
77 * 79 *
78 * This is used during the final destruction of the inode. All 80 * This is used during the final destruction of the inode. All
79 * allocation of memory related to the inode, including allocated 81 * allocation of memory related to the inode, including allocated
80 * memory in the crypt_stat struct, will be released here. This 82 * memory in the crypt_stat struct, will be released here.
81 * function also fput()'s the persistent file for the lower inode.
82 * There should be no chance that this deallocation will be missed. 83 * There should be no chance that this deallocation will be missed.
83 */ 84 */
84static void ecryptfs_destroy_inode(struct inode *inode) 85static void ecryptfs_destroy_inode(struct inode *inode)
@@ -86,16 +87,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
86 struct ecryptfs_inode_info *inode_info; 87 struct ecryptfs_inode_info *inode_info;
87 88
88 inode_info = ecryptfs_inode_to_private(inode); 89 inode_info = ecryptfs_inode_to_private(inode);
89 if (inode_info->lower_file) { 90 BUG_ON(inode_info->lower_file);
90 struct dentry *lower_dentry =
91 inode_info->lower_file->f_dentry;
92
93 BUG_ON(!lower_dentry);
94 if (lower_dentry->d_inode) {
95 fput(inode_info->lower_file);
96 inode_info->lower_file = NULL;
97 }
98 }
99 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
100 call_rcu(&inode->i_rcu, ecryptfs_i_callback); 92 call_rcu(&inode->i_rcu, ecryptfs_i_callback);
101} 93}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ed38801b57a7..f9cfd168fbe2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -181,7 +181,7 @@ struct eventpoll {
181 181
182 /* 182 /*
183 * This is a single linked list that chains all the "struct epitem" that 183 * This is a single linked list that chains all the "struct epitem" that
184 * happened while transfering ready events to userspace w/out 184 * happened while transferring ready events to userspace w/out
185 * holding ->lock. 185 * holding ->lock.
186 */ 186 */
187 struct epitem *ovflist; 187 struct epitem *ovflist;
@@ -606,7 +606,7 @@ static void ep_free(struct eventpoll *ep)
606 * We do not need to hold "ep->mtx" here because the epoll file 606 * We do not need to hold "ep->mtx" here because the epoll file
607 * is on the way to be removed and no one has references to it 607 * is on the way to be removed and no one has references to it
608 * anymore. The only hit might come from eventpoll_release_file() but 608 * anymore. The only hit might come from eventpoll_release_file() but
609 * holding "epmutex" is sufficent here. 609 * holding "epmutex" is sufficient here.
610 */ 610 */
611 mutex_lock(&epmutex); 611 mutex_lock(&epmutex);
612 612
@@ -720,7 +720,7 @@ void eventpoll_release_file(struct file *file)
720 /* 720 /*
721 * We don't want to get "file->f_lock" because it is not 721 * We don't want to get "file->f_lock" because it is not
722 * necessary. It is not necessary because we're in the "struct file" 722 * necessary. It is not necessary because we're in the "struct file"
723 * cleanup path, and this means that noone is using this file anymore. 723 * cleanup path, and this means that no one is using this file anymore.
724 * So, for example, epoll_ctl() cannot hit here since if we reach this 724 * So, for example, epoll_ctl() cannot hit here since if we reach this
725 * point, the file counter already went to zero and fget() would fail. 725 * point, the file counter already went to zero and fget() would fail.
726 * The only hit might come from ep_free() but by holding the mutex 726 * The only hit might come from ep_free() but by holding the mutex
@@ -1112,7 +1112,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1112 * Trigger mode, we need to insert back inside 1112 * Trigger mode, we need to insert back inside
1113 * the ready list, so that the next call to 1113 * the ready list, so that the next call to
1114 * epoll_wait() will check again the events 1114 * epoll_wait() will check again the events
1115 * availability. At this point, noone can insert 1115 * availability. At this point, no one can insert
1116 * into ep->rdllist besides us. The epoll_ctl() 1116 * into ep->rdllist besides us. The epoll_ctl()
1117 * callers are locked out by 1117 * callers are locked out by
1118 * ep_scan_ready_list() holding "mtx" and the 1118 * ep_scan_ready_list() holding "mtx" and the
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..8328beb9016f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1659,6 +1659,7 @@ static int zap_process(struct task_struct *start, int exit_code)
1659 1659
1660 t = start; 1660 t = start;
1661 do { 1661 do {
1662 task_clear_group_stop_pending(t);
1662 if (t != current && t->mm) { 1663 if (t != current && t->mm) {
1663 sigaddset(&t->pending.signal, SIGKILL); 1664 sigaddset(&t->pending.signal, SIGKILL);
1664 signal_wake_up(t, 1); 1665 signal_wake_up(t, 1);
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 5e74ad3d4009..3bbd46956d77 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -115,7 +115,7 @@ struct exofs_sb_stats {
115 * Describes the raid used in the FS. It is part of the device table. 115 * Describes the raid used in the FS. It is part of the device table.
116 * This here is taken from the pNFS-objects definition. In exofs we 116 * This here is taken from the pNFS-objects definition. In exofs we
117 * use one raid policy through-out the filesystem. (NOTE: the funny 117 * use one raid policy through-out the filesystem. (NOTE: the funny
118 * alignment at begining. We take care of it at exofs_device_table. 118 * alignment at beginning. We take care of it at exofs_device_table.
119 */ 119 */
120struct exofs_dt_data_map { 120struct exofs_dt_data_map {
121 __le32 cb_num_comps; 121 __le32 cb_num_comps;
@@ -136,7 +136,7 @@ struct exofs_dt_device_info {
136 u8 systemid[OSD_SYSTEMID_LEN]; 136 u8 systemid[OSD_SYSTEMID_LEN];
137 __le64 long_name_offset; /* If !0 then offset-in-file */ 137 __le64 long_name_offset; /* If !0 then offset-in-file */
138 __le32 osdname_len; /* */ 138 __le32 osdname_len; /* */
139 u8 osdname[44]; /* Embbeded, Ususally an asci uuid */ 139 u8 osdname[44]; /* Embbeded, Usually an asci uuid */
140} __packed; 140} __packed;
141 141
142/* 142/*
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 0d06f4e75699..8f44cef1b3ef 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -850,7 +850,7 @@ static int find_next_reservable_window(
850 rsv_window_remove(sb, my_rsv); 850 rsv_window_remove(sb, my_rsv);
851 851
852 /* 852 /*
853 * Let's book the whole avaliable window for now. We will check the 853 * Let's book the whole available window for now. We will check the
854 * disk bitmap later and then, if there are free blocks then we adjust 854 * disk bitmap later and then, if there are free blocks then we adjust
855 * the window size if it's larger than requested. 855 * the window size if it's larger than requested.
856 * Otherwise, we will remove this node from the tree next time 856 * Otherwise, we will remove this node from the tree next time
@@ -1357,9 +1357,9 @@ retry_alloc:
1357 goto allocated; 1357 goto allocated;
1358 } 1358 }
1359 /* 1359 /*
1360 * We may end up a bogus ealier ENOSPC error due to 1360 * We may end up a bogus earlier ENOSPC error due to
1361 * filesystem is "full" of reservations, but 1361 * filesystem is "full" of reservations, but
1362 * there maybe indeed free blocks avaliable on disk 1362 * there maybe indeed free blocks available on disk
1363 * In this case, we just forget about the reservations 1363 * In this case, we just forget about the reservations
1364 * just do block allocation as without reservations. 1364 * just do block allocation as without reservations.
1365 */ 1365 */
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c47f706878b5..788e09a07f7e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -305,7 +305,7 @@ static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind)
305 return ind->bh->b_blocknr; 305 return ind->bh->b_blocknr;
306 306
307 /* 307 /*
308 * It is going to be refered from inode itself? OK, just put it into 308 * It is going to be referred from inode itself? OK, just put it into
309 * the same cylinder group then. 309 * the same cylinder group then.
310 */ 310 */
311 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); 311 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group);
@@ -913,7 +913,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
913 * 913 *
914 * When we do truncate() we may have to clean the ends of several indirect 914 * When we do truncate() we may have to clean the ends of several indirect
915 * blocks but leave the blocks themselves alive. Block is partially 915 * blocks but leave the blocks themselves alive. Block is partially
916 * truncated if some data below the new i_size is refered from it (and 916 * truncated if some data below the new i_size is referred from it (and
917 * it is on the path to the first completely truncated data block, indeed). 917 * it is on the path to the first completely truncated data block, indeed).
918 * We have to free the top of that path along with everything to the right 918 * We have to free the top of that path along with everything to the right
919 * of the path. Since no allocation past the truncation point is possible 919 * of the path. Since no allocation past the truncation point is possible
@@ -990,7 +990,7 @@ no_top:
990 * @p: array of block numbers 990 * @p: array of block numbers
991 * @q: points immediately past the end of array 991 * @q: points immediately past the end of array
992 * 992 *
993 * We are freeing all blocks refered from that array (numbers are 993 * We are freeing all blocks referred from that array (numbers are
994 * stored as little-endian 32-bit) and updating @inode->i_blocks 994 * stored as little-endian 32-bit) and updating @inode->i_blocks
995 * appropriately. 995 * appropriately.
996 */ 996 */
@@ -1030,7 +1030,7 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1030 * @q: pointer immediately past the end of array 1030 * @q: pointer immediately past the end of array
1031 * @depth: depth of the branches to free 1031 * @depth: depth of the branches to free
1032 * 1032 *
1033 * We are freeing all blocks refered from these branches (numbers are 1033 * We are freeing all blocks referred from these branches (numbers are
1034 * stored as little-endian 32-bit) and updating @inode->i_blocks 1034 * stored as little-endian 32-bit) and updating @inode->i_blocks
1035 * appropriately. 1035 * appropriately.
1036 */ 1036 */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7731695e65d9..0a78dae7e2cb 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1382,7 +1382,7 @@ static struct dentry *ext2_mount(struct file_system_type *fs_type,
1382 1382
1383/* Read data from quotafile - avoid pagecache and such because we cannot afford 1383/* Read data from quotafile - avoid pagecache and such because we cannot afford
1384 * acquiring the locks... As quota files are never truncated and quota code 1384 * acquiring the locks... As quota files are never truncated and quota code
1385 * itself serializes the operations (and noone else should touch the files) 1385 * itself serializes the operations (and no one else should touch the files)
1386 * we don't have to be afraid of races */ 1386 * we don't have to be afraid of races */
1387static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, 1387static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
1388 size_t len, loff_t off) 1388 size_t len, loff_t off)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index c2e4dce984d2..529970617a21 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -35,7 +35,7 @@
35 * +------------------+ 35 * +------------------+
36 * 36 *
37 * The block header is followed by multiple entry descriptors. These entry 37 * The block header is followed by multiple entry descriptors. These entry
38 * descriptors are variable in size, and alligned to EXT2_XATTR_PAD 38 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
39 * byte boundaries. The entry descriptors are sorted by attribute name, 39 * byte boundaries. The entry descriptors are sorted by attribute name,
40 * so that two extended attribute blocks can be compared efficiently. 40 * so that two extended attribute blocks can be compared efficiently.
41 * 41 *
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 153242187fce..fe52297e31ad 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -590,7 +590,7 @@ do_more:
590 BUFFER_TRACE(debug_bh, "Deleted!"); 590 BUFFER_TRACE(debug_bh, "Deleted!");
591 if (!bh2jh(bitmap_bh)->b_committed_data) 591 if (!bh2jh(bitmap_bh)->b_committed_data)
592 BUFFER_TRACE(debug_bh, 592 BUFFER_TRACE(debug_bh,
593 "No commited data in bitmap"); 593 "No committed data in bitmap");
594 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); 594 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
595 __brelse(debug_bh); 595 __brelse(debug_bh);
596 } 596 }
@@ -1063,7 +1063,7 @@ static int find_next_reservable_window(
1063 rsv_window_remove(sb, my_rsv); 1063 rsv_window_remove(sb, my_rsv);
1064 1064
1065 /* 1065 /*
1066 * Let's book the whole avaliable window for now. We will check the 1066 * Let's book the whole available window for now. We will check the
1067 * disk bitmap later and then, if there are free blocks then we adjust 1067 * disk bitmap later and then, if there are free blocks then we adjust
1068 * the window size if it's larger than requested. 1068 * the window size if it's larger than requested.
1069 * Otherwise, we will remove this node from the tree next time 1069 * Otherwise, we will remove this node from the tree next time
@@ -1456,7 +1456,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1456 * 1456 *
1457 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1457 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1458 * it is profitable to retry the operation, this function will wait 1458 * it is profitable to retry the operation, this function will wait
1459 * for the current or commiting transaction to complete, and then 1459 * for the current or committing transaction to complete, and then
1460 * return TRUE. 1460 * return TRUE.
1461 * 1461 *
1462 * if the total number of retries exceed three times, return FALSE. 1462 * if the total number of retries exceed three times, return FALSE.
@@ -1632,9 +1632,9 @@ retry_alloc:
1632 goto allocated; 1632 goto allocated;
1633 } 1633 }
1634 /* 1634 /*
1635 * We may end up a bogus ealier ENOSPC error due to 1635 * We may end up a bogus earlier ENOSPC error due to
1636 * filesystem is "full" of reservations, but 1636 * filesystem is "full" of reservations, but
1637 * there maybe indeed free blocks avaliable on disk 1637 * there maybe indeed free blocks available on disk
1638 * In this case, we just forget about the reservations 1638 * In this case, we just forget about the reservations
1639 * just do block allocation as without reservations. 1639 * just do block allocation as without reservations.
1640 */ 1640 */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index fe2541d250e4..68b2e43d7c35 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2055,7 +2055,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
2055 * 2055 *
2056 * When we do truncate() we may have to clean the ends of several 2056 * When we do truncate() we may have to clean the ends of several
2057 * indirect blocks but leave the blocks themselves alive. Block is 2057 * indirect blocks but leave the blocks themselves alive. Block is
2058 * partially truncated if some data below the new i_size is refered 2058 * partially truncated if some data below the new i_size is referred
2059 * from it (and it is on the path to the first completely truncated 2059 * from it (and it is on the path to the first completely truncated
2060 * data block, indeed). We have to free the top of that path along 2060 * data block, indeed). We have to free the top of that path along
2061 * with everything to the right of the path. Since no allocation 2061 * with everything to the right of the path. Since no allocation
@@ -2184,7 +2184,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
2184 * @first: array of block numbers 2184 * @first: array of block numbers
2185 * @last: points immediately past the end of array 2185 * @last: points immediately past the end of array
2186 * 2186 *
2187 * We are freeing all blocks refered from that array (numbers are stored as 2187 * We are freeing all blocks referred from that array (numbers are stored as
2188 * little-endian 32-bit) and updating @inode->i_blocks appropriately. 2188 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
2189 * 2189 *
2190 * We accumulate contiguous runs of blocks to free. Conveniently, if these 2190 * We accumulate contiguous runs of blocks to free. Conveniently, if these
@@ -2272,7 +2272,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2272 * @last: pointer immediately past the end of array 2272 * @last: pointer immediately past the end of array
2273 * @depth: depth of the branches to free 2273 * @depth: depth of the branches to free
2274 * 2274 *
2275 * We are freeing all blocks refered from these branches (numbers are 2275 * We are freeing all blocks referred from these branches (numbers are
2276 * stored as little-endian 32-bit) and updating @inode->i_blocks 2276 * stored as little-endian 32-bit) and updating @inode->i_blocks
2277 * appropriately. 2277 * appropriately.
2278 */ 2278 */
@@ -3291,7 +3291,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3291 if (ext3_should_journal_data(inode)) 3291 if (ext3_should_journal_data(inode))
3292 ret = 3 * (bpp + indirects) + 2; 3292 ret = 3 * (bpp + indirects) + 2;
3293 else 3293 else
3294 ret = 2 * (bpp + indirects) + 2; 3294 ret = 2 * (bpp + indirects) + indirects + 2;
3295 3295
3296#ifdef CONFIG_QUOTA 3296#ifdef CONFIG_QUOTA
3297 /* We know that structure was already allocated during dquot_initialize so 3297 /* We know that structure was already allocated during dquot_initialize so
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 108b142e11ed..7916e4ce166a 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -1009,7 +1009,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1009 1009
1010 if (test_opt(sb, DEBUG)) 1010 if (test_opt(sb, DEBUG))
1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK 1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1012 " upto "E3FSBLK" blocks\n", 1012 " up to "E3FSBLK" blocks\n",
1013 o_blocks_count, n_blocks_count); 1013 o_blocks_count, n_blocks_count);
1014 1014
1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 071689f86e18..3c6a9e0eadc1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2925,7 +2925,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2925 2925
2926/* Read data from quotafile - avoid pagecache and such because we cannot afford 2926/* Read data from quotafile - avoid pagecache and such because we cannot afford
2927 * acquiring the locks... As quota files are never truncated and quota code 2927 * acquiring the locks... As quota files are never truncated and quota code
2928 * itself serializes the operations (and noone else should touch the files) 2928 * itself serializes the operations (and no one else should touch the files)
2929 * we don't have to be afraid of races */ 2929 * we don't have to be afraid of races */
2930static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2930static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2931 size_t len, loff_t off) 2931 size_t len, loff_t off)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 97b970e7dd13..1c67139ad4b4 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -547,7 +547,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
547 * 547 *
548 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if 548 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
549 * it is profitable to retry the operation, this function will wait 549 * it is profitable to retry the operation, this function will wait
550 * for the current or commiting transaction to complete, and then 550 * for the current or committing transaction to complete, and then
551 * return TRUE. 551 * return TRUE.
552 * 552 *
553 * if the total number of retries exceed three times, return FALSE. 553 * if the total number of retries exceed three times, return FALSE.
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index e25e99bf7ee1..d0f53538a57f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,8 +86,8 @@
86 86
87#ifdef CONFIG_QUOTA 87#ifdef CONFIG_QUOTA
88/* Amount of blocks needed for quota update - we know that the structure was 88/* Amount of blocks needed for quota update - we know that the structure was
89 * allocated so we need to update only inode+data */ 89 * allocated so we need to update only data block */
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) 90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
91/* Amount of blocks needed for quota insert/delete - we do some block writes 91/* Amount of blocks needed for quota insert/delete - we do some block writes
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index dd2cb5076ff9..4890d6f3ad15 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1729,7 +1729,7 @@ repeat:
1729 BUG_ON(npath->p_depth != path->p_depth); 1729 BUG_ON(npath->p_depth != path->p_depth);
1730 eh = npath[depth].p_hdr; 1730 eh = npath[depth].p_hdr;
1731 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 1731 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
1732 ext_debug("next leaf isnt full(%d)\n", 1732 ext_debug("next leaf isn't full(%d)\n",
1733 le16_to_cpu(eh->eh_entries)); 1733 le16_to_cpu(eh->eh_entries));
1734 path = npath; 1734 path = npath;
1735 goto repeat; 1735 goto repeat;
@@ -2533,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2533/* 2533/*
2534 * This function is called by ext4_ext_map_blocks() if someone tries to write 2534 * This function is called by ext4_ext_map_blocks() if someone tries to write
2535 * to an uninitialized extent. It may result in splitting the uninitialized 2535 * to an uninitialized extent. It may result in splitting the uninitialized
2536 * extent into multiple extents (upto three - one initialized and two 2536 * extent into multiple extents (up to three - one initialized and two
2537 * uninitialized). 2537 * uninitialized).
2538 * There are three possibilities: 2538 * There are three possibilities:
2539 * a> There is no split required: Entire extent should be initialized 2539 * a> There is no split required: Entire extent should be initialized
@@ -3174,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 path, flags); 3174 path, flags);
3175 /* 3175 /*
3176 * Flag the inode(non aio case) or end_io struct (aio case) 3176 * Flag the inode(non aio case) or end_io struct (aio case)
3177 * that this IO needs to convertion to written when IO is 3177 * that this IO needs to conversion to written when IO is
3178 * completed 3178 * completed
3179 */ 3179 */
3180 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3180 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3460,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3460 ext4_ext_mark_uninitialized(&newex); 3460 ext4_ext_mark_uninitialized(&newex);
3461 /* 3461 /*
3462 * io_end structure was created for every IO write to an 3462 * io_end structure was created for every IO write to an
3463 * uninitialized extent. To avoid unecessary conversion, 3463 * uninitialized extent. To avoid unnecessary conversion,
3464 * here we flag the IO that really needs the conversion. 3464 * here we flag the IO that really needs the conversion.
3465 * For non asycn direct IO case, flag the inode state 3465 * For non asycn direct IO case, flag the inode state
3466 * that we need to perform convertion when IO is done. 3466 * that we need to perform conversion when IO is done.
3467 */ 3467 */
3468 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3468 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3469 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3469 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7f74019d6d77..e9473cbe80df 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -101,7 +101,7 @@ extern int ext4_flush_completed_IO(struct inode *inode)
101 * to the work-to-be schedule is freed. 101 * to the work-to-be schedule is freed.
102 * 102 *
103 * Thus we need to keep the io structure still valid here after 103 * Thus we need to keep the io structure still valid here after
104 * convertion finished. The io structure has a flag to 104 * conversion finished. The io structure has a flag to
105 * avoid double converting from both fsync and background work 105 * avoid double converting from both fsync and background work
106 * queue work. 106 * queue work.
107 */ 107 */
@@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
125 * the parent directory's parent as well, and so on recursively, if 125 * the parent directory's parent as well, and so on recursively, if
126 * they are also freshly created. 126 * they are also freshly created.
127 */ 127 */
128static void ext4_sync_parent(struct inode *inode) 128static int ext4_sync_parent(struct inode *inode)
129{ 129{
130 struct writeback_control wbc;
130 struct dentry *dentry = NULL; 131 struct dentry *dentry = NULL;
132 int ret = 0;
131 133
132 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 134 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
133 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); 135 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
136 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) 138 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
137 break; 139 break;
138 inode = dentry->d_parent->d_inode; 140 inode = dentry->d_parent->d_inode;
139 sync_mapping_buffers(inode->i_mapping); 141 ret = sync_mapping_buffers(inode->i_mapping);
142 if (ret)
143 break;
144 memset(&wbc, 0, sizeof(wbc));
145 wbc.sync_mode = WB_SYNC_ALL;
146 wbc.nr_to_write = 0; /* only write out the inode */
147 ret = sync_inode(inode, &wbc);
148 if (ret)
149 break;
140 } 150 }
151 return ret;
141} 152}
142 153
143/* 154/*
@@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync)
176 if (!journal) { 187 if (!journal) {
177 ret = generic_file_fsync(file, datasync); 188 ret = generic_file_fsync(file, datasync);
178 if (!ret && !list_empty(&inode->i_dentry)) 189 if (!ret && !list_empty(&inode->i_dentry))
179 ext4_sync_parent(inode); 190 ret = ext4_sync_parent(inode);
180 goto out; 191 goto out;
181 } 192 }
182 193
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1a86282b9024..f2fa5e8a582c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2502 * for partial write. 2502 * for partial write.
2503 */ 2503 */
2504 set_buffer_new(bh); 2504 set_buffer_new(bh);
2505 set_buffer_mapped(bh);
2505 } 2506 }
2506 return 0; 2507 return 0;
2507} 2508}
@@ -2588,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2588 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2589 * because we should have holes filled from ext4_page_mkwrite(). We even don't
2589 * need to file the inode to the transaction's list in ordered mode because if 2590 * need to file the inode to the transaction's list in ordered mode because if
2590 * we are writing back data added by write(), the inode is already there and if 2591 * we are writing back data added by write(), the inode is already there and if
2591 * we are writing back data modified via mmap(), noone guarantees in which 2592 * we are writing back data modified via mmap(), no one guarantees in which
2592 * transaction the data will hit the disk. In case we are journaling data, we 2593 * transaction the data will hit the disk. In case we are journaling data, we
2593 * cannot start transaction directly because transaction start ranks above page 2594 * cannot start transaction directly because transaction start ranks above page
2594 * lock so we have to do some magic. 2595 * lock so we have to do some magic.
@@ -2690,7 +2691,7 @@ static int ext4_writepage(struct page *page,
2690 2691
2691/* 2692/*
2692 * This is called via ext4_da_writepages() to 2693 * This is called via ext4_da_writepages() to
2693 * calulate the total number of credits to reserve to fit 2694 * calculate the total number of credits to reserve to fit
2694 * a single extent allocation into a single transaction, 2695 * a single extent allocation into a single transaction,
2695 * ext4_da_writpeages() will loop calling this before 2696 * ext4_da_writpeages() will loop calling this before
2696 * the block allocation. 2697 * the block allocation.
@@ -3304,7 +3305,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3304 * the pages by calling redirty_page_for_writepage() but that 3305 * the pages by calling redirty_page_for_writepage() but that
3305 * would be ugly in the extreme. So instead we would need to 3306 * would be ugly in the extreme. So instead we would need to
3306 * replicate parts of the code in the above functions, 3307 * replicate parts of the code in the above functions,
3307 * simplifying them becuase we wouldn't actually intend to 3308 * simplifying them because we wouldn't actually intend to
3308 * write out the pages, but rather only collect contiguous 3309 * write out the pages, but rather only collect contiguous
3309 * logical block extents, call the multi-block allocator, and 3310 * logical block extents, call the multi-block allocator, and
3310 * then update the buffer heads with the block allocations. 3311 * then update the buffer heads with the block allocations.
@@ -3694,7 +3695,7 @@ retry:
3694 * 3695 *
3695 * The unwrritten extents will be converted to written when DIO is completed. 3696 * The unwrritten extents will be converted to written when DIO is completed.
3696 * For async direct IO, since the IO may still pending when return, we 3697 * For async direct IO, since the IO may still pending when return, we
3697 * set up an end_io call back function, which will do the convertion 3698 * set up an end_io call back function, which will do the conversion
3698 * when async direct IO completed. 3699 * when async direct IO completed.
3699 * 3700 *
3700 * If the O_DIRECT write will extend the file then add this inode to the 3701 * If the O_DIRECT write will extend the file then add this inode to the
@@ -3717,7 +3718,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3717 * We could direct write to holes and fallocate. 3718 * We could direct write to holes and fallocate.
3718 * 3719 *
3719 * Allocated blocks to fill the hole are marked as uninitialized 3720 * Allocated blocks to fill the hole are marked as uninitialized
3720 * to prevent paralel buffered read to expose the stale data 3721 * to prevent parallel buffered read to expose the stale data
3721 * before DIO complete the data IO. 3722 * before DIO complete the data IO.
3722 * 3723 *
3723 * As to previously fallocated extents, ext4 get_block 3724 * As to previously fallocated extents, ext4 get_block
@@ -3778,7 +3779,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3778 int err; 3779 int err;
3779 /* 3780 /*
3780 * for non AIO case, since the IO is already 3781 * for non AIO case, since the IO is already
3781 * completed, we could do the convertion right here 3782 * completed, we could do the conversion right here
3782 */ 3783 */
3783 err = ext4_convert_unwritten_extents(inode, 3784 err = ext4_convert_unwritten_extents(inode,
3784 offset, ret); 3785 offset, ret);
@@ -4025,7 +4026,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
4025 * 4026 *
4026 * When we do truncate() we may have to clean the ends of several 4027 * When we do truncate() we may have to clean the ends of several
4027 * indirect blocks but leave the blocks themselves alive. Block is 4028 * indirect blocks but leave the blocks themselves alive. Block is
4028 * partially truncated if some data below the new i_size is refered 4029 * partially truncated if some data below the new i_size is referred
4029 * from it (and it is on the path to the first completely truncated 4030 * from it (and it is on the path to the first completely truncated
4030 * data block, indeed). We have to free the top of that path along 4031 * data block, indeed). We have to free the top of that path along
4031 * with everything to the right of the path. Since no allocation 4032 * with everything to the right of the path. Since no allocation
@@ -4169,7 +4170,7 @@ out_err:
4169 * @first: array of block numbers 4170 * @first: array of block numbers
4170 * @last: points immediately past the end of array 4171 * @last: points immediately past the end of array
4171 * 4172 *
4172 * We are freeing all blocks refered from that array (numbers are stored as 4173 * We are freeing all blocks referred from that array (numbers are stored as
4173 * little-endian 32-bit) and updating @inode->i_blocks appropriately. 4174 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
4174 * 4175 *
4175 * We accumulate contiguous runs of blocks to free. Conveniently, if these 4176 * We accumulate contiguous runs of blocks to free. Conveniently, if these
@@ -4261,7 +4262,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4261 * @last: pointer immediately past the end of array 4262 * @last: pointer immediately past the end of array
4262 * @depth: depth of the branches to free 4263 * @depth: depth of the branches to free
4263 * 4264 *
4264 * We are freeing all blocks refered from these branches (numbers are 4265 * We are freeing all blocks referred from these branches (numbers are
4265 * stored as little-endian 32-bit) and updating @inode->i_blocks 4266 * stored as little-endian 32-bit) and updating @inode->i_blocks
4266 * appropriately. 4267 * appropriately.
4267 */ 4268 */
@@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode)
4429 Indirect chain[4]; 4430 Indirect chain[4];
4430 Indirect *partial; 4431 Indirect *partial;
4431 __le32 nr = 0; 4432 __le32 nr = 0;
4432 int n; 4433 int n = 0;
4433 ext4_lblk_t last_block; 4434 ext4_lblk_t last_block, max_block;
4434 unsigned blocksize = inode->i_sb->s_blocksize; 4435 unsigned blocksize = inode->i_sb->s_blocksize;
4435 4436
4436 trace_ext4_truncate_enter(inode); 4437 trace_ext4_truncate_enter(inode);
@@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
4455 4456
4456 last_block = (inode->i_size + blocksize-1) 4457 last_block = (inode->i_size + blocksize-1)
4457 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 4458 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4459 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
4460 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4458 4461
4459 if (inode->i_size & (blocksize - 1)) 4462 if (inode->i_size & (blocksize - 1))
4460 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 4463 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
4461 goto out_stop; 4464 goto out_stop;
4462 4465
4463 n = ext4_block_to_path(inode, last_block, offsets, NULL); 4466 if (last_block != max_block) {
4464 if (n == 0) 4467 n = ext4_block_to_path(inode, last_block, offsets, NULL);
4465 goto out_stop; /* error */ 4468 if (n == 0)
4469 goto out_stop; /* error */
4470 }
4466 4471
4467 /* 4472 /*
4468 * OK. This truncate is going to happen. We add the inode to the 4473 * OK. This truncate is going to happen. We add the inode to the
@@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
4493 */ 4498 */
4494 ei->i_disksize = inode->i_size; 4499 ei->i_disksize = inode->i_size;
4495 4500
4496 if (n == 1) { /* direct blocks */ 4501 if (last_block == max_block) {
4502 /*
4503 * It is unnecessary to free any data blocks if last_block is
4504 * equal to the indirect block limit.
4505 */
4506 goto out_unlock;
4507 } else if (n == 1) { /* direct blocks */
4497 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 4508 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
4498 i_data + EXT4_NDIR_BLOCKS); 4509 i_data + EXT4_NDIR_BLOCKS);
4499 goto do_indirects; 4510 goto do_indirects;
@@ -4553,6 +4564,7 @@ do_indirects:
4553 ; 4564 ;
4554 } 4565 }
4555 4566
4567out_unlock:
4556 up_write(&ei->i_data_sem); 4568 up_write(&ei->i_data_sem);
4557 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4569 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4558 ext4_mark_inode_dirty(handle, inode); 4570 ext4_mark_inode_dirty(handle, inode);
@@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
5398 /* if nrblocks are contiguous */ 5410 /* if nrblocks are contiguous */
5399 if (chunk) { 5411 if (chunk) {
5400 /* 5412 /*
5401 * With N contiguous data blocks, it need at most 5413 * With N contiguous data blocks, we need at most
5402 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks 5414 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
5403 * 2 dindirect blocks 5415 * 2 dindirect blocks, and 1 tindirect block
5404 * 1 tindirect block
5405 */ 5416 */
5406 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); 5417 return DIV_ROUND_UP(nrblocks,
5407 return indirects + 3; 5418 EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
5408 } 5419 }
5409 /* 5420 /*
5410 * if nrblocks are not contiguous, worse case, each block touch 5421 * if nrblocks are not contiguous, worse case, each block touch
@@ -5478,7 +5489,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5478} 5489}
5479 5490
5480/* 5491/*
5481 * Calulate the total number of credits to reserve to fit 5492 * Calculate the total number of credits to reserve to fit
5482 * the modification of a single pages into a single transaction, 5493 * the modification of a single pages into a single transaction,
5483 * which may include multiple chunks of block allocations. 5494 * which may include multiple chunks of block allocations.
5484 * 5495 *
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a5837a837a8b..d8a16eecf1d5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -92,7 +92,7 @@
92 * between CPUs. It is possible to get scheduled at this point. 92 * between CPUs. It is possible to get scheduled at this point.
93 * 93 *
94 * The locality group prealloc space is used looking at whether we have 94 * The locality group prealloc space is used looking at whether we have
95 * enough free space (pa_free) withing the prealloc space. 95 * enough free space (pa_free) within the prealloc space.
96 * 96 *
97 * If we can't allocate blocks via inode prealloc or/and locality group 97 * If we can't allocate blocks via inode prealloc or/and locality group
98 * prealloc then we look at the buddy cache. The buddy cache is represented 98 * prealloc then we look at the buddy cache. The buddy cache is represented
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d1bafa57f483..92816b4e0f16 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode)
517 * start with one credit accounted for 517 * start with one credit accounted for
518 * superblock modification. 518 * superblock modification.
519 * 519 *
520 * For the tmp_inode we already have commited the 520 * For the tmp_inode we already have committed the
521 * trascation that created the inode. Later as and 521 * trascation that created the inode. Later as and
522 * when we add extents we extent the journal 522 * when we add extents we extent the journal
523 */ 523 */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 22546ad7f0ae..8553dfb310af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
242 * journal_end calls result in the superblock being marked dirty, so 242 * journal_end calls result in the superblock being marked dirty, so
243 * that sync() will call the filesystem's write_super callback if 243 * that sync() will call the filesystem's write_super callback if
244 * appropriate. 244 * appropriate.
245 *
246 * To avoid j_barrier hold in userspace when a user calls freeze(),
247 * ext4 prevents a new handle from being started by s_frozen, which
248 * is in an upper layer.
245 */ 249 */
246handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 250handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
247{ 251{
248 journal_t *journal; 252 journal_t *journal;
253 handle_t *handle;
249 254
250 if (sb->s_flags & MS_RDONLY) 255 if (sb->s_flags & MS_RDONLY)
251 return ERR_PTR(-EROFS); 256 return ERR_PTR(-EROFS);
252 257
253 vfs_check_frozen(sb, SB_FREEZE_TRANS);
254 /* Special case here: if the journal has aborted behind our
255 * backs (eg. EIO in the commit thread), then we still need to
256 * take the FS itself readonly cleanly. */
257 journal = EXT4_SB(sb)->s_journal; 258 journal = EXT4_SB(sb)->s_journal;
258 if (journal) { 259 handle = ext4_journal_current_handle();
259 if (is_journal_aborted(journal)) { 260
260 ext4_abort(sb, "Detected aborted journal"); 261 /*
261 return ERR_PTR(-EROFS); 262 * If a handle has been started, it should be allowed to
262 } 263 * finish, otherwise deadlock could happen between freeze
263 return jbd2_journal_start(journal, nblocks); 264 * and others(e.g. truncate) due to the restart of the
265 * journal handle if the filesystem is forzen and active
266 * handles are not stopped.
267 */
268 if (!handle)
269 vfs_check_frozen(sb, SB_FREEZE_TRANS);
270
271 if (!journal)
272 return ext4_get_nojournal();
273 /*
274 * Special case here: if the journal has aborted behind our
275 * backs (eg. EIO in the commit thread), then we still need to
276 * take the FS itself readonly cleanly.
277 */
278 if (is_journal_aborted(journal)) {
279 ext4_abort(sb, "Detected aborted journal");
280 return ERR_PTR(-EROFS);
264 } 281 }
265 return ext4_get_nojournal(); 282 return jbd2_journal_start(journal, nblocks);
266} 283}
267 284
268/* 285/*
@@ -617,7 +634,7 @@ __acquires(bitlock)
617 * filesystem will have already been marked read/only and the 634 * filesystem will have already been marked read/only and the
618 * journal has been aborted. We return 1 as a hint to callers 635 * journal has been aborted. We return 1 as a hint to callers
619 * who might what to use the return value from 636 * who might what to use the return value from
620 * ext4_grp_locked_error() to distinguish beween the 637 * ext4_grp_locked_error() to distinguish between the
621 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 638 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
622 * aggressively from the ext4 function in question, with a 639 * aggressively from the ext4 function in question, with a
623 * more appropriate error code. 640 * more appropriate error code.
@@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
2975 mutex_unlock(&ext4_li_info->li_list_mtx); 2992 mutex_unlock(&ext4_li_info->li_list_mtx);
2976 2993
2977 sbi->s_li_request = elr; 2994 sbi->s_li_request = elr;
2995 /*
2996 * set elr to NULL here since it has been inserted to
2997 * the request_list and the removal and free of it is
2998 * handled by ext4_clear_request_list from now on.
2999 */
3000 elr = NULL;
2978 3001
2979 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3002 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2980 ret = ext4_run_lazyinit_thread(); 3003 ret = ext4_run_lazyinit_thread();
@@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3385 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3408 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3386 spin_lock_init(&sbi->s_next_gen_lock); 3409 spin_lock_init(&sbi->s_next_gen_lock);
3387 3410
3411 init_timer(&sbi->s_err_report);
3412 sbi->s_err_report.function = print_daily_error_info;
3413 sbi->s_err_report.data = (unsigned long) sb;
3414
3388 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3415 err = percpu_counter_init(&sbi->s_freeblocks_counter,
3389 ext4_count_free_blocks(sb)); 3416 ext4_count_free_blocks(sb));
3390 if (!err) { 3417 if (!err) {
@@ -3646,9 +3673,6 @@ no_journal:
3646 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3673 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3647 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3674 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3648 3675
3649 init_timer(&sbi->s_err_report);
3650 sbi->s_err_report.function = print_daily_error_info;
3651 sbi->s_err_report.data = (unsigned long) sb;
3652 if (es->s_error_count) 3676 if (es->s_error_count)
3653 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3677 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3654 3678
@@ -3672,6 +3696,7 @@ failed_mount_wq:
3672 sbi->s_journal = NULL; 3696 sbi->s_journal = NULL;
3673 } 3697 }
3674failed_mount3: 3698failed_mount3:
3699 del_timer(&sbi->s_err_report);
3675 if (sbi->s_flex_groups) { 3700 if (sbi->s_flex_groups) {
3676 if (is_vmalloc_addr(sbi->s_flex_groups)) 3701 if (is_vmalloc_addr(sbi->s_flex_groups))
3677 vfree(sbi->s_flex_groups); 3702 vfree(sbi->s_flex_groups);
@@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4138/* 4163/*
4139 * LVM calls this function before a (read-only) snapshot is created. This 4164 * LVM calls this function before a (read-only) snapshot is created. This
4140 * gives us a chance to flush the journal completely and mark the fs clean. 4165 * gives us a chance to flush the journal completely and mark the fs clean.
4166 *
4167 * Note that only this function cannot bring a filesystem to be in a clean
4168 * state independently, because ext4 prevents a new handle from being started
4169 * by @sb->s_frozen, which stays in an upper layer. It thus needs help from
4170 * the upper layer.
4141 */ 4171 */
4142static int ext4_freeze(struct super_block *sb) 4172static int ext4_freeze(struct super_block *sb)
4143{ 4173{
@@ -4614,17 +4644,30 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4614 4644
4615static int ext4_quota_off(struct super_block *sb, int type) 4645static int ext4_quota_off(struct super_block *sb, int type)
4616{ 4646{
4647 struct inode *inode = sb_dqopt(sb)->files[type];
4648 handle_t *handle;
4649
4617 /* Force all delayed allocation blocks to be allocated. 4650 /* Force all delayed allocation blocks to be allocated.
4618 * Caller already holds s_umount sem */ 4651 * Caller already holds s_umount sem */
4619 if (test_opt(sb, DELALLOC)) 4652 if (test_opt(sb, DELALLOC))
4620 sync_filesystem(sb); 4653 sync_filesystem(sb);
4621 4654
4655 /* Update modification times of quota files when userspace can
4656 * start looking at them */
4657 handle = ext4_journal_start(inode, 1);
4658 if (IS_ERR(handle))
4659 goto out;
4660 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4661 ext4_mark_inode_dirty(handle, inode);
4662 ext4_journal_stop(handle);
4663
4664out:
4622 return dquot_quota_off(sb, type); 4665 return dquot_quota_off(sb, type);
4623} 4666}
4624 4667
4625/* Read data from quotafile - avoid pagecache and such because we cannot afford 4668/* Read data from quotafile - avoid pagecache and such because we cannot afford
4626 * acquiring the locks... As quota files are never truncated and quota code 4669 * acquiring the locks... As quota files are never truncated and quota code
4627 * itself serializes the operations (and noone else should touch the files) 4670 * itself serializes the operations (and no one else should touch the files)
4628 * we don't have to be afraid of races */ 4671 * we don't have to be afraid of races */
4629static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 4672static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
4630 size_t len, loff_t off) 4673 size_t len, loff_t off)
@@ -4714,9 +4757,8 @@ out:
4714 if (inode->i_size < off + len) { 4757 if (inode->i_size < off + len) {
4715 i_size_write(inode, off + len); 4758 i_size_write(inode, off + len);
4716 EXT4_I(inode)->i_disksize = inode->i_size; 4759 EXT4_I(inode)->i_disksize = inode->i_size;
4760 ext4_mark_inode_dirty(handle, inode);
4717 } 4761 }
4718 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4719 ext4_mark_inode_dirty(handle, inode);
4720 mutex_unlock(&inode->i_mutex); 4762 mutex_unlock(&inode->i_mutex);
4721 return len; 4763 return len;
4722} 4764}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index bf93ad2bee07..6b088641f5bf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -7,6 +7,7 @@
7#include <linux/exportfs.h> 7#include <linux/exportfs.h>
8#include <linux/fs_struct.h> 8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h> 9#include <linux/fsnotify.h>
10#include <linux/personality.h>
10#include <asm/uaccess.h> 11#include <asm/uaccess.h>
11#include "internal.h" 12#include "internal.h"
12 13
diff --git a/fs/file.c b/fs/file.c
index 0be344755c02..4c6992d8f3ba 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/mmzone.h>
12#include <linux/time.h> 13#include <linux/time.h>
13#include <linux/sched.h> 14#include <linux/sched.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
@@ -39,14 +40,17 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
39 */ 40 */
40static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); 41static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
41 42
42static inline void *alloc_fdmem(unsigned int size) 43static void *alloc_fdmem(unsigned int size)
43{ 44{
44 void *data; 45 /*
45 46 * Very large allocations can stress page reclaim, so fall back to
46 data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); 47 * vmalloc() if the allocation size will be considered "large" by the VM.
47 if (data != NULL) 48 */
48 return data; 49 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
49 50 void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
51 if (data != NULL)
52 return data;
53 }
50 return vmalloc(size); 54 return vmalloc(size);
51} 55}
52 56
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 751d6b255a12..0845f84f2a5f 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs)
110 *tmp = fs->next; 110 *tmp = fs->next;
111 fs->next = NULL; 111 fs->next = NULL;
112 write_unlock(&file_systems_lock); 112 write_unlock(&file_systems_lock);
113 synchronize_rcu();
113 return 0; 114 return 0;
114 } 115 }
115 tmp = &(*tmp)->next; 116 tmp = &(*tmp)->next;
116 } 117 }
117 write_unlock(&file_systems_lock); 118 write_unlock(&file_systems_lock);
118 119
119 synchronize_rcu();
120
121 return -EINVAL; 120 return -EINVAL;
122} 121}
123 122
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 78948b4b1894..c9a6a94e58e9 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -164,7 +164,7 @@ vxfs_read_fshead(struct super_block *sbp)
164 goto out_free_pfp; 164 goto out_free_pfp;
165 } 165 }
166 if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { 166 if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
167 printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n", 167 printk(KERN_ERR "vxfs: structural list inode is of wrong type (%x)\n",
168 VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK); 168 VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK);
169 goto out_iput_stilist; 169 goto out_iput_stilist;
170 } 170 }
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 6c5131d592f0..3360f1e678ad 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -162,7 +162,7 @@ vxfs_find_entry(struct inode *ip, struct dentry *dp, struct page **ppp)
162/** 162/**
163 * vxfs_inode_by_name - find inode number for dentry 163 * vxfs_inode_by_name - find inode number for dentry
164 * @dip: directory to search in 164 * @dip: directory to search in
165 * @dp: dentry we seach for 165 * @dp: dentry we search for
166 * 166 *
167 * Description: 167 * Description:
168 * vxfs_inode_by_name finds out the inode number of 168 * vxfs_inode_by_name finds out the inode number of
diff --git a/fs/freevxfs/vxfs_olt.h b/fs/freevxfs/vxfs_olt.h
index d8324296486f..b7b3af502615 100644
--- a/fs/freevxfs/vxfs_olt.h
+++ b/fs/freevxfs/vxfs_olt.h
@@ -60,7 +60,7 @@ enum {
60 * 60 *
61 * The Object Location Table header is placed at the beginning of each 61 * The Object Location Table header is placed at the beginning of each
62 * OLT extent. It is used to fing certain filesystem-wide metadata, e.g. 62 * OLT extent. It is used to fing certain filesystem-wide metadata, e.g.
63 * the inital inode list, the fileset header or the device configuration. 63 * the initial inode list, the fileset header or the device configuration.
64 */ 64 */
65struct vxfs_olt { 65struct vxfs_olt {
66 u_int32_t olt_magic; /* magic number */ 66 u_int32_t olt_magic; /* magic number */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b5ed541fb137..34591ee804b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -144,7 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
144 * 144 *
145 * Description: 145 * Description:
146 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 146 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
147 * started when this function returns, we make no guarentees on 147 * started when this function returns, we make no guarantees on
148 * completion. Caller need not hold sb s_umount semaphore. 148 * completion. Caller need not hold sb s_umount semaphore.
149 * 149 *
150 */ 150 */
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c6ba49bd95b3..b32eb29a4e6f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
174 if (!inode) 174 if (!inode)
175 return 0; 175 return 0;
176 176
177 if (nd->flags & LOOKUP_RCU) 177 if (nd && (nd->flags & LOOKUP_RCU))
178 return -ECHILD; 178 return -ECHILD;
179 179
180 fc = get_fuse_conn(inode); 180 fc = get_fuse_conn(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6ea00734984e..82a66466a24c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -523,7 +523,7 @@ static int fuse_readpage(struct file *file, struct page *page)
523 goto out; 523 goto out;
524 524
525 /* 525 /*
526 * Page writeback can extend beyond the liftime of the 526 * Page writeback can extend beyond the lifetime of the
527 * page-cache page, so make sure we read a properly synced 527 * page-cache page, so make sure we read a properly synced
528 * page. 528 * page.
529 */ 529 */
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1ccflags-y := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o inode.o quota.o \
7 recovery.o rgrp.o super.o sys.o trans.o util.o 7 recovery.o rgrp.o super.o sys.o trans.o util.o
8 8
9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o 9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index c71995b111bf..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -884,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
884 } 884 }
885 885
886 brelse(dibh); 886 brelse(dibh);
887 gfs2_trans_end(sdp);
888failed: 887failed:
888 gfs2_trans_end(sdp);
889 if (al) { 889 if (al) {
890 gfs2_inplace_release(ip); 890 gfs2_inplace_release(ip);
891 gfs2_quota_unlock(ip); 891 gfs2_quota_unlock(ip);
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1076 bd = bh->b_private; 1076 bd = bh->b_private;
1077 if (bd && bd->bd_ail) 1077 if (bd && bd->bd_ail)
1078 goto cannot_release; 1078 goto cannot_release;
1079 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 1079 if (buffer_pinned(bh) || buffer_dirty(bh))
1080 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 1080 goto not_possible;
1081 bh = bh->b_this_page; 1081 bh = bh->b_this_page;
1082 } while(bh != head); 1082 } while(bh != head);
1083 gfs2_log_unlock(sdp); 1083 gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1107 } while (bh != head); 1107 } while (bh != head);
1108 1108
1109 return try_to_free_buffers(page); 1109 return try_to_free_buffers(page);
1110
1111not_possible: /* Should never happen */
1112 WARN_ON(buffer_dirty(bh));
1113 WARN_ON(buffer_pinned(bh));
1110cannot_release: 1114cannot_release:
1111 gfs2_log_unlock(sdp); 1115 gfs2_log_unlock(sdp);
1112 return 0; 1116 return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ef3dc4b9fae2..74add2ddcc3f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1136,7 +1136,7 @@ void gfs2_trim_blocks(struct inode *inode)
1136 * earlier versions of GFS2 have a bug in the stuffed file reading 1136 * earlier versions of GFS2 have a bug in the stuffed file reading
1137 * code which will result in a buffer overrun if the size is larger 1137 * code which will result in a buffer overrun if the size is larger
1138 * than the max stuffed file size. In order to prevent this from 1138 * than the max stuffed file size. In order to prevent this from
1139 * occuring, such files are unstuffed, but in other cases we can 1139 * occurring, such files are unstuffed, but in other cases we can
1140 * just update the inode size directly. 1140 * just update the inode size directly.
1141 * 1141 *
1142 * Returns: 0 on success, or -ve on error 1142 * Returns: 0 on success, or -ve on error
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5c356d09c321..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
82struct qstr gfs2_qdot __read_mostly; 82struct qstr gfs2_qdot __read_mostly;
83struct qstr gfs2_qdotdot __read_mostly; 83struct qstr gfs2_qdotdot __read_mostly;
84 84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
86 u64 leaf_no, void *data);
87typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 85typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
88 const struct qstr *name, void *opaque); 86 const struct qstr *name, void *opaque);
89 87
90
91int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 88int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
92 struct buffer_head **bhp) 89 struct buffer_head **bhp)
93{ 90{
@@ -1506,7 +1503,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1506 inode = gfs2_inode_lookup(dir->i_sb, 1503 inode = gfs2_inode_lookup(dir->i_sb,
1507 be16_to_cpu(dent->de_type), 1504 be16_to_cpu(dent->de_type),
1508 be64_to_cpu(dent->de_inum.no_addr), 1505 be64_to_cpu(dent->de_inum.no_addr),
1509 be64_to_cpu(dent->de_inum.no_formal_ino)); 1506 be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1510 brelse(bh); 1507 brelse(bh);
1511 return inode; 1508 return inode;
1512 } 1509 }
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1600 */ 1597 */
1601 1598
1602int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1599int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1603 const struct gfs2_inode *nip, unsigned type) 1600 const struct gfs2_inode *nip)
1604{ 1601{
1605 struct gfs2_inode *ip = GFS2_I(inode); 1602 struct gfs2_inode *ip = GFS2_I(inode);
1606 struct buffer_head *bh; 1603 struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1616 return PTR_ERR(dent); 1613 return PTR_ERR(dent);
1617 dent = gfs2_init_dirent(inode, dent, name, bh); 1614 dent = gfs2_init_dirent(inode, dent, name, bh);
1618 gfs2_inum_out(nip, dent); 1615 gfs2_inum_out(nip, dent);
1619 dent->de_type = cpu_to_be16(type); 1616 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1620 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1617 if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1621 leaf = (struct gfs2_leaf *)bh->b_data; 1618 leaf = (struct gfs2_leaf *)bh->b_data;
1622 be16_add_cpu(&leaf->lf_entries, 1); 1619 be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1628 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1625 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1629 ip->i_entries++; 1626 ip->i_entries++;
1630 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1627 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1628 if (S_ISDIR(nip->i_inode.i_mode))
1629 inc_nlink(&ip->i_inode);
1631 gfs2_dinode_out(ip, bh->b_data); 1630 gfs2_dinode_out(ip, bh->b_data);
1632 brelse(bh); 1631 brelse(bh);
1633 error = 0; 1632 error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1672 * Returns: 0 on success, error code on failure 1671 * Returns: 0 on success, error code on failure
1673 */ 1672 */
1674 1673
1675int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) 1674int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1676{ 1675{
1676 const struct qstr *name = &dentry->d_name;
1677 struct gfs2_dirent *dent, *prev = NULL; 1677 struct gfs2_dirent *dent, *prev = NULL;
1678 struct buffer_head *bh; 1678 struct buffer_head *bh;
1679 int error; 1679 int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1714 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1714 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1715 dip->i_entries--; 1715 dip->i_entries--;
1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1717 if (S_ISDIR(dentry->d_inode->i_mode))
1718 drop_nlink(&dip->i_inode);
1717 gfs2_dinode_out(dip, bh->b_data); 1719 gfs2_dinode_out(dip, bh->b_data);
1718 brelse(bh); 1720 brelse(bh);
1719 mark_inode_dirty(&dip->i_inode); 1721 mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1768} 1770}
1769 1771
1770/** 1772/**
1771 * foreach_leaf - call a function for each leaf in a directory
1772 * @dip: the directory
1773 * @lc: the function to call for each each
1774 * @data: private data to pass to it
1775 *
1776 * Returns: errno
1777 */
1778
1779static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1780{
1781 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1782 struct buffer_head *bh;
1783 struct gfs2_leaf *leaf;
1784 u32 hsize, len;
1785 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1786 u32 index = 0;
1787 __be64 *lp;
1788 u64 leaf_no;
1789 int error = 0;
1790
1791 hsize = 1 << dip->i_depth;
1792 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1793 gfs2_consist_inode(dip);
1794 return -EIO;
1795 }
1796
1797 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1798 if (!lp)
1799 return -ENOMEM;
1800
1801 while (index < hsize) {
1802 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1803 ht_offset = index - lp_offset;
1804
1805 if (ht_offset_cur != ht_offset) {
1806 error = gfs2_dir_read_data(dip, (char *)lp,
1807 ht_offset * sizeof(__be64),
1808 sdp->sd_hash_bsize, 1);
1809 if (error != sdp->sd_hash_bsize) {
1810 if (error >= 0)
1811 error = -EIO;
1812 goto out;
1813 }
1814 ht_offset_cur = ht_offset;
1815 }
1816
1817 leaf_no = be64_to_cpu(lp[lp_offset]);
1818 if (leaf_no) {
1819 error = get_leaf(dip, leaf_no, &bh);
1820 if (error)
1821 goto out;
1822 leaf = (struct gfs2_leaf *)bh->b_data;
1823 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1824 brelse(bh);
1825
1826 error = lc(dip, index, len, leaf_no, data);
1827 if (error)
1828 goto out;
1829
1830 index = (index & ~(len - 1)) + len;
1831 } else
1832 index++;
1833 }
1834
1835 if (index != hsize) {
1836 gfs2_consist_inode(dip);
1837 error = -EIO;
1838 }
1839
1840out:
1841 kfree(lp);
1842
1843 return error;
1844}
1845
1846/**
1847 * leaf_dealloc - Deallocate a directory leaf 1773 * leaf_dealloc - Deallocate a directory leaf
1848 * @dip: the directory 1774 * @dip: the directory
1849 * @index: the hash table offset in the directory 1775 * @index: the hash table offset in the directory
1850 * @len: the number of pointers to this leaf 1776 * @len: the number of pointers to this leaf
1851 * @leaf_no: the leaf number 1777 * @leaf_no: the leaf number
1852 * @data: not used 1778 * @leaf_bh: buffer_head for the starting leaf
1779 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1853 * 1780 *
1854 * Returns: errno 1781 * Returns: errno
1855 */ 1782 */
1856 1783
1857static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1784static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1858 u64 leaf_no, void *data) 1785 u64 leaf_no, struct buffer_head *leaf_bh,
1786 int last_dealloc)
1859{ 1787{
1860 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1788 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1861 struct gfs2_leaf *tmp_leaf; 1789 struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1887 goto out_qs; 1815 goto out_qs;
1888 1816
1889 /* Count the number of leaves */ 1817 /* Count the number of leaves */
1818 bh = leaf_bh;
1890 1819
1891 for (blk = leaf_no; blk; blk = nblk) { 1820 for (blk = leaf_no; blk; blk = nblk) {
1892 error = get_leaf(dip, blk, &bh); 1821 if (blk != leaf_no) {
1893 if (error) 1822 error = get_leaf(dip, blk, &bh);
1894 goto out_rlist; 1823 if (error)
1824 goto out_rlist;
1825 }
1895 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1826 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1896 nblk = be64_to_cpu(tmp_leaf->lf_next); 1827 nblk = be64_to_cpu(tmp_leaf->lf_next);
1897 brelse(bh); 1828 if (blk != leaf_no)
1829 brelse(bh);
1898 1830
1899 gfs2_rlist_add(sdp, &rlist, blk); 1831 gfs2_rlist_add(sdp, &rlist, blk);
1900 l_blocks++; 1832 l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1918 if (error) 1850 if (error)
1919 goto out_rg_gunlock; 1851 goto out_rg_gunlock;
1920 1852
1853 bh = leaf_bh;
1854
1921 for (blk = leaf_no; blk; blk = nblk) { 1855 for (blk = leaf_no; blk; blk = nblk) {
1922 error = get_leaf(dip, blk, &bh); 1856 if (blk != leaf_no) {
1923 if (error) 1857 error = get_leaf(dip, blk, &bh);
1924 goto out_end_trans; 1858 if (error)
1859 goto out_end_trans;
1860 }
1925 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1861 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1926 nblk = be64_to_cpu(tmp_leaf->lf_next); 1862 nblk = be64_to_cpu(tmp_leaf->lf_next);
1927 brelse(bh); 1863 if (blk != leaf_no)
1864 brelse(bh);
1928 1865
1929 gfs2_free_meta(dip, blk, 1); 1866 gfs2_free_meta(dip, blk, 1);
1930 gfs2_add_inode_blocks(&dip->i_inode, -1); 1867 gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1942 goto out_end_trans; 1879 goto out_end_trans;
1943 1880
1944 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1881 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1882 /* On the last dealloc, make this a regular file in case we crash.
1883 (We don't want to free these blocks a second time.) */
1884 if (last_dealloc)
1885 dip->i_inode.i_mode = S_IFREG;
1945 gfs2_dinode_out(dip, dibh->b_data); 1886 gfs2_dinode_out(dip, dibh->b_data);
1946 brelse(dibh); 1887 brelse(dibh);
1947 1888
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1975{ 1916{
1976 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1977 struct buffer_head *bh; 1918 struct buffer_head *bh;
1978 int error; 1919 struct gfs2_leaf *leaf;
1920 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index;
1923 __be64 *lp;
1924 u64 leaf_no;
1925 int error = 0, last;
1979 1926
1980 /* Dealloc on-disk leaves to FREEMETA state */ 1927 hsize = 1 << dip->i_depth;
1981 error = foreach_leaf(dip, leaf_dealloc, NULL); 1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1982 if (error) 1929 gfs2_consist_inode(dip);
1983 return error; 1930 return -EIO;
1931 }
1984 1932
1985 /* Make this a regular file in case we crash. 1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1986 (We don't want to free these blocks a second time.) */ 1934 if (!lp)
1935 return -ENOMEM;
1987 1936
1988 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1937 while (index < hsize) {
1989 if (error) 1938 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1990 return error; 1939 ht_offset = index - lp_offset;
1991 1940
1992 error = gfs2_meta_inode_buffer(dip, &bh); 1941 if (ht_offset_cur != ht_offset) {
1993 if (!error) { 1942 error = gfs2_dir_read_data(dip, (char *)lp,
1994 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1943 ht_offset * sizeof(__be64),
1995 ((struct gfs2_dinode *)bh->b_data)->di_mode = 1944 sdp->sd_hash_bsize, 1);
1996 cpu_to_be32(S_IFREG); 1945 if (error != sdp->sd_hash_bsize) {
1997 brelse(bh); 1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh);
1956 if (error)
1957 goto out;
1958 leaf = (struct gfs2_leaf *)bh->b_data;
1959 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1960
1961 next_index = (index & ~(len - 1)) + len;
1962 last = ((next_index >= hsize) ? 1 : 0);
1963 error = leaf_dealloc(dip, index, len, leaf_no, bh,
1964 last);
1965 brelse(bh);
1966 if (error)
1967 goto out;
1968 index = next_index;
1969 } else
1970 index++;
1998 } 1971 }
1999 1972
2000 gfs2_trans_end(sdp); 1973 if (index != hsize) {
1974 gfs2_consist_inode(dip);
1975 error = -EIO;
1976 }
1977
1978out:
1979 kfree(lp);
2001 1980
2002 return error; 1981 return error;
2003} 1982}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, 22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
23 const struct gfs2_inode *ip); 23 const struct gfs2_inode *ip);
24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip, unsigned int type); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
139 struct gfs2_sbd *sdp = sb->s_fs_info; 139 struct gfs2_sbd *sdp = sb->s_fs_info;
140 struct inode *inode; 140 struct inode *inode;
141 141
142 inode = gfs2_ilookup(sb, inum->no_addr); 142 inode = gfs2_ilookup(sb, inum->no_addr, 0);
143 if (inode) { 143 if (inode) {
144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
145 iput(inode); 145 iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index b2682e073eee..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
545/** 545/**
546 * gfs2_fsync - sync the dirty data for a file (across the cluster) 546 * gfs2_fsync - sync the dirty data for a file (across the cluster)
547 * @file: the file that points to the dentry (we ignore this) 547 * @file: the file that points to the dentry (we ignore this)
548 * @dentry: the dentry that points to the inode to sync 548 * @datasync: set if we can ignore timestamp changes
549 * 549 *
550 * The VFS will flush "normal" data for us. We only need to worry 550 * The VFS will flush data for us. We only need to worry
551 * about metadata here. For journaled data, we just do a log flush 551 * about metadata here.
552 * as we can't avoid it. Otherwise we can just bale out if datasync
553 * is set. For stuffed inodes we must flush the log in order to
554 * ensure that all data is on disk.
555 *
556 * The call to write_inode_now() is there to write back metadata and
557 * the inode itself. It does also try and write the data, but thats
558 * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
559 * for us.
560 * 552 *
561 * Returns: errno 553 * Returns: errno
562 */ 554 */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
565{ 557{
566 struct inode *inode = file->f_mapping->host; 558 struct inode *inode = file->f_mapping->host;
567 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 559 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
568 int ret = 0; 560 struct gfs2_inode *ip = GFS2_I(inode);
569 561 int ret;
570 if (gfs2_is_jdata(GFS2_I(inode))) {
571 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
572 return 0;
573 }
574 562
575 if (sync_state != 0) { 563 if (datasync)
576 if (!datasync) 564 sync_state &= ~I_DIRTY_SYNC;
577 ret = write_inode_now(inode, 0);
578 565
579 if (gfs2_is_stuffed(GFS2_I(inode))) 566 if (sync_state) {
580 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 567 ret = sync_inode_metadata(inode, 1);
568 if (ret)
569 return ret;
570 gfs2_ail_flush(ip->i_gl);
581 } 571 }
582 572
583 return ret; 573 return 0;
584} 574}
585 575
586/** 576/**
@@ -617,18 +607,51 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
617 return generic_file_aio_write(iocb, iov, nr_segs, pos); 607 return generic_file_aio_write(iocb, iov, nr_segs, pos);
618} 608}
619 609
620static void empty_write_end(struct page *page, unsigned from, 610static int empty_write_end(struct page *page, unsigned from,
621 unsigned to) 611 unsigned to, int mode)
622{ 612{
623 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 613 struct inode *inode = page->mapping->host;
614 struct gfs2_inode *ip = GFS2_I(inode);
615 struct buffer_head *bh;
616 unsigned offset, blksize = 1 << inode->i_blkbits;
617 pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
624 618
625 zero_user(page, from, to-from); 619 zero_user(page, from, to-from);
626 mark_page_accessed(page); 620 mark_page_accessed(page);
627 621
628 if (!gfs2_is_writeback(ip)) 622 if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
629 gfs2_page_add_databufs(ip, page, from, to); 623 if (!gfs2_is_writeback(ip))
624 gfs2_page_add_databufs(ip, page, from, to);
630 625
631 block_commit_write(page, from, to); 626 block_commit_write(page, from, to);
627 return 0;
628 }
629
630 offset = 0;
631 bh = page_buffers(page);
632 while (offset < to) {
633 if (offset >= from) {
634 set_buffer_uptodate(bh);
635 mark_buffer_dirty(bh);
636 clear_buffer_new(bh);
637 write_dirty_buffer(bh, WRITE);
638 }
639 offset += blksize;
640 bh = bh->b_this_page;
641 }
642
643 offset = 0;
644 bh = page_buffers(page);
645 while (offset < to) {
646 if (offset >= from) {
647 wait_on_buffer(bh);
648 if (!buffer_uptodate(bh))
649 return -EIO;
650 }
651 offset += blksize;
652 bh = bh->b_this_page;
653 }
654 return 0;
632} 655}
633 656
634static int needs_empty_write(sector_t block, struct inode *inode) 657static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +666,8 @@ static int needs_empty_write(sector_t block, struct inode *inode)
643 return !buffer_mapped(&bh_map); 666 return !buffer_mapped(&bh_map);
644} 667}
645 668
646static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 669static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
670 int mode)
647{ 671{
648 struct inode *inode = page->mapping->host; 672 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize; 673 unsigned start, end, next, blksize;
@@ -668,7 +692,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
668 gfs2_block_map); 692 gfs2_block_map);
669 if (unlikely(ret)) 693 if (unlikely(ret))
670 return ret; 694 return ret;
671 empty_write_end(page, start, end); 695 ret = empty_write_end(page, start, end, mode);
696 if (unlikely(ret))
697 return ret;
672 end = 0; 698 end = 0;
673 } 699 }
674 start = next; 700 start = next;
@@ -682,7 +708,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
682 ret = __block_write_begin(page, start, end - start, gfs2_block_map); 708 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
683 if (unlikely(ret)) 709 if (unlikely(ret))
684 return ret; 710 return ret;
685 empty_write_end(page, start, end); 711 ret = empty_write_end(page, start, end, mode);
712 if (unlikely(ret))
713 return ret;
686 } 714 }
687 715
688 return 0; 716 return 0;
@@ -731,7 +759,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
731 759
732 if (curr == end) 760 if (curr == end)
733 to = end_offset; 761 to = end_offset;
734 error = write_empty_blocks(page, from, to); 762 error = write_empty_blocks(page, from, to, mode);
735 if (!error && offset + to > inode->i_size && 763 if (!error && offset + to > inode->i_size &&
736 !(mode & FALLOC_FL_KEEP_SIZE)) { 764 !(mode & FALLOC_FL_KEEP_SIZE)) {
737 i_size_write(inode, offset + to); 765 i_size_write(inode, offset + to);
@@ -788,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
788 loff_t bytes, max_bytes; 816 loff_t bytes, max_bytes;
789 struct gfs2_alloc *al; 817 struct gfs2_alloc *al;
790 int error; 818 int error;
819 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
791 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 820 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
792 next = (next + 1) << sdp->sd_sb.sb_bsize_shift; 821 next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
793 822
@@ -795,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
795 if (mode & ~FALLOC_FL_KEEP_SIZE) 824 if (mode & ~FALLOC_FL_KEEP_SIZE)
796 return -EOPNOTSUPP; 825 return -EOPNOTSUPP;
797 826
798 offset = (offset >> sdp->sd_sb.sb_bsize_shift) << 827 offset &= bsize_mask;
799 sdp->sd_sb.sb_bsize_shift;
800 828
801 len = next - offset; 829 len = next - offset;
802 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; 830 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
803 if (!bytes) 831 if (!bytes)
804 bytes = UINT_MAX; 832 bytes = UINT_MAX;
833 bytes &= bsize_mask;
834 if (bytes == 0)
835 bytes = sdp->sd_sb.sb_bsize;
805 836
806 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 837 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
807 error = gfs2_glock_nq(&ip->i_gh); 838 error = gfs2_glock_nq(&ip->i_gh);
@@ -832,6 +863,9 @@ retry:
832 if (error) { 863 if (error) {
833 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 864 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
834 bytes >>= 1; 865 bytes >>= 1;
866 bytes &= bsize_mask;
867 if (bytes == 0)
868 bytes = sdp->sd_sb.sb_bsize;
835 goto retry; 869 goto retry;
836 } 870 }
837 goto out_qunlock; 871 goto out_qunlock;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e2431313491f..a2a6abbccc07 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
93 93
94static inline void spin_lock_bucket(unsigned int hash) 94static inline void spin_lock_bucket(unsigned int hash)
95{ 95{
96 struct hlist_bl_head *bl = &gl_hash_table[hash]; 96 hlist_bl_lock(&gl_hash_table[hash]);
97 bit_spin_lock(0, (unsigned long *)bl);
98} 97}
99 98
100static inline void spin_unlock_bucket(unsigned int hash) 99static inline void spin_unlock_bucket(unsigned int hash)
101{ 100{
102 struct hlist_bl_head *bl = &gl_hash_table[hash]; 101 hlist_bl_unlock(&gl_hash_table[hash]);
103 __bit_spin_unlock(0, (unsigned long *)bl);
104} 102}
105 103
106static void gfs2_glock_dealloc(struct rcu_head *rcu) 104static void gfs2_glock_dealloc(struct rcu_head *rcu)
@@ -145,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
145{ 143{
146 const struct gfs2_glock_operations *glops = gl->gl_ops; 144 const struct gfs2_glock_operations *glops = gl->gl_ops;
147 145
148 /* assert_spin_locked(&gl->gl_spin); */
149
150 if (gl->gl_state == LM_ST_UNLOCKED) 146 if (gl->gl_state == LM_ST_UNLOCKED)
151 return 0; 147 return 0;
152 if (test_bit(GLF_LFLUSH, &gl->gl_flags)) 148 if (!list_empty(&gl->gl_holders))
153 return 0;
154 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
155 !list_empty(&gl->gl_holders))
156 return 0; 149 return 0;
157 if (glops->go_demote_ok) 150 if (glops->go_demote_ok)
158 return glops->go_demote_ok(gl); 151 return glops->go_demote_ok(gl);
@@ -160,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
160} 153}
161 154
162 155
156void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
157{
158 spin_lock(&lru_lock);
159
160 if (!list_empty(&gl->gl_lru))
161 list_del_init(&gl->gl_lru);
162 else
163 atomic_inc(&lru_count);
164
165 list_add_tail(&gl->gl_lru, &lru_list);
166 set_bit(GLF_LRU, &gl->gl_flags);
167 spin_unlock(&lru_lock);
168}
169
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags);
177 }
178 spin_unlock(&lru_lock);
179}
180
163/** 181/**
164 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 182 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
165 * @gl: the glock 183 * @gl: the glock
@@ -170,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
170 188
171static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 189static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
172{ 190{
173 if (demote_ok(gl)) { 191 if (demote_ok(gl))
174 spin_lock(&lru_lock); 192 gfs2_glock_add_to_lru(gl);
175
176 if (!list_empty(&gl->gl_lru))
177 list_del_init(&gl->gl_lru);
178 else
179 atomic_inc(&lru_count);
180
181 list_add_tail(&gl->gl_lru, &lru_list);
182 spin_unlock(&lru_lock);
183 }
184}
185
186void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
187{
188 spin_lock(&gl->gl_spin);
189 __gfs2_glock_schedule_for_reclaim(gl);
190 spin_unlock(&gl->gl_spin);
191} 193}
192 194
193/** 195/**
@@ -219,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
219 spin_lock_bucket(gl->gl_hash); 221 spin_lock_bucket(gl->gl_hash);
220 hlist_bl_del_rcu(&gl->gl_list); 222 hlist_bl_del_rcu(&gl->gl_list);
221 spin_unlock_bucket(gl->gl_hash); 223 spin_unlock_bucket(gl->gl_hash);
222 spin_lock(&lru_lock); 224 gfs2_glock_remove_from_lru(gl);
223 if (!list_empty(&gl->gl_lru)) {
224 list_del_init(&gl->gl_lru);
225 atomic_dec(&lru_count);
226 }
227 spin_unlock(&lru_lock);
228 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
229 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
230 trace_gfs2_glock_put(gl); 227 trace_gfs2_glock_put(gl);
@@ -544,11 +541,6 @@ __acquires(&gl->gl_spin)
544 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 541 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
545 542
546 gfs2_glock_hold(gl); 543 gfs2_glock_hold(gl);
547 if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
548 gl->gl_state == LM_ST_DEFERRED) &&
549 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
550 lck_flags |= LM_FLAG_TRY_1CB;
551
552 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 544 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
553 /* lock_dlm */ 545 /* lock_dlm */
554 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 546 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -650,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
650 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ 642 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
651 643
652 if (ip) 644 if (ip)
653 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 645 inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
654 else 646 else
655 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 647 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
656 if (inode && !IS_ERR(inode)) { 648 if (inode && !IS_ERR(inode)) {
@@ -1027,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1027 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1019 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1028 return -EIO; 1020 return -EIO;
1029 1021
1022 if (test_bit(GLF_LRU, &gl->gl_flags))
1023 gfs2_glock_remove_from_lru(gl);
1024
1030 spin_lock(&gl->gl_spin); 1025 spin_lock(&gl->gl_spin);
1031 add_to_queue(gh); 1026 add_to_queue(gh);
1032 if ((LM_FLAG_NOEXP & gh->gh_flags) && 1027 if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1084,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1084 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1079 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1085 fast_path = 1; 1080 fast_path = 1;
1086 } 1081 }
1087 __gfs2_glock_schedule_for_reclaim(gl); 1082 if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
1083 __gfs2_glock_schedule_for_reclaim(gl);
1088 trace_gfs2_glock_queue(gh, 0); 1084 trace_gfs2_glock_queue(gh, 0);
1089 spin_unlock(&gl->gl_spin); 1085 spin_unlock(&gl->gl_spin);
1090 if (likely(fast_path)) 1086 if (likely(fast_path))
@@ -1123,7 +1119,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1123 * @number: the lock number 1119 * @number: the lock number
1124 * @glops: the glock operations for the type of glock 1120 * @glops: the glock operations for the type of glock
1125 * @state: the state to acquire the glock in 1121 * @state: the state to acquire the glock in
1126 * @flags: modifier flags for the aquisition 1122 * @flags: modifier flags for the acquisition
1127 * @gh: the struct gfs2_holder 1123 * @gh: the struct gfs2_holder
1128 * 1124 *
1129 * Returns: errno 1125 * Returns: errno
@@ -1367,6 +1363,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1367 while(nr && !list_empty(&lru_list)) { 1363 while(nr && !list_empty(&lru_list)) {
1368 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1364 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1369 list_del_init(&gl->gl_lru); 1365 list_del_init(&gl->gl_lru);
1366 clear_bit(GLF_LRU, &gl->gl_flags);
1370 atomic_dec(&lru_count); 1367 atomic_dec(&lru_count);
1371 1368
1372 /* Test for being demotable */ 1369 /* Test for being demotable */
@@ -1389,6 +1386,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1389 } 1386 }
1390 nr_skipped++; 1387 nr_skipped++;
1391 list_add(&gl->gl_lru, &skipped); 1388 list_add(&gl->gl_lru, &skipped);
1389 set_bit(GLF_LRU, &gl->gl_flags);
1392 } 1390 }
1393 list_splice(&skipped, &lru_list); 1391 list_splice(&skipped, &lru_list);
1394 atomic_add(nr_skipped, &lru_count); 1392 atomic_add(nr_skipped, &lru_count);
@@ -1461,12 +1459,7 @@ static void thaw_glock(struct gfs2_glock *gl)
1461 1459
1462static void clear_glock(struct gfs2_glock *gl) 1460static void clear_glock(struct gfs2_glock *gl)
1463{ 1461{
1464 spin_lock(&lru_lock); 1462 gfs2_glock_remove_from_lru(gl);
1465 if (!list_empty(&gl->gl_lru)) {
1466 list_del_init(&gl->gl_lru);
1467 atomic_dec(&lru_count);
1468 }
1469 spin_unlock(&lru_lock);
1470 1463
1471 spin_lock(&gl->gl_spin); 1464 spin_lock(&gl->gl_spin);
1472 if (gl->gl_state != LM_ST_UNLOCKED) 1465 if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1601,9 +1594,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1601 return 0; 1594 return 0;
1602} 1595}
1603 1596
1604static const char *gflags2str(char *buf, const unsigned long *gflags) 1597static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1605{ 1598{
1599 const unsigned long *gflags = &gl->gl_flags;
1606 char *p = buf; 1600 char *p = buf;
1601
1607 if (test_bit(GLF_LOCK, gflags)) 1602 if (test_bit(GLF_LOCK, gflags))
1608 *p++ = 'l'; 1603 *p++ = 'l';
1609 if (test_bit(GLF_DEMOTE, gflags)) 1604 if (test_bit(GLF_DEMOTE, gflags))
@@ -1626,6 +1621,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1626 *p++ = 'F'; 1621 *p++ = 'F';
1627 if (test_bit(GLF_QUEUED, gflags)) 1622 if (test_bit(GLF_QUEUED, gflags))
1628 *p++ = 'q'; 1623 *p++ = 'q';
1624 if (test_bit(GLF_LRU, gflags))
1625 *p++ = 'L';
1626 if (gl->gl_object)
1627 *p++ = 'o';
1629 *p = 0; 1628 *p = 0;
1630 return buf; 1629 return buf;
1631} 1630}
@@ -1660,14 +1659,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1660 dtime *= 1000000/HZ; /* demote time in uSec */ 1659 dtime *= 1000000/HZ; /* demote time in uSec */
1661 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1660 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1662 dtime = 0; 1661 dtime = 0;
1663 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", 1662 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
1664 state2str(gl->gl_state), 1663 state2str(gl->gl_state),
1665 gl->gl_name.ln_type, 1664 gl->gl_name.ln_type,
1666 (unsigned long long)gl->gl_name.ln_number, 1665 (unsigned long long)gl->gl_name.ln_number,
1667 gflags2str(gflags_buf, &gl->gl_flags), 1666 gflags2str(gflags_buf, gl),
1668 state2str(gl->gl_target), 1667 state2str(gl->gl_target),
1669 state2str(gl->gl_demote_state), dtime, 1668 state2str(gl->gl_demote_state), dtime,
1670 atomic_read(&gl->gl_ail_count), 1669 atomic_read(&gl->gl_ail_count),
1670 atomic_read(&gl->gl_revokes),
1671 atomic_read(&gl->gl_ref)); 1671 atomic_read(&gl->gl_ref));
1672 1672
1673 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1673 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
225 225
226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 228extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 229extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); 230extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 231extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
233extern void gfs2_glock_free(struct gfs2_glock *gl); 232extern void gfs2_glock_free(struct gfs2_glock *gl);
234 233
235extern int __init gfs2_glock_init(void); 234extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3754e3cbf02b..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
28#include "trans.h" 28#include "trans.h"
29 29
30/** 30/**
31 * ail_empty_gl - remove all buffers for a given lock from the AIL 31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
32 * @gl: the glock 32 * @gl: the glock
33 * 33 *
34 * None of the buffers should be dirty, locked, or pinned. 34 * None of the buffers should be dirty, locked, or pinned.
35 */ 35 */
36 36
37static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void __gfs2_ail_flush(struct gfs2_glock *gl)
38{ 38{
39 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
40 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 struct buffer_head *bh; 42 struct buffer_head *bh;
43 struct gfs2_trans tr;
44
45 memset(&tr, 0, sizeof(tr));
46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
47
48 if (!tr.tr_revokes)
49 return;
50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58 43
59 spin_lock(&sdp->sd_ail_lock); 44 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 45 while (!list_empty(head)) {
@@ -76,11 +61,51 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
76 } 61 }
77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 62 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
78 spin_unlock(&sdp->sd_ail_lock); 63 spin_unlock(&sdp->sd_ail_lock);
64}
65
66
67static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
68{
69 struct gfs2_sbd *sdp = gl->gl_sbd;
70 struct gfs2_trans tr;
71
72 memset(&tr, 0, sizeof(tr));
73 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
74
75 if (!tr.tr_revokes)
76 return;
77
78 /* A shortened, inline version of gfs2_trans_begin() */
79 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
80 tr.tr_ip = (unsigned long)__builtin_return_address(0);
81 INIT_LIST_HEAD(&tr.tr_list_buf);
82 gfs2_log_reserve(sdp, tr.tr_reserved);
83 BUG_ON(current->journal_info);
84 current->journal_info = &tr;
85
86 __gfs2_ail_flush(gl);
79 87
80 gfs2_trans_end(sdp); 88 gfs2_trans_end(sdp);
81 gfs2_log_flush(sdp, NULL); 89 gfs2_log_flush(sdp, NULL);
82} 90}
83 91
92void gfs2_ail_flush(struct gfs2_glock *gl)
93{
94 struct gfs2_sbd *sdp = gl->gl_sbd;
95 unsigned int revokes = atomic_read(&gl->gl_ail_count);
96 int ret;
97
98 if (!revokes)
99 return;
100
101 ret = gfs2_trans_begin(sdp, 0, revokes);
102 if (ret)
103 return;
104 __gfs2_ail_flush(gl);
105 gfs2_trans_end(sdp);
106 gfs2_log_flush(sdp, NULL);
107}
108
84/** 109/**
85 * rgrp_go_sync - sync out the metadata for this glock 110 * rgrp_go_sync - sync out the metadata for this glock
86 * @gl: the glock 111 * @gl: the glock
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
227} 252}
228 253
229/** 254/**
255 * gfs2_set_nlink - Set the inode's link count based on on-disk info
256 * @inode: The inode in question
257 * @nlink: The link count
258 *
259 * If the link count has hit zero, it must never be raised, whatever the
260 * on-disk inode might say. When new struct inodes are created the link
261 * count is set to 1, so that we can safely use this test even when reading
262 * in on disk information for the first time.
263 */
264
265static void gfs2_set_nlink(struct inode *inode, u32 nlink)
266{
267 /*
268 * We will need to review setting the nlink count here in the
269 * light of the forthcoming ro bind mount work. This is a reminder
270 * to do that.
271 */
272 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
273 if (nlink == 0)
274 clear_nlink(inode);
275 else
276 inode->i_nlink = nlink;
277 }
278}
279
280static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
281{
282 const struct gfs2_dinode *str = buf;
283 struct timespec atime;
284 u16 height, depth;
285
286 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
287 goto corrupt;
288 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
289 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
290 ip->i_inode.i_rdev = 0;
291 switch (ip->i_inode.i_mode & S_IFMT) {
292 case S_IFBLK:
293 case S_IFCHR:
294 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
295 be32_to_cpu(str->di_minor));
296 break;
297 };
298
299 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
300 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
301 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
302 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
303 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
304 atime.tv_sec = be64_to_cpu(str->di_atime);
305 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
306 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
307 ip->i_inode.i_atime = atime;
308 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
309 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
310 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
311 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
312
313 ip->i_goal = be64_to_cpu(str->di_goal_meta);
314 ip->i_generation = be64_to_cpu(str->di_generation);
315
316 ip->i_diskflags = be32_to_cpu(str->di_flags);
317 gfs2_set_inode_flags(&ip->i_inode);
318 height = be16_to_cpu(str->di_height);
319 if (unlikely(height > GFS2_MAX_META_HEIGHT))
320 goto corrupt;
321 ip->i_height = (u8)height;
322
323 depth = be16_to_cpu(str->di_depth);
324 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
325 goto corrupt;
326 ip->i_depth = (u8)depth;
327 ip->i_entries = be32_to_cpu(str->di_entries);
328
329 ip->i_eattr = be64_to_cpu(str->di_eattr);
330 if (S_ISREG(ip->i_inode.i_mode))
331 gfs2_set_aops(&ip->i_inode);
332
333 return 0;
334corrupt:
335 gfs2_consist_inode(ip);
336 return -EIO;
337}
338
339/**
340 * gfs2_inode_refresh - Refresh the incore copy of the dinode
341 * @ip: The GFS2 inode
342 *
343 * Returns: errno
344 */
345
346int gfs2_inode_refresh(struct gfs2_inode *ip)
347{
348 struct buffer_head *dibh;
349 int error;
350
351 error = gfs2_meta_inode_buffer(ip, &dibh);
352 if (error)
353 return error;
354
355 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
356 brelse(dibh);
357 return -EIO;
358 }
359
360 error = gfs2_dinode_in(ip, dibh->b_data);
361 brelse(dibh);
362 clear_bit(GIF_INVALID, &ip->i_flags);
363
364 return error;
365}
366
367/**
230 * inode_go_lock - operation done after an inode lock is locked by a process 368 * inode_go_lock - operation done after an inode lock is locked by a process
231 * @gl: the glock 369 * @gl: the glock
232 * @flags: 370 * @flags:
@@ -385,6 +523,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
385static void iopen_go_callback(struct gfs2_glock *gl) 523static void iopen_go_callback(struct gfs2_glock *gl)
386{ 524{
387 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 525 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
526 struct gfs2_sbd *sdp = gl->gl_sbd;
527
528 if (sdp->sd_vfs->s_flags & MS_RDONLY)
529 return;
388 530
389 if (gl->gl_demote_state == LM_ST_UNLOCKED && 531 if (gl->gl_demote_state == LM_ST_UNLOCKED &&
390 gl->gl_state == LM_ST_SHARED && ip) { 532 gl->gl_state == LM_ST_SHARED && ip) {
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[]; 24extern const struct gfs2_glock_operations *gfs2_glops_list[];
25 25
26extern void gfs2_ail_flush(struct gfs2_glock *gl);
27
26#endif /* __GLOPS_DOT_H__ */ 28#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
20 20
21#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
22#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
23#define DIO_ALL 0x00000100
24 23
25struct gfs2_log_operations; 24struct gfs2_log_operations;
26struct gfs2_log_element; 25struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
200 GLF_INITIAL = 10, 199 GLF_INITIAL = 10,
201 GLF_FROZEN = 11, 200 GLF_FROZEN = 11,
202 GLF_QUEUED = 12, 201 GLF_QUEUED = 12,
202 GLF_LRU = 13,
203 GLF_OBJECT = 14, /* Used only for tracing */
203}; 204};
204 205
205struct gfs2_glock { 206struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
234 235
235 struct list_head gl_ail_list; 236 struct list_head gl_ail_list;
236 atomic_t gl_ail_count; 237 atomic_t gl_ail_count;
238 atomic_t gl_revokes;
237 struct delayed_work gl_work; 239 struct delayed_work gl_work;
238 struct work_struct gl_delete; 240 struct work_struct gl_delete;
239 struct rcu_head gl_rcu; 241 struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
374 unsigned int ai_first; 376 unsigned int ai_first;
375 struct list_head ai_ail1_list; 377 struct list_head ai_ail1_list;
376 struct list_head ai_ail2_list; 378 struct list_head ai_ail2_list;
377
378 u64 ai_sync_gen;
379}; 379};
380 380
381struct gfs2_journal_extent { 381struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
488 488
489 char sb_lockproto[GFS2_LOCKNAME_LEN]; 489 char sb_lockproto[GFS2_LOCKNAME_LEN];
490 char sb_locktable[GFS2_LOCKNAME_LEN]; 490 char sb_locktable[GFS2_LOCKNAME_LEN];
491 u8 sb_uuid[16];
492}; 491};
493 492
494/* 493/*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
654 spinlock_t sd_ail_lock; 653 spinlock_t sd_ail_lock;
655 struct list_head sd_ail1_list; 654 struct list_head sd_ail1_list;
656 struct list_head sd_ail2_list; 655 struct list_head sd_ail2_list;
657 u64 sd_ail_sync_gen;
658 656
659 /* Replay stuff */ 657 /* Replay stuff */
660 658
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97d54a28776a..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10#include <linux/sched.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/spinlock.h> 11#include <linux/spinlock.h>
13#include <linux/completion.h> 12#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
15#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 19#include <linux/crc32.h>
20#include <linux/fiemap.h>
19#include <linux/security.h> 21#include <linux/security.h>
20#include <linux/time.h> 22#include <asm/uaccess.h>
21 23
22#include "gfs2.h" 24#include "gfs2.h"
23#include "incore.h" 25#include "incore.h"
@@ -26,51 +28,70 @@
26#include "dir.h" 28#include "dir.h"
27#include "xattr.h" 29#include "xattr.h"
28#include "glock.h" 30#include "glock.h"
29#include "glops.h"
30#include "inode.h" 31#include "inode.h"
31#include "log.h"
32#include "meta_io.h" 32#include "meta_io.h"
33#include "quota.h" 33#include "quota.h"
34#include "rgrp.h" 34#include "rgrp.h"
35#include "trans.h" 35#include "trans.h"
36#include "util.h" 36#include "util.h"
37#include "super.h"
38#include "glops.h"
37 39
38struct gfs2_inum_range_host { 40struct gfs2_skip_data {
39 u64 ir_start; 41 u64 no_addr;
40 u64 ir_length; 42 int skipped;
43 int non_block;
41}; 44};
42 45
43static int iget_test(struct inode *inode, void *opaque) 46static int iget_test(struct inode *inode, void *opaque)
44{ 47{
45 struct gfs2_inode *ip = GFS2_I(inode); 48 struct gfs2_inode *ip = GFS2_I(inode);
46 u64 *no_addr = opaque; 49 struct gfs2_skip_data *data = opaque;
47 50
48 if (ip->i_no_addr == *no_addr) 51 if (ip->i_no_addr == data->no_addr) {
52 if (data->non_block &&
53 inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
54 data->skipped = 1;
55 return 0;
56 }
49 return 1; 57 return 1;
50 58 }
51 return 0; 59 return 0;
52} 60}
53 61
54static int iget_set(struct inode *inode, void *opaque) 62static int iget_set(struct inode *inode, void *opaque)
55{ 63{
56 struct gfs2_inode *ip = GFS2_I(inode); 64 struct gfs2_inode *ip = GFS2_I(inode);
57 u64 *no_addr = opaque; 65 struct gfs2_skip_data *data = opaque;
58 66
59 inode->i_ino = (unsigned long)*no_addr; 67 if (data->skipped)
60 ip->i_no_addr = *no_addr; 68 return -ENOENT;
69 inode->i_ino = (unsigned long)(data->no_addr);
70 ip->i_no_addr = data->no_addr;
61 return 0; 71 return 0;
62} 72}
63 73
64struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 74struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
65{ 75{
66 unsigned long hash = (unsigned long)no_addr; 76 unsigned long hash = (unsigned long)no_addr;
67 return ilookup5(sb, hash, iget_test, &no_addr); 77 struct gfs2_skip_data data;
78
79 data.no_addr = no_addr;
80 data.skipped = 0;
81 data.non_block = non_block;
82 return ilookup5(sb, hash, iget_test, &data);
68} 83}
69 84
70static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 85static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
86 int non_block)
71{ 87{
88 struct gfs2_skip_data data;
72 unsigned long hash = (unsigned long)no_addr; 89 unsigned long hash = (unsigned long)no_addr;
73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 90
91 data.no_addr = no_addr;
92 data.skipped = 0;
93 data.non_block = non_block;
94 return iget5_locked(sb, hash, iget_test, iget_set, &data);
74} 95}
75 96
76/** 97/**
@@ -111,19 +132,20 @@ static void gfs2_set_iop(struct inode *inode)
111 * @sb: The super block 132 * @sb: The super block
112 * @no_addr: The inode number 133 * @no_addr: The inode number
113 * @type: The type of the inode 134 * @type: The type of the inode
135 * non_block: Can we block on inodes that are being freed?
114 * 136 *
115 * Returns: A VFS inode, or an error 137 * Returns: A VFS inode, or an error
116 */ 138 */
117 139
118struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, 140struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
119 u64 no_addr, u64 no_formal_ino) 141 u64 no_addr, u64 no_formal_ino, int non_block)
120{ 142{
121 struct inode *inode; 143 struct inode *inode;
122 struct gfs2_inode *ip; 144 struct gfs2_inode *ip;
123 struct gfs2_glock *io_gl = NULL; 145 struct gfs2_glock *io_gl = NULL;
124 int error; 146 int error;
125 147
126 inode = gfs2_iget(sb, no_addr); 148 inode = gfs2_iget(sb, no_addr, non_block);
127 ip = GFS2_I(inode); 149 ip = GFS2_I(inode);
128 150
129 if (!inode) 151 if (!inode)
@@ -185,11 +207,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
185{ 207{
186 struct super_block *sb = sdp->sd_vfs; 208 struct super_block *sb = sdp->sd_vfs;
187 struct gfs2_holder i_gh; 209 struct gfs2_holder i_gh;
188 struct inode *inode; 210 struct inode *inode = NULL;
189 int error; 211 int error;
190 212
213 /* Must not read in block until block type is verified */
191 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, 214 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
192 LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 215 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
193 if (error) 216 if (error)
194 return ERR_PTR(error); 217 return ERR_PTR(error);
195 218
@@ -197,7 +220,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
197 if (error) 220 if (error)
198 goto fail; 221 goto fail;
199 222
200 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); 223 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
201 if (IS_ERR(inode)) 224 if (IS_ERR(inode))
202 goto fail; 225 goto fail;
203 226
@@ -222,203 +245,6 @@ fail_iput:
222 goto fail; 245 goto fail;
223} 246}
224 247
225static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
226{
227 const struct gfs2_dinode *str = buf;
228 struct timespec atime;
229 u16 height, depth;
230
231 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
232 goto corrupt;
233 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
234 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
235 ip->i_inode.i_rdev = 0;
236 switch (ip->i_inode.i_mode & S_IFMT) {
237 case S_IFBLK:
238 case S_IFCHR:
239 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
240 be32_to_cpu(str->di_minor));
241 break;
242 };
243
244 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
245 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
246 /*
247 * We will need to review setting the nlink count here in the
248 * light of the forthcoming ro bind mount work. This is a reminder
249 * to do that.
250 */
251 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
252 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
253 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
254 atime.tv_sec = be64_to_cpu(str->di_atime);
255 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
256 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
257 ip->i_inode.i_atime = atime;
258 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
259 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
260 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
261 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
262
263 ip->i_goal = be64_to_cpu(str->di_goal_meta);
264 ip->i_generation = be64_to_cpu(str->di_generation);
265
266 ip->i_diskflags = be32_to_cpu(str->di_flags);
267 gfs2_set_inode_flags(&ip->i_inode);
268 height = be16_to_cpu(str->di_height);
269 if (unlikely(height > GFS2_MAX_META_HEIGHT))
270 goto corrupt;
271 ip->i_height = (u8)height;
272
273 depth = be16_to_cpu(str->di_depth);
274 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
275 goto corrupt;
276 ip->i_depth = (u8)depth;
277 ip->i_entries = be32_to_cpu(str->di_entries);
278
279 ip->i_eattr = be64_to_cpu(str->di_eattr);
280 if (S_ISREG(ip->i_inode.i_mode))
281 gfs2_set_aops(&ip->i_inode);
282
283 return 0;
284corrupt:
285 if (gfs2_consist_inode(ip))
286 gfs2_dinode_print(ip);
287 return -EIO;
288}
289
290/**
291 * gfs2_inode_refresh - Refresh the incore copy of the dinode
292 * @ip: The GFS2 inode
293 *
294 * Returns: errno
295 */
296
297int gfs2_inode_refresh(struct gfs2_inode *ip)
298{
299 struct buffer_head *dibh;
300 int error;
301
302 error = gfs2_meta_inode_buffer(ip, &dibh);
303 if (error)
304 return error;
305
306 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
307 brelse(dibh);
308 return -EIO;
309 }
310
311 error = gfs2_dinode_in(ip, dibh->b_data);
312 brelse(dibh);
313 clear_bit(GIF_INVALID, &ip->i_flags);
314
315 return error;
316}
317
318int gfs2_dinode_dealloc(struct gfs2_inode *ip)
319{
320 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
321 struct gfs2_alloc *al;
322 struct gfs2_rgrpd *rgd;
323 int error;
324
325 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
326 if (gfs2_consist_inode(ip))
327 gfs2_dinode_print(ip);
328 return -EIO;
329 }
330
331 al = gfs2_alloc_get(ip);
332 if (!al)
333 return -ENOMEM;
334
335 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
336 if (error)
337 goto out;
338
339 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
340 if (error)
341 goto out_qs;
342
343 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
344 if (!rgd) {
345 gfs2_consist_inode(ip);
346 error = -EIO;
347 goto out_rindex_relse;
348 }
349
350 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
351 &al->al_rgd_gh);
352 if (error)
353 goto out_rindex_relse;
354
355 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
356 if (error)
357 goto out_rg_gunlock;
358
359 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
360 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
361
362 gfs2_free_di(rgd, ip);
363
364 gfs2_trans_end(sdp);
365
366out_rg_gunlock:
367 gfs2_glock_dq_uninit(&al->al_rgd_gh);
368out_rindex_relse:
369 gfs2_glock_dq_uninit(&al->al_ri_gh);
370out_qs:
371 gfs2_quota_unhold(ip);
372out:
373 gfs2_alloc_put(ip);
374 return error;
375}
376
377/**
378 * gfs2_change_nlink - Change nlink count on inode
379 * @ip: The GFS2 inode
380 * @diff: The change in the nlink count required
381 *
382 * Returns: errno
383 */
384int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
385{
386 struct buffer_head *dibh;
387 u32 nlink;
388 int error;
389
390 BUG_ON(diff != 1 && diff != -1);
391 nlink = ip->i_inode.i_nlink + diff;
392
393 /* If we are reducing the nlink count, but the new value ends up being
394 bigger than the old one, we must have underflowed. */
395 if (diff < 0 && nlink > ip->i_inode.i_nlink) {
396 if (gfs2_consist_inode(ip))
397 gfs2_dinode_print(ip);
398 return -EIO;
399 }
400
401 error = gfs2_meta_inode_buffer(ip, &dibh);
402 if (error)
403 return error;
404
405 if (diff > 0)
406 inc_nlink(&ip->i_inode);
407 else
408 drop_nlink(&ip->i_inode);
409
410 ip->i_inode.i_ctime = CURRENT_TIME;
411
412 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
413 gfs2_dinode_out(ip, dibh->b_data);
414 brelse(dibh);
415 mark_inode_dirty(&ip->i_inode);
416
417 if (ip->i_inode.i_nlink == 0)
418 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
419
420 return error;
421}
422 248
423struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 249struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
424{ 250{
@@ -517,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
517 343
518 /* Don't create entries in an unlinked directory */ 344 /* Don't create entries in an unlinked directory */
519 if (!dip->i_inode.i_nlink) 345 if (!dip->i_inode.i_nlink)
520 return -EPERM; 346 return -ENOENT;
521 347
522 error = gfs2_dir_check(&dip->i_inode, name, NULL); 348 error = gfs2_dir_check(&dip->i_inode, name, NULL);
523 switch (error) { 349 switch (error) {
@@ -587,21 +413,44 @@ out:
587 return error; 413 return error;
588} 414}
589 415
416static void gfs2_init_dir(struct buffer_head *dibh,
417 const struct gfs2_inode *parent)
418{
419 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
420 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
421
422 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
423 dent->de_inum = di->di_num; /* already GFS2 endian */
424 dent->de_type = cpu_to_be16(DT_DIR);
425
426 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
427 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
428 gfs2_inum_out(parent, dent);
429 dent->de_type = cpu_to_be16(DT_DIR);
430
431}
432
590/** 433/**
591 * init_dinode - Fill in a new dinode structure 434 * init_dinode - Fill in a new dinode structure
592 * @dip: the directory this inode is being created in 435 * @dip: The directory this inode is being created in
593 * @gl: The glock covering the new inode 436 * @gl: The glock covering the new inode
594 * @inum: the inode number 437 * @inum: The inode number
595 * @mode: the file permissions 438 * @mode: The file permissions
596 * @uid: 439 * @uid: The uid of the new inode
597 * @gid: 440 * @gid: The gid of the new inode
441 * @generation: The generation number of the new inode
442 * @dev: The device number (if a device node)
443 * @symname: The symlink destination (if a symlink)
444 * @size: The inode size (ignored for directories)
445 * @bhp: The buffer head (returned to caller)
598 * 446 *
599 */ 447 */
600 448
601static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
602 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, unsigned int mode,
603 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
604 const u64 *generation, dev_t dev, struct buffer_head **bhp) 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp)
605{ 454{
606 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
607 struct gfs2_dinode *di; 456 struct gfs2_dinode *di;
@@ -620,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
620 di->di_uid = cpu_to_be32(uid); 469 di->di_uid = cpu_to_be32(uid);
621 di->di_gid = cpu_to_be32(gid); 470 di->di_gid = cpu_to_be32(gid);
622 di->di_nlink = 0; 471 di->di_nlink = 0;
623 di->di_size = 0; 472 di->di_size = cpu_to_be64(size);
624 di->di_blocks = cpu_to_be64(1); 473 di->di_blocks = cpu_to_be64(1);
625 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
626 di->di_major = cpu_to_be32(MAJOR(dev)); 475 di->di_major = cpu_to_be32(MAJOR(dev));
@@ -628,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
628 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
629 di->di_generation = cpu_to_be64(*generation); 478 di->di_generation = cpu_to_be64(*generation);
630 di->di_flags = 0; 479 di->di_flags = 0;
631
632 if (S_ISREG(mode)) {
633 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
634 gfs2_tune_get(sdp, gt_new_files_jdata))
635 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
636 } else if (S_ISDIR(mode)) {
637 di->di_flags |= cpu_to_be32(dip->i_diskflags &
638 GFS2_DIF_INHERIT_JDATA);
639 }
640
641 di->__pad1 = 0; 480 di->__pad1 = 0;
642 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 481 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
643 di->di_height = 0; 482 di->di_height = 0;
@@ -651,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
651 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 490 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
652 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 491 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
653 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 492 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
654 493
494 switch(mode & S_IFMT) {
495 case S_IFREG:
496 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
497 gfs2_tune_get(sdp, gt_new_files_jdata))
498 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
499 break;
500 case S_IFDIR:
501 di->di_flags |= cpu_to_be32(dip->i_diskflags &
502 GFS2_DIF_INHERIT_JDATA);
503 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
504 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
505 di->di_entries = cpu_to_be32(2);
506 gfs2_init_dir(dibh, dip);
507 break;
508 case S_IFLNK:
509 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
510 break;
511 }
512
655 set_buffer_uptodate(dibh); 513 set_buffer_uptodate(dibh);
656 514
657 *bhp = dibh; 515 *bhp = dibh;
@@ -659,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
659 517
660static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
661 unsigned int mode, const struct gfs2_inum_host *inum, 519 unsigned int mode, const struct gfs2_inum_host *inum,
662 const u64 *generation, dev_t dev, struct buffer_head **bhp) 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp)
663{ 522{
664 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 523 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
665 unsigned int uid, gid; 524 unsigned int uid, gid;
@@ -681,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
681 if (error) 540 if (error)
682 goto out_quota; 541 goto out_quota;
683 542
684 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 543 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
685 gfs2_quota_change(dip, +1, uid, gid); 544 gfs2_quota_change(dip, +1, uid, gid);
686 gfs2_trans_end(sdp); 545 gfs2_trans_end(sdp);
687 546
@@ -735,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
735 goto fail_quota_locks; 594 goto fail_quota_locks;
736 } 595 }
737 596
738 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 597 error = gfs2_dir_add(&dip->i_inode, name, ip);
739 if (error) 598 if (error)
740 goto fail_end_trans; 599 goto fail_end_trans;
741 600
742 error = gfs2_meta_inode_buffer(ip, &dibh); 601 error = gfs2_meta_inode_buffer(ip, &dibh);
743 if (error) 602 if (error)
744 goto fail_end_trans; 603 goto fail_end_trans;
745 ip->i_inode.i_nlink = 1; 604 inc_nlink(&ip->i_inode);
605 if (S_ISDIR(ip->i_inode.i_mode))
606 inc_nlink(&ip->i_inode);
746 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 607 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
747 gfs2_dinode_out(ip, dibh->b_data); 608 gfs2_dinode_out(ip, dibh->b_data);
748 brelse(dibh); 609 brelse(dibh);
@@ -789,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
789} 650}
790 651
791/** 652/**
792 * gfs2_createi - Create a new inode 653 * gfs2_create_inode - Create a new inode
793 * @ghs: An array of two holders 654 * @dir: The parent directory
794 * @name: The name of the new file 655 * @dentry: The new dentry
795 * @mode: the permissions on the new inode 656 * @mode: The permissions on the new inode
657 * @dev: For device nodes, this is the device number
658 * @symname: For symlinks, this is the link destination
659 * @size: The initial size of the inode (ignored for directories)
796 * 660 *
797 * @ghs[0] is an initialized holder for the directory 661 * Returns: 0 on success, or error code
798 * @ghs[1] is the holder for the inode lock
799 *
800 * If the return value is not NULL, the glocks on both the directory and the new
801 * file are held. A transaction has been started and an inplace reservation
802 * is held, as well.
803 *
804 * Returns: An inode
805 */ 662 */
806 663
807struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 664static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
808 unsigned int mode, dev_t dev) 665 unsigned int mode, dev_t dev, const char *symname,
666 unsigned int size)
809{ 667{
668 const struct qstr *name = &dentry->d_name;
669 struct gfs2_holder ghs[2];
810 struct inode *inode = NULL; 670 struct inode *inode = NULL;
811 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 671 struct gfs2_inode *dip = GFS2_I(dir);
812 struct inode *dir = &dip->i_inode;
813 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 672 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
814 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 673 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
815 int error; 674 int error;
@@ -817,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
817 struct buffer_head *bh = NULL; 676 struct buffer_head *bh = NULL;
818 677
819 if (!name->len || name->len > GFS2_FNAMESIZE) 678 if (!name->len || name->len > GFS2_FNAMESIZE)
820 return ERR_PTR(-ENAMETOOLONG); 679 return -ENAMETOOLONG;
821 680
822 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 681 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
823 error = gfs2_glock_nq(ghs);
824 if (error) 682 if (error)
825 goto fail; 683 goto fail;
826 684
@@ -838,12 +696,12 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
838 if (error) 696 if (error)
839 goto fail_gunlock; 697 goto fail_gunlock;
840 698
841 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 699 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
842 if (error) 700 if (error)
843 goto fail_gunlock2; 701 goto fail_gunlock2;
844 702
845 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 703 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
846 inum.no_formal_ino); 704 inum.no_formal_ino, 0);
847 if (IS_ERR(inode)) 705 if (IS_ERR(inode))
848 goto fail_gunlock2; 706 goto fail_gunlock2;
849 707
@@ -865,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
865 723
866 if (bh) 724 if (bh)
867 brelse(bh); 725 brelse(bh);
868 return inode; 726
727 gfs2_trans_end(sdp);
728 if (dip->i_alloc->al_rgd)
729 gfs2_inplace_release(dip);
730 gfs2_quota_unlock(dip);
731 gfs2_alloc_put(dip);
732 gfs2_glock_dq_uninit_m(2, ghs);
733 mark_inode_dirty(inode);
734 d_instantiate(dentry, inode);
735 return 0;
869 736
870fail_gunlock2: 737fail_gunlock2:
871 gfs2_glock_dq_uninit(ghs + 1); 738 gfs2_glock_dq_uninit(ghs + 1);
872 if (inode && !IS_ERR(inode)) 739 if (inode && !IS_ERR(inode))
873 iput(inode); 740 iput(inode);
874fail_gunlock: 741fail_gunlock:
875 gfs2_glock_dq(ghs); 742 gfs2_glock_dq_uninit(ghs);
876fail: 743fail:
877 if (bh) 744 if (bh)
878 brelse(bh); 745 brelse(bh);
879 return ERR_PTR(error); 746 return error;
747}
748
749/**
750 * gfs2_create - Create a file
751 * @dir: The directory in which to create the file
752 * @dentry: The dentry of the new file
753 * @mode: The mode of the new file
754 *
755 * Returns: errno
756 */
757
758static int gfs2_create(struct inode *dir, struct dentry *dentry,
759 int mode, struct nameidata *nd)
760{
761 struct inode *inode;
762 int ret;
763
764 for (;;) {
765 ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
766 if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
767 return ret;
768
769 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
770 if (inode) {
771 if (!IS_ERR(inode))
772 break;
773 return PTR_ERR(inode);
774 }
775 }
776
777 d_instantiate(dentry, inode);
778 return 0;
779}
780
781/**
782 * gfs2_lookup - Look up a filename in a directory and return its inode
783 * @dir: The directory inode
784 * @dentry: The dentry of the new inode
785 * @nd: passed from Linux VFS, ignored by us
786 *
787 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
788 *
789 * Returns: errno
790 */
791
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd)
794{
795 struct inode *inode = NULL;
796
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh;
804 int error;
805 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
806 if (error) {
807 iput(inode);
808 return ERR_PTR(error);
809 }
810 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 }
813 d_add(dentry, inode);
814
815 return NULL;
816}
817
818/**
819 * gfs2_link - Link to a file
820 * @old_dentry: The inode to link
821 * @dir: Add link to this directory
822 * @dentry: The name of the link
823 *
824 * Link the inode in "old_dentry" into the directory "dir" with the
825 * name in "dentry".
826 *
827 * Returns: errno
828 */
829
830static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
831 struct dentry *dentry)
832{
833 struct gfs2_inode *dip = GFS2_I(dir);
834 struct gfs2_sbd *sdp = GFS2_SB(dir);
835 struct inode *inode = old_dentry->d_inode;
836 struct gfs2_inode *ip = GFS2_I(inode);
837 struct gfs2_holder ghs[2];
838 struct buffer_head *dibh;
839 int alloc_required;
840 int error;
841
842 if (S_ISDIR(inode->i_mode))
843 return -EPERM;
844
845 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
846 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
847
848 error = gfs2_glock_nq(ghs); /* parent */
849 if (error)
850 goto out_parent;
851
852 error = gfs2_glock_nq(ghs + 1); /* child */
853 if (error)
854 goto out_child;
855
856 error = -ENOENT;
857 if (inode->i_nlink == 0)
858 goto out_gunlock;
859
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
861 if (error)
862 goto out_gunlock;
863
864 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
865 switch (error) {
866 case -ENOENT:
867 break;
868 case 0:
869 error = -EEXIST;
870 default:
871 goto out_gunlock;
872 }
873
874 error = -EINVAL;
875 if (!dip->i_inode.i_nlink)
876 goto out_gunlock;
877 error = -EFBIG;
878 if (dip->i_entries == (u32)-1)
879 goto out_gunlock;
880 error = -EPERM;
881 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
882 goto out_gunlock;
883 error = -EINVAL;
884 if (!ip->i_inode.i_nlink)
885 goto out_gunlock;
886 error = -EMLINK;
887 if (ip->i_inode.i_nlink == (u32)-1)
888 goto out_gunlock;
889
890 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
891 if (error < 0)
892 goto out_gunlock;
893 error = 0;
894
895 if (alloc_required) {
896 struct gfs2_alloc *al = gfs2_alloc_get(dip);
897 if (!al) {
898 error = -ENOMEM;
899 goto out_gunlock;
900 }
901
902 error = gfs2_quota_lock_check(dip);
903 if (error)
904 goto out_alloc;
905
906 al->al_requested = sdp->sd_max_dirres;
907
908 error = gfs2_inplace_reserve(dip);
909 if (error)
910 goto out_gunlock_q;
911
912 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
913 gfs2_rg_blocks(al) +
914 2 * RES_DINODE + RES_STATFS +
915 RES_QUOTA, 0);
916 if (error)
917 goto out_ipres;
918 } else {
919 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
920 if (error)
921 goto out_ipres;
922 }
923
924 error = gfs2_meta_inode_buffer(ip, &dibh);
925 if (error)
926 goto out_end_trans;
927
928 error = gfs2_dir_add(dir, &dentry->d_name, ip);
929 if (error)
930 goto out_brelse;
931
932 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
933 inc_nlink(&ip->i_inode);
934 ip->i_inode.i_ctime = CURRENT_TIME;
935 gfs2_dinode_out(ip, dibh->b_data);
936 mark_inode_dirty(&ip->i_inode);
937
938out_brelse:
939 brelse(dibh);
940out_end_trans:
941 gfs2_trans_end(sdp);
942out_ipres:
943 if (alloc_required)
944 gfs2_inplace_release(dip);
945out_gunlock_q:
946 if (alloc_required)
947 gfs2_quota_unlock(dip);
948out_alloc:
949 if (alloc_required)
950 gfs2_alloc_put(dip);
951out_gunlock:
952 gfs2_glock_dq(ghs + 1);
953out_child:
954 gfs2_glock_dq(ghs);
955out_parent:
956 gfs2_holder_uninit(ghs);
957 gfs2_holder_uninit(ghs + 1);
958 if (!error) {
959 ihold(inode);
960 d_instantiate(dentry, inode);
961 mark_inode_dirty(inode);
962 }
963 return error;
964}
965
966/*
967 * gfs2_unlink_ok - check to see that a inode is still in a directory
968 * @dip: the directory
969 * @name: the name of the file
970 * @ip: the inode
971 *
972 * Assumes that the lock on (at least) @dip is held.
973 *
974 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
975 */
976
977static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 const struct gfs2_inode *ip)
979{
980 int error;
981
982 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
983 return -EPERM;
984
985 if ((dip->i_inode.i_mode & S_ISVTX) &&
986 dip->i_inode.i_uid != current_fsuid() &&
987 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
988 return -EPERM;
989
990 if (IS_APPEND(&dip->i_inode))
991 return -EPERM;
992
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
994 if (error)
995 return error;
996
997 error = gfs2_dir_check(&dip->i_inode, name, ip);
998 if (error)
999 return error;
1000
1001 return 0;
1002}
1003
/**
 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
 * @dip: The parent directory
 * @dentry: The dentry of the inode to be removed (supplies both the name
 *          in the parent directory and the inode itself)
 * @bh: The inode buffer for the inode to be removed
 *
 * Called with all the locks held and inside a transaction. This will only be
 * called for a directory after it has been checked to ensure it is empty.
 *
 * Returns: 0 on success, or an error
 */

static int gfs2_unlink_inode(struct gfs2_inode *dip,
			     const struct dentry *dentry,
			     struct buffer_head *bh)
{
	struct inode *inode = dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	/* Remove the directory entry first; everything after this point
	 * only updates the victim inode itself. */
	error = gfs2_dir_del(dip, dentry);
	if (error)
		return error;

	ip->i_entries = 0;
	inode->i_ctime = CURRENT_TIME;
	/* A directory reaches here only when empty, so clear the link count
	 * outright; regular files just lose this one name. */
	if (S_ISDIR(inode->i_mode))
		clear_nlink(inode);
	else
		drop_nlink(inode);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	gfs2_dinode_out(ip, bh->b_data);
	mark_inode_dirty(inode);
	/* Last link gone: hand the dinode to gfs2_unlink_di() — presumably
	 * marking it for deallocation once the last holder drops it. */
	if (inode->i_nlink == 0)
		gfs2_unlink_di(inode);
	return 0;
}
1042
1043
/**
 * gfs2_unlink - Unlink an inode (this does rmdir as well)
 * @dir: The inode of the directory containing the inode to unlink
 * @dentry: The file itself
 *
 * This routine uses the type of the inode as a flag to figure out
 * whether this is an unlink or an rmdir.
 *
 * Returns: errno
 */

static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
{
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct inode *inode = dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *bh;
	/* Three holders, acquired in a fixed order: parent, child, rgrp. */
	struct gfs2_holder ghs[3];
	struct gfs2_rgrpd *rgd;
	struct gfs2_holder ri_gh;
	int error;

	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		return error;

	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	/* NOTE(review): gfs2_blk2rgrpd() result is dereferenced without a
	 * NULL check — confirm the inode's block always maps to a known
	 * resource group. */
	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);


	error = gfs2_glock_nq(ghs); /* parent */
	if (error)
		goto out_parent;

	error = gfs2_glock_nq(ghs + 1); /* child */
	if (error)
		goto out_child;

	error = -ENOENT;
	if (inode->i_nlink == 0)
		goto out_rgrp;

	/* rmdir path: a non-empty directory cannot be removed.  The "> 2"
	 * accounts for the "." and ".." entries. */
	if (S_ISDIR(inode->i_mode)) {
		error = -ENOTEMPTY;
		if (ip->i_entries > 2 || inode->i_nlink > 2)
			goto out_rgrp;
	}

	error = gfs2_glock_nq(ghs + 2); /* rgrp */
	if (error)
		goto out_rgrp;

	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
	if (error)
		goto out_gunlock;

	error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
	if (error)
		goto out_gunlock;

	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out_end_trans;

	error = gfs2_unlink_inode(dip, dentry, bh);
	brelse(bh);

	/* Teardown mirrors the acquisition order exactly; each label undoes
	 * one step, so early failures fall through the right subset. */
out_end_trans:
	gfs2_trans_end(sdp);
out_gunlock:
	gfs2_glock_dq(ghs + 2);
out_rgrp:
	gfs2_holder_uninit(ghs + 2);
	gfs2_glock_dq(ghs + 1);
out_child:
	gfs2_holder_uninit(ghs + 1);
	gfs2_glock_dq(ghs);
out_parent:
	gfs2_holder_uninit(ghs);
	gfs2_glock_dq_uninit(&ri_gh);
	return error;
}
1130
1131/**
1132 * gfs2_symlink - Create a symlink
1133 * @dir: The directory to create the symlink in
1134 * @dentry: The dentry to put the symlink in
1135 * @symname: The thing which the link points to
1136 *
1137 * Returns: errno
1138 */
1139
1140static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1141 const char *symname)
1142{
1143 struct gfs2_sbd *sdp = GFS2_SB(dir);
1144 unsigned int size;
1145
1146 size = strlen(symname);
1147 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
1148 return -ENAMETOOLONG;
1149
1150 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
1151}
1152
/**
 * gfs2_mkdir - Make a directory
 * @dir: The parent directory of the new one
 * @dentry: The dentry of the new directory
 * @mode: The mode of the new directory
 *
 * Returns: errno
 */

static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	/* All the work is delegated to the common creation path; only the
	 * S_IFDIR type bit distinguishes this from a plain create. */
	return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
}
1166
/**
 * gfs2_mknod - Make a special file
 * @dir: The directory in which the special file will reside
 * @dentry: The dentry of the special file
 * @mode: The mode of the special file
 * @dev: The device specification of the special file
 *
 * Returns: errno
 */

static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
		      dev_t dev)
{
	/* Thin wrapper: the common creation path handles every file type. */
	return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
}
1181
/**
 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
 * @this: move this
 * @to: to here
 *
 * Follow @to back to the root and make sure we don't encounter @this
 * Assumes we already hold the rename lock.
 *
 * Returns: errno
 */

static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
{
	struct inode *dir = &to->i_inode;
	struct super_block *sb = dir->i_sb;
	struct inode *tmp;
	int error = 0;

	/* Take our own reference on the starting inode so the loop below
	 * can unconditionally iput() whatever 'dir' currently points at. */
	igrab(dir);

	for (;;) {
		/* Walking up from @to reached @this: the move would create
		 * a cycle (directory into its own subtree). */
		if (dir == &this->i_inode) {
			error = -EINVAL;
			break;
		}
		if (dir == sb->s_root->d_inode) {
			error = 0;
			break;
		}

		/* Step one level up via the ".." entry. */
		tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
		if (IS_ERR(tmp)) {
			error = PTR_ERR(tmp);
			break;
		}

		/* Drop the old level's reference before adopting the new. */
		iput(dir);
		dir = tmp;
	}

	/* Releases either the igrab() above or the last lookup's reference. */
	iput(dir);

	return error;
}
1226
/**
 * gfs2_rename - Rename a file
 * @odir: Parent directory of old file name
 * @odentry: The old dentry of the file
 * @ndir: Parent directory of new file name
 * @ndentry: The new dentry of the file
 *
 * Returns: errno
 */

static int gfs2_rename(struct inode *odir, struct dentry *odentry,
		       struct inode *ndir, struct dentry *ndentry)
{
	struct gfs2_inode *odip = GFS2_I(odir);
	struct gfs2_inode *ndip = GFS2_I(ndir);
	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
	struct gfs2_inode *nip = NULL;
	struct gfs2_sbd *sdp = GFS2_SB(odir);
	/* Up to 5 glocks: old dir, new dir, the inode, the target inode,
	 * and the target's resource group. */
	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
	struct gfs2_rgrpd *nrgd;
	unsigned int num_gh;
	int dir_rename = 0;
	int alloc_required = 0;
	unsigned int x;
	int error;

	/* Renaming an inode onto itself is a no-op. */
	if (ndentry->d_inode) {
		nip = GFS2_I(ndentry->d_inode);
		if (ip == nip)
			return 0;
	}

	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		return error;

	if (odip != ndip) {
		/* Cross-directory rename: serialise against other renames
		 * via the filesystem-wide rename glock. */
		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
					   0, &r_gh);
		if (error)
			goto out;

		if (S_ISDIR(ip->i_inode.i_mode)) {
			dir_rename = 1;
			/* Don't move a directory into its own subtree. */
			error = gfs2_ok_to_move(ip, ndip);
			if (error)
				goto out_gunlock_r;
		}
	}

	/* Initialise the holders we actually need; num_gh tracks how many. */
	num_gh = 1;
	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	if (odip != ndip) {
		gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
		num_gh++;
	}
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
	num_gh++;

	if (nip) {
		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
		num_gh++;
		/* grab the resource lock for unlink flag twiddling
		 * this is the case of the target file already existing
		 * so we unlink before doing the rename
		 */
		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
		if (nrgd)
			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
	}

	for (x = 0; x < num_gh; x++) {
		error = gfs2_glock_nq(ghs + x);
		if (error)
			goto out_gunlock;
	}

	error = -ENOENT;
	if (ip->i_inode.i_nlink == 0)
		goto out_gunlock;

	/* Check out the old directory */

	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
	if (error)
		goto out_gunlock;

	/* Check out the new directory */

	if (nip) {
		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
		if (error)
			goto out_gunlock;

		if (nip->i_inode.i_nlink == 0) {
			error = -EAGAIN;
			goto out_gunlock;
		}

		if (S_ISDIR(nip->i_inode.i_mode)) {
			/* Fewer than "." and ".." entries means on-disk
			 * corruption; more means a non-empty target. */
			if (nip->i_entries < 2) {
				gfs2_consist_inode(nip);
				error = -EIO;
				goto out_gunlock;
			}
			if (nip->i_entries > 2) {
				error = -ENOTEMPTY;
				goto out_gunlock;
			}
		}
	} else {
		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
		if (error)
			goto out_gunlock;

		/* We expect the new name to be absent: -ENOENT is success;
		 * finding it (0) is -EEXIST; anything else propagates. */
		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
		switch (error) {
		case -ENOENT:
			error = 0;
			break;
		case 0:
			error = -EEXIST;
			/* fall through */
		default:
			goto out_gunlock;
		};

		if (odip != ndip) {
			if (!ndip->i_inode.i_nlink) {
				error = -ENOENT;
				goto out_gunlock;
			}
			if (ndip->i_entries == (u32)-1) {
				error = -EFBIG;
				goto out_gunlock;
			}
			/* A directory move adds a ".." link to the new
			 * parent; refuse if its link count is saturated. */
			if (S_ISDIR(ip->i_inode.i_mode) &&
			    ndip->i_inode.i_nlink == (u32)-1) {
				error = -EMLINK;
				goto out_gunlock;
			}
		}
	}

	/* Check out the dir to be renamed */

	if (dir_rename) {
		error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
		if (error)
			goto out_gunlock;
	}

	/* gfs2_diradd_alloc_required() returns >0 if the new entry needs
	 * new blocks, 0 if not, or a negative errno. */
	if (nip == NULL)
		alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
	error = alloc_required;
	if (error < 0)
		goto out_gunlock;
	error = 0;

	if (alloc_required) {
		struct gfs2_alloc *al = gfs2_alloc_get(ndip);
		if (!al) {
			error = -ENOMEM;
			goto out_gunlock;
		}

		error = gfs2_quota_lock_check(ndip);
		if (error)
			goto out_alloc;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve_ri(ndip);
		if (error)
			goto out_gunlock_q;

		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
					 gfs2_rg_blocks(al) +
					 4 * RES_DINODE + 4 * RES_LEAF +
					 RES_STATFS + RES_QUOTA + 4, 0);
		if (error)
			goto out_ipreserv;
	} else {
		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
					 5 * RES_LEAF + 4, 0);
		if (error)
			goto out_gunlock;
	}

	/* Remove the target file, if it exists */

	if (nip) {
		struct buffer_head *bh;
		error = gfs2_meta_inode_buffer(nip, &bh);
		if (error)
			goto out_end_trans;
		error = gfs2_unlink_inode(ndip, ndentry, bh);
		brelse(bh);
	}

	if (dir_rename) {
		/* Directory move: rewrite its ".." entry to the new parent. */
		error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
		if (error)
			goto out_end_trans;
	} else {
		struct buffer_head *dibh;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error)
			goto out_end_trans;
		ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	error = gfs2_dir_del(odip, odentry);
	if (error)
		goto out_end_trans;

	error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
	if (error)
		goto out_end_trans;

out_end_trans:
	gfs2_trans_end(sdp);
out_ipreserv:
	if (alloc_required)
		gfs2_inplace_release(ndip);
out_gunlock_q:
	if (alloc_required)
		gfs2_quota_unlock(ndip);
out_alloc:
	if (alloc_required)
		gfs2_alloc_put(ndip);
out_gunlock:
	/* NOTE(review): if gfs2_glock_nq() failed at index x, holders
	 * x..num_gh-1 were initialised but are never uninitialised here —
	 * looks like a holder/glock reference leak; confirm. */
	while (x--) {
		gfs2_glock_dq(ghs + x);
		gfs2_holder_uninit(ghs + x);
	}
out_gunlock_r:
	if (r_gh.gh_gl)
		gfs2_glock_dq_uninit(&r_gh);
out:
	gfs2_glock_dq_uninit(&ri_gh);
	return error;
}
1473
/**
 * gfs2_follow_link - Follow a symbolic link
 * @dentry: The dentry of the link
 * @nd: Data that we pass to vfs_follow_link()
 *
 * This can handle symlinks of any size.
 *
 * Returns: 0 on success or error code
 */

static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_holder i_gh;
	struct buffer_head *dibh;
	unsigned int size;
	char *buf;
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
	error = gfs2_glock_nq(&i_gh);
	if (error) {
		/* Errors are reported to the VFS via nd_set_link(ERR_PTR),
		 * not the return value, which is the put_link cookie. */
		gfs2_holder_uninit(&i_gh);
		nd_set_link(nd, ERR_PTR(error));
		return NULL;
	}

	/* A zero-length symlink target indicates on-disk corruption. */
	size = (unsigned int)i_size_read(&ip->i_inode);
	if (size == 0) {
		gfs2_consist_inode(ip);
		buf = ERR_PTR(-EIO);
		goto out;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error) {
		buf = ERR_PTR(error);
		goto out;
	}

	/* +1 and kzalloc guarantee NUL termination of the copied target,
	 * which lives inline in the dinode block after the header. */
	buf = kzalloc(size + 1, GFP_NOFS);
	if (!buf)
		buf = ERR_PTR(-ENOMEM);
	else
		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
	brelse(dibh);
out:
	gfs2_glock_dq_uninit(&i_gh);
	nd_set_link(nd, buf);
	return NULL;
}
1525
1526static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1527{
1528 char *s = nd_get_link(nd);
1529 if (!IS_ERR(s))
1530 kfree(s);
1531}
1532
1533/**
1534 * gfs2_permission -
1535 * @inode: The inode
1536 * @mask: The mask to be tested
1537 * @flags: Indicates whether this is an RCU path walk or not
1538 *
1539 * This may be called from the VFS directly, or from within GFS2 with the
1540 * inode locked, so we look to see if the glock is already locked and only
1541 * lock the glock if its not already been done.
1542 *
1543 * Returns: errno
1544 */
1545
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1547{
1548 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh;
1550 int error;
1551 int unlock = 0;
1552
1553
1554 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU)
1557 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error)
1560 return error;
1561 unlock = 1;
1562 }
1563
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES;
1566 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1568 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh);
1570
1571 return error;
880} 1572}
881 1573
882static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1574static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -902,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
902 * @ip: 1594 * @ip:
903 * @attr: 1595 * @attr:
904 * 1596 *
905 * Called with a reference on the vnode.
906 *
907 * Returns: errno 1597 * Returns: errno
908 */ 1598 */
909 1599
@@ -923,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
923 return error; 1613 return error;
924} 1614}
925 1615
926void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1616static int setattr_chown(struct inode *inode, struct iattr *attr)
927{ 1617{
928 struct gfs2_dinode *str = buf; 1618 struct gfs2_inode *ip = GFS2_I(inode);
929 1619 struct gfs2_sbd *sdp = GFS2_SB(inode);
930 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1620 u32 ouid, ogid, nuid, ngid;
931 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1621 int error;
932 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1622
933 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1623 ouid = inode->i_uid;
934 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1624 ogid = inode->i_gid;
935 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1625 nuid = attr->ia_uid;
936 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1626 ngid = attr->ia_gid;
937 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1627
938 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1628 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
939 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 1629 ouid = nuid = NO_QUOTA_CHANGE;
940 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1630 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
941 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1631 ogid = ngid = NO_QUOTA_CHANGE;
942 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1632
943 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1633 if (!gfs2_alloc_get(ip))
944 1634 return -ENOMEM;
945 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1635
946 str->di_goal_data = cpu_to_be64(ip->i_goal); 1636 error = gfs2_quota_lock(ip, nuid, ngid);
947 str->di_generation = cpu_to_be64(ip->i_generation); 1637 if (error)
948 1638 goto out_alloc;
949 str->di_flags = cpu_to_be32(ip->i_diskflags); 1639
950 str->di_height = cpu_to_be16(ip->i_height); 1640 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
951 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1641 error = gfs2_quota_check(ip, nuid, ngid);
952 !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 1642 if (error)
953 GFS2_FORMAT_DE : 0); 1643 goto out_gunlock_q;
954 str->di_depth = cpu_to_be16(ip->i_depth); 1644 }
955 str->di_entries = cpu_to_be32(ip->i_entries); 1645
956 1646 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
957 str->di_eattr = cpu_to_be64(ip->i_eattr); 1647 if (error)
958 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1648 goto out_gunlock_q;
959 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1649
960 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1650 error = gfs2_setattr_simple(ip, attr);
961} 1651 if (error)
962 1652 goto out_end_trans;
963void gfs2_dinode_print(const struct gfs2_inode *ip) 1653
964{ 1654 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
965 printk(KERN_INFO " no_formal_ino = %llu\n", 1655 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
966 (unsigned long long)ip->i_no_formal_ino); 1656 gfs2_quota_change(ip, -blocks, ouid, ogid);
967 printk(KERN_INFO " no_addr = %llu\n", 1657 gfs2_quota_change(ip, blocks, nuid, ngid);
968 (unsigned long long)ip->i_no_addr); 1658 }
969 printk(KERN_INFO " i_size = %llu\n", 1659
970 (unsigned long long)i_size_read(&ip->i_inode)); 1660out_end_trans:
971 printk(KERN_INFO " blocks = %llu\n", 1661 gfs2_trans_end(sdp);
972 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1662out_gunlock_q:
973 printk(KERN_INFO " i_goal = %llu\n", 1663 gfs2_quota_unlock(ip);
974 (unsigned long long)ip->i_goal); 1664out_alloc:
975 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); 1665 gfs2_alloc_put(ip);
976 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1666 return error;
977 printk(KERN_INFO " i_depth = %u\n", ip->i_depth);
978 printk(KERN_INFO " i_entries = %u\n", ip->i_entries);
979 printk(KERN_INFO " i_eattr = %llu\n",
980 (unsigned long long)ip->i_eattr);
981} 1667}
982 1668
1669/**
1670 * gfs2_setattr - Change attributes on an inode
1671 * @dentry: The dentry which is changing
1672 * @attr: The structure describing the change
1673 *
1674 * The VFS layer wants to change one or more of an inodes attributes. Write
1675 * that change out to disk.
1676 *
1677 * Returns: errno
1678 */
1679
1680static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1681{
1682 struct inode *inode = dentry->d_inode;
1683 struct gfs2_inode *ip = GFS2_I(inode);
1684 struct gfs2_holder i_gh;
1685 int error;
1686
1687 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1688 if (error)
1689 return error;
1690
1691 error = -EPERM;
1692 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1693 goto out;
1694
1695 error = inode_change_ok(inode, attr);
1696 if (error)
1697 goto out;
1698
1699 if (attr->ia_valid & ATTR_SIZE)
1700 error = gfs2_setattr_size(inode, attr->ia_size);
1701 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1702 error = setattr_chown(inode, attr);
1703 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1704 error = gfs2_acl_chmod(ip, attr);
1705 else
1706 error = gfs2_setattr_simple(ip, attr);
1707
1708out:
1709 gfs2_glock_dq_uninit(&i_gh);
1710 if (!error)
1711 mark_inode_dirty(inode);
1712 return error;
1713}
1714
1715/**
1716 * gfs2_getattr - Read out an inode's attributes
1717 * @mnt: The vfsmount the inode is being accessed from
1718 * @dentry: The dentry to stat
1719 * @stat: The inode's stats
1720 *
1721 * This may be called from the VFS directly, or from within GFS2 with the
1722 * inode locked, so we look to see if the glock is already locked and only
1723 * lock the glock if its not already been done. Note that its the NFS
1724 * readdirplus operation which causes this to be called (from filldir)
1725 * with the glock already held.
1726 *
1727 * Returns: errno
1728 */
1729
1730static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1731 struct kstat *stat)
1732{
1733 struct inode *inode = dentry->d_inode;
1734 struct gfs2_inode *ip = GFS2_I(inode);
1735 struct gfs2_holder gh;
1736 int error;
1737 int unlock = 0;
1738
1739 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1740 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1741 if (error)
1742 return error;
1743 unlock = 1;
1744 }
1745
1746 generic_fillattr(inode, stat);
1747 if (unlock)
1748 gfs2_glock_dq_uninit(&gh);
1749
1750 return 0;
1751}
1752
1753static int gfs2_setxattr(struct dentry *dentry, const char *name,
1754 const void *data, size_t size, int flags)
1755{
1756 struct inode *inode = dentry->d_inode;
1757 struct gfs2_inode *ip = GFS2_I(inode);
1758 struct gfs2_holder gh;
1759 int ret;
1760
1761 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1762 ret = gfs2_glock_nq(&gh);
1763 if (ret == 0) {
1764 ret = generic_setxattr(dentry, name, data, size, flags);
1765 gfs2_glock_dq(&gh);
1766 }
1767 gfs2_holder_uninit(&gh);
1768 return ret;
1769}
1770
1771static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1772 void *data, size_t size)
1773{
1774 struct inode *inode = dentry->d_inode;
1775 struct gfs2_inode *ip = GFS2_I(inode);
1776 struct gfs2_holder gh;
1777 int ret;
1778
1779 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1780 ret = gfs2_glock_nq(&gh);
1781 if (ret == 0) {
1782 ret = generic_getxattr(dentry, name, data, size);
1783 gfs2_glock_dq(&gh);
1784 }
1785 gfs2_holder_uninit(&gh);
1786 return ret;
1787}
1788
1789static int gfs2_removexattr(struct dentry *dentry, const char *name)
1790{
1791 struct inode *inode = dentry->d_inode;
1792 struct gfs2_inode *ip = GFS2_I(inode);
1793 struct gfs2_holder gh;
1794 int ret;
1795
1796 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1797 ret = gfs2_glock_nq(&gh);
1798 if (ret == 0) {
1799 ret = generic_removexattr(dentry, name);
1800 gfs2_glock_dq(&gh);
1801 }
1802 gfs2_holder_uninit(&gh);
1803 return ret;
1804}
1805
/* Report the inode's extent mapping for the FIEMAP ioctl.  Stuffed
 * (inline-in-dinode) files get a single synthetic extent; everything else
 * is delegated to the generic block-mapping helper. */
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		       u64 start, u64 len)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	if (ret)
		goto out;

	if (gfs2_is_stuffed(ip)) {
		/* The data lives inside the dinode block, right after the
		 * on-disk header, so compute its physical byte address. */
		u64 phys = ip->i_no_addr << inode->i_blkbits;
		u64 size = i_size_read(inode);
		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
			    FIEMAP_EXTENT_DATA_INLINE;
		phys += sizeof(struct gfs2_dinode);
		phys += start;
		/* Clamp to file size; if start >= size the (start < size)
		 * guard below skips the fill, so the clamped len is unused. */
		if (start + len > size)
			len = size - start;
		if (start < size)
			ret = fiemap_fill_next_extent(fieinfo, start, phys,
						      len, flags);
		/* fiemap_fill_next_extent() returns 1 when the caller's
		 * buffer is full — that is not an error. */
		if (ret == 1)
			ret = 0;
	} else {
		ret = __generic_block_fiemap(inode, fieinfo, start, len,
					     gfs2_block_map);
	}

	gfs2_glock_dq_uninit(&gh);
out:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
1847
/* Inode operations for regular files. */
const struct inode_operations gfs2_file_iops = {
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
	.fiemap = gfs2_fiemap,
};
1858
/* Inode operations for directories.  Note that gfs2_unlink() serves both
 * .unlink and .rmdir: it inspects the inode type itself. */
const struct inode_operations gfs2_dir_iops = {
	.create = gfs2_create,
	.lookup = gfs2_lookup,
	.link = gfs2_link,
	.unlink = gfs2_unlink,
	.symlink = gfs2_symlink,
	.mkdir = gfs2_mkdir,
	.rmdir = gfs2_unlink,
	.mknod = gfs2_mknod,
	.rename = gfs2_rename,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
	.fiemap = gfs2_fiemap,
};
1878
/* Inode operations for symbolic links. */
const struct inode_operations gfs2_symlink_iops = {
	.readlink = generic_readlink,
	.follow_link = gfs2_follow_link,
	.put_link = gfs2_put_link,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
	.fiemap = gfs2_fiemap,
};
1892
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3e00a66e7cbd..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -97,26 +97,21 @@ err:
97} 97}
98 98
99extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 99extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
100 u64 no_addr, u64 no_formal_ino); 100 u64 no_addr, u64 no_formal_ino,
101 int non_block);
101extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
102 u64 *no_formal_ino, 103 u64 *no_formal_ino,
103 unsigned int blktype); 104 unsigned int blktype);
104extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
105 106
106extern int gfs2_inode_refresh(struct gfs2_inode *ip); 107extern int gfs2_inode_refresh(struct gfs2_inode *ip);
107 108
108extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
109extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
110extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
111 int is_root); 110 int is_root);
112extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
113 const struct qstr *name,
114 unsigned int mode, dev_t dev);
115extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
116extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
117extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
118extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
119extern void gfs2_dinode_print(const struct gfs2_inode *ip);
120 115
121extern const struct inode_operations gfs2_file_iops; 116extern const struct inode_operations gfs2_file_iops;
122extern const struct inode_operations gfs2_dir_iops; 117extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..cec26c00b50d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
83/** 84/**
84 * gfs2_ail1_start_one - Start I/O on a part of the AIL 85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
85 * @sdp: the filesystem 86 * @sdp: the filesystem
86 * @tr: the part of the AIL 87 * @wbc: The writeback control structure
88 * @ai: The ail structure
87 * 89 *
88 */ 90 */
89 91
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 92static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
93 struct writeback_control *wbc,
94 struct gfs2_ail *ai)
91__releases(&sdp->sd_ail_lock) 95__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_ail_lock) 96__acquires(&sdp->sd_ail_lock)
93{ 97{
98 struct gfs2_glock *gl = NULL;
99 struct address_space *mapping;
94 struct gfs2_bufdata *bd, *s; 100 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 101 struct buffer_head *bh;
96 int retry;
97 102
98 do { 103 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
99 retry = 0; 104 bh = bd->bd_bh;
100 105
101 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 106 gfs2_assert(sdp, bd->bd_ail == ai);
102 bd_ail_st_list) {
103 bh = bd->bd_bh;
104 107
105 gfs2_assert(sdp, bd->bd_ail == ai); 108 if (!buffer_busy(bh)) {
109 if (!buffer_uptodate(bh))
110 gfs2_io_error_bh(sdp, bh);
111 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
112 continue;
113 }
106 114
107 if (!buffer_busy(bh)) { 115 if (!buffer_dirty(bh))
108 if (!buffer_uptodate(bh)) 116 continue;
109 gfs2_io_error_bh(sdp, bh); 117 if (gl == bd->bd_gl)
110 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 118 continue;
111 continue; 119 gl = bd->bd_gl;
112 } 120 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
121 mapping = bh->b_page->mapping;
122 if (!mapping)
123 continue;
124 spin_unlock(&sdp->sd_ail_lock);
125 generic_writepages(mapping, wbc);
126 spin_lock(&sdp->sd_ail_lock);
127 if (wbc->nr_to_write <= 0)
128 break;
129 return 1;
130 }
113 131
114 if (!buffer_dirty(bh)) 132 return 0;
115 continue; 133}
116 134
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 135
119 get_bh(bh); 136/**
120 spin_unlock(&sdp->sd_ail_lock); 137 * gfs2_ail1_flush - start writeback of some ail1 entries
121 lock_buffer(bh); 138 * @sdp: The super block
122 if (test_clear_buffer_dirty(bh)) { 139 * @wbc: The writeback control structure
123 bh->b_end_io = end_buffer_write_sync; 140 *
124 submit_bh(WRITE_SYNC, bh); 141 * Writes back some ail1 entries, according to the limits in the
125 } else { 142 * writeback control structure
126 unlock_buffer(bh); 143 */
127 brelse(bh); 144
128 } 145void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
129 spin_lock(&sdp->sd_ail_lock); 146{
130 147 struct list_head *head = &sdp->sd_ail1_list;
131 retry = 1; 148 struct gfs2_ail *ai;
149
150 trace_gfs2_ail_flush(sdp, wbc, 1);
151 spin_lock(&sdp->sd_ail_lock);
152restart:
153 list_for_each_entry_reverse(ai, head, ai_list) {
154 if (wbc->nr_to_write <= 0)
132 break; 155 break;
133 } 156 if (gfs2_ail1_start_one(sdp, wbc, ai))
134 } while (retry); 157 goto restart;
158 }
159 spin_unlock(&sdp->sd_ail_lock);
160 trace_gfs2_ail_flush(sdp, wbc, 0);
161}
162
163/**
164 * gfs2_ail1_start - start writeback of all ail1 entries
165 * @sdp: The superblock
166 */
167
168static void gfs2_ail1_start(struct gfs2_sbd *sdp)
169{
170 struct writeback_control wbc = {
171 .sync_mode = WB_SYNC_NONE,
172 .nr_to_write = LONG_MAX,
173 .range_start = 0,
174 .range_end = LLONG_MAX,
175 };
176
177 return gfs2_ail1_flush(sdp, &wbc);
135} 178}
136 179
137/** 180/**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
141 * 184 *
142 */ 185 */
143 186
144static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) 187static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
145{ 188{
146 struct gfs2_bufdata *bd, *s; 189 struct gfs2_bufdata *bd, *s;
147 struct buffer_head *bh; 190 struct buffer_head *bh;
@@ -149,71 +192,37 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
149 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 192 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
150 bd_ail_st_list) { 193 bd_ail_st_list) {
151 bh = bd->bd_bh; 194 bh = bd->bd_bh;
152
153 gfs2_assert(sdp, bd->bd_ail == ai); 195 gfs2_assert(sdp, bd->bd_ail == ai);
154 196 if (buffer_busy(bh))
155 if (buffer_busy(bh)) { 197 continue;
156 if (flags & DIO_ALL)
157 continue;
158 else
159 break;
160 }
161
162 if (!buffer_uptodate(bh)) 198 if (!buffer_uptodate(bh))
163 gfs2_io_error_bh(sdp, bh); 199 gfs2_io_error_bh(sdp, bh);
164
165 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 200 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
166 } 201 }
167 202
168 return list_empty(&ai->ai_ail1_list);
169} 203}
170 204
171static void gfs2_ail1_start(struct gfs2_sbd *sdp) 205/**
172{ 206 * gfs2_ail1_empty - Try to empty the ail1 lists
173 struct list_head *head; 207 * @sdp: The superblock
174 u64 sync_gen; 208 *
175 struct gfs2_ail *ai; 209 * Tries to empty the ail1 lists, starting with the oldest first
176 int done = 0; 210 */
177
178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) {
181 spin_unlock(&sdp->sd_ail_lock);
182 return;
183 }
184 sync_gen = sdp->sd_ail_sync_gen++;
185
186 while(!done) {
187 done = 1;
188 list_for_each_entry_reverse(ai, head, ai_list) {
189 if (ai->ai_sync_gen >= sync_gen)
190 continue;
191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0;
194 break;
195 }
196 }
197
198 spin_unlock(&sdp->sd_ail_lock);
199}
200 211
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 212static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
202{ 213{
203 struct gfs2_ail *ai, *s; 214 struct gfs2_ail *ai, *s;
204 int ret; 215 int ret;
205 216
206 spin_lock(&sdp->sd_ail_lock); 217 spin_lock(&sdp->sd_ail_lock);
207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 218 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 219 gfs2_ail1_empty_one(sdp, ai);
220 if (list_empty(&ai->ai_ail1_list))
210 list_move(&ai->ai_list, &sdp->sd_ail2_list); 221 list_move(&ai->ai_list, &sdp->sd_ail2_list);
211 else if (!(flags & DIO_ALL)) 222 else
212 break; 223 break;
213 } 224 }
214
215 ret = list_empty(&sdp->sd_ail1_list); 225 ret = list_empty(&sdp->sd_ail1_list);
216
217 spin_unlock(&sdp->sd_ail_lock); 226 spin_unlock(&sdp->sd_ail_lock);
218 227
219 return ret; 228 return ret;
@@ -574,7 +583,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
574 set_buffer_uptodate(bh); 583 set_buffer_uptodate(bh);
575 clear_buffer_dirty(bh); 584 clear_buffer_dirty(bh);
576 585
577 gfs2_ail1_empty(sdp, 0); 586 gfs2_ail1_empty(sdp);
578 tail = current_tail(sdp); 587 tail = current_tail(sdp);
579 588
580 lh = (struct gfs2_log_header *)bh->b_data; 589 lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,7 +878,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
869 gfs2_log_flush(sdp, NULL); 878 gfs2_log_flush(sdp, NULL);
870 for (;;) { 879 for (;;) {
871 gfs2_ail1_start(sdp); 880 gfs2_ail1_start(sdp);
872 if (gfs2_ail1_empty(sdp, DIO_ALL)) 881 if (gfs2_ail1_empty(sdp))
873 break; 882 break;
874 msleep(10); 883 msleep(10);
875 } 884 }
@@ -905,17 +914,15 @@ int gfs2_logd(void *data)
905 914
906 preflush = atomic_read(&sdp->sd_log_pinned); 915 preflush = atomic_read(&sdp->sd_log_pinned);
907 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 916 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
908 gfs2_ail1_empty(sdp, DIO_ALL); 917 gfs2_ail1_empty(sdp);
909 gfs2_log_flush(sdp, NULL); 918 gfs2_log_flush(sdp, NULL);
910 gfs2_ail1_empty(sdp, DIO_ALL);
911 } 919 }
912 920
913 if (gfs2_ail_flush_reqd(sdp)) { 921 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_ail1_start(sdp); 922 gfs2_ail1_start(sdp);
915 io_schedule(); 923 io_schedule();
916 gfs2_ail1_empty(sdp, 0); 924 gfs2_ail1_empty(sdp);
917 gfs2_log_flush(sdp, NULL); 925 gfs2_log_flush(sdp, NULL);
918 gfs2_ail1_empty(sdp, DIO_ALL);
919 } 926 }
920 927
921 wake_up(&sdp->sd_log_waitq); 928 wake_up(&sdp->sd_log_waitq);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/writeback.h>
15#include "incore.h" 16#include "incore.h"
16 17
17/** 18/**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
61extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
63extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
62 64
63extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); 65extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 66extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
40{ 40{
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 42
43 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 43 BUG_ON(!current->journal_info);
44 44
45 clear_buffer_dirty(bh); 45 clear_buffer_dirty(bh);
46 if (test_set_buffer_pinned(bh)) 46 if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
65 * @sdp: the filesystem the buffer belongs to 65 * @sdp: the filesystem the buffer belongs to
66 * @bh: The buffer to unpin 66 * @bh: The buffer to unpin
67 * @ai: 67 * @ai:
68 * @flags: The inode dirty flags
68 * 69 *
69 */ 70 */
70 71
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
73{ 74{
74 struct gfs2_bufdata *bd = bh->b_private; 75 struct gfs2_bufdata *bd = bh->b_private;
75 76
76 gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 77 BUG_ON(!buffer_uptodate(bh));
77 78 BUG_ON(!buffer_pinned(bh));
78 if (!buffer_pinned(bh))
79 gfs2_assert_withdraw(sdp, 0);
80 79
81 lock_buffer(bh); 80 lock_buffer(bh);
82 mark_buffer_dirty(bh); 81 mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 94 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
96 spin_unlock(&sdp->sd_ail_lock); 95 spin_unlock(&sdp->sd_ail_lock);
97 96
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) 97 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
100 trace_gfs2_pin(bd, 0); 98 trace_gfs2_pin(bd, 0);
101 unlock_buffer(bh); 99 unlock_buffer(bh);
102 atomic_dec(&sdp->sd_log_pinned); 100 atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
322 320
323static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 321static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
324{ 322{
323 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
324 struct gfs2_glock *gl = bd->bd_gl;
325 struct gfs2_trans *tr; 325 struct gfs2_trans *tr;
326 326
327 tr = current->journal_info; 327 tr = current->journal_info;
328 tr->tr_touched = 1; 328 tr->tr_touched = 1;
329 tr->tr_num_revoke++; 329 tr->tr_num_revoke++;
330 sdp->sd_log_num_revoke++; 330 sdp->sd_log_num_revoke++;
331 atomic_inc(&gl->gl_revokes);
332 set_bit(GLF_LFLUSH, &gl->gl_flags);
331 list_add(&le->le_list, &sdp->sd_log_le_revoke); 333 list_add(&le->le_list, &sdp->sd_log_le_revoke);
332} 334}
333 335
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
350 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 352 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
351 offset = sizeof(struct gfs2_log_descriptor); 353 offset = sizeof(struct gfs2_log_descriptor);
352 354
353 while (!list_empty(head)) { 355 list_for_each_entry(bd, head, bd_le.le_list) {
354 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
355 list_del_init(&bd->bd_le.le_list);
356 sdp->sd_log_num_revoke--; 356 sdp->sd_log_num_revoke--;
357 357
358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
367 } 367 }
368 368
369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
370 kmem_cache_free(gfs2_bufdata_cachep, bd);
371
372 offset += sizeof(u64); 370 offset += sizeof(u64);
373 } 371 }
374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 372 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
376 submit_bh(WRITE_SYNC, bh); 374 submit_bh(WRITE_SYNC, bh);
377} 375}
378 376
377static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
378{
379 struct list_head *head = &sdp->sd_log_le_revoke;
380 struct gfs2_bufdata *bd;
381 struct gfs2_glock *gl;
382
383 while (!list_empty(head)) {
384 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
385 list_del_init(&bd->bd_le.le_list);
386 gl = bd->bd_gl;
387 atomic_dec(&gl->gl_revokes);
388 clear_bit(GLF_LFLUSH, &gl->gl_flags);
389 kmem_cache_free(gfs2_bufdata_cachep, bd);
390 }
391}
392
379static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 393static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
380 struct gfs2_log_header_host *head, int pass) 394 struct gfs2_log_header_host *head, int pass)
381{ 395{
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
749const struct gfs2_log_operations gfs2_revoke_lops = { 763const struct gfs2_log_operations gfs2_revoke_lops = {
750 .lo_add = revoke_lo_add, 764 .lo_add = revoke_lo_add,
751 .lo_before_commit = revoke_lo_before_commit, 765 .lo_before_commit = revoke_lo_before_commit,
766 .lo_after_commit = revoke_lo_after_commit,
752 .lo_before_scan = revoke_lo_before_scan, 767 .lo_before_scan = revoke_lo_before_scan,
753 .lo_scan_elements = revoke_lo_scan_elements, 768 .lo_scan_elements = revoke_lo_scan_elements,
754 .lo_after_scan = revoke_lo_after_scan, 769 .lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
53 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
54 INIT_LIST_HEAD(&gl->gl_ail_list); 54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0); 55 atomic_set(&gl->gl_ail_count, 0);
56 atomic_set(&gl->gl_revokes, 0);
56} 57}
57 58
58static void gfs2_init_gl_aspace_once(void *foo) 59static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
31#include "rgrp.h" 31#include "rgrp.h"
32#include "trans.h" 32#include "trans.h"
33#include "util.h" 33#include "util.h"
34#include "trace_gfs2.h"
34 35
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 36static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 37{
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
310 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
311 312
312 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 trace_gfs2_pin(bd, 0);
313 atomic_dec(&sdp->sd_log_pinned); 315 atomic_dec(&sdp->sd_log_pinned);
314 list_del_init(&bd->bd_le.le_list); 316 list_del_init(&bd->bd_le.le_list);
315 if (meta) { 317 if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
77 77
78#define buffer_busy(bh) \ 78#define buffer_busy(bh) \
79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) 79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
80#define buffer_in_io(bh) \
81((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
82 80
83#endif /* __DIO_DOT_H__ */ 81#endif /* __DIO_DOT_H__ */
84 82
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 42ef24355afb..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
126 * changed. 126 * changed.
127 */ 127 */
128 128
129static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) 129static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
130{ 130{
131 struct gfs2_sb_host *sb = &sdp->sd_sb;
132
131 if (sb->sb_magic != GFS2_MAGIC || 133 if (sb->sb_magic != GFS2_MAGIC ||
132 sb->sb_type != GFS2_METATYPE_SB) { 134 sb->sb_type != GFS2_METATYPE_SB) {
133 if (!silent) 135 if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
157 unlock_page(page); 159 unlock_page(page);
158} 160}
159 161
160static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) 162static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
161{ 163{
164 struct gfs2_sb_host *sb = &sdp->sd_sb;
165 struct super_block *s = sdp->sd_vfs;
162 const struct gfs2_sb *str = buf; 166 const struct gfs2_sb *str = buf;
163 167
164 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); 168 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
175 179
176 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 180 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
177 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 181 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
178 memcpy(sb->sb_uuid, str->sb_uuid, 16); 182 memcpy(s->s_uuid, str->sb_uuid, 16);
179} 183}
180 184
181/** 185/**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
197 * Returns: 0 on success or error 201 * Returns: 0 on success or error
198 */ 202 */
199 203
200static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) 204static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
201{ 205{
202 struct super_block *sb = sdp->sd_vfs; 206 struct super_block *sb = sdp->sd_vfs;
203 struct gfs2_sb *p; 207 struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
227 return -EIO; 231 return -EIO;
228 } 232 }
229 p = kmap(page); 233 p = kmap(page);
230 gfs2_sb_in(&sdp->sd_sb, p); 234 gfs2_sb_in(sdp, p);
231 kunmap(page); 235 kunmap(page);
232 __free_page(page); 236 __free_page(page);
233 return 0; 237 return gfs2_check_sb(sdp, silent);
234} 238}
235 239
236/** 240/**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
247 unsigned int x; 251 unsigned int x;
248 int error; 252 int error;
249 253
250 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 254 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
251 if (error) { 255 if (error) {
252 if (!silent) 256 if (!silent)
253 fs_err(sdp, "can't read superblock\n"); 257 fs_err(sdp, "can't read superblock\n");
254 return error; 258 return error;
255 } 259 }
256 260
257 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
258 if (error)
259 return error;
260
261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
262 GFS2_BASIC_BLOCK_SHIFT; 262 GFS2_BASIC_BLOCK_SHIFT;
263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
340 /* Try to autodetect */ 340 /* Try to autodetect */
341 341
342 if (!proto[0] || !table[0]) { 342 if (!proto[0] || !table[0]) {
343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
344 if (error) 344 if (error)
345 return error; 345 return error;
346 346
347 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
348 if (error)
349 goto out;
350
351 if (!proto[0]) 347 if (!proto[0])
352 proto = sdp->sd_sb.sb_lockproto; 348 proto = sdp->sd_sb.sb_lockproto;
353 if (!table[0]) 349 if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
364 while ((table = strchr(table, '/'))) 360 while ((table = strchr(table, '/')))
365 *table = '_'; 361 *table = '_';
366 362
367out:
368 return error; 363 return error;
369} 364}
370 365
@@ -430,7 +425,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
430 struct dentry *dentry; 425 struct dentry *dentry;
431 struct inode *inode; 426 struct inode *inode;
432 427
433 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); 428 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
434 if (IS_ERR(inode)) { 429 if (IS_ERR(inode)) {
435 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 430 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
436 return PTR_ERR(inode); 431 return PTR_ERR(inode);
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1119 if (sdp->sd_args.ar_statfs_quantum) { 1114 if (sdp->sd_args.ar_statfs_quantum) {
1120 sdp->sd_tune.gt_statfs_slow = 0; 1115 sdp->sd_tune.gt_statfs_slow = 0;
1121 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; 1116 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1122 } 1117 } else {
1123 else {
1124 sdp->sd_tune.gt_statfs_slow = 1; 1118 sdp->sd_tune.gt_statfs_slow = 1;
1125 sdp->sd_tune.gt_statfs_quantum = 30; 1119 sdp->sd_tune.gt_statfs_quantum = 30;
1126 } 1120 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl.h>
18#include <linux/gfs2_ondisk.h>
19#include <linux/crc32.h>
20#include <linux/fiemap.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "acl.h"
26#include "bmap.h"
27#include "dir.h"
28#include "xattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "util.h"
36#include "super.h"
37
38/**
39 * gfs2_create - Create a file
40 * @dir: The directory in which to create the file
41 * @dentry: The dentry of the new file
42 * @mode: The mode of the new file
43 *
44 * Returns: errno
45 */
46
 47static int gfs2_create(struct inode *dir, struct dentry *dentry,
 48 int mode, struct nameidata *nd)
 49{
 50 struct gfs2_inode *dip = GFS2_I(dir);
 51 struct gfs2_sbd *sdp = GFS2_SB(dir);
 52 struct gfs2_holder ghs[2];
 53 struct inode *inode;
 54
 /* Hold the directory glock across the create attempt. */
 55 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 56
 /*
 * Retry loop: gfs2_createi() can race with another node creating the
 * same name. On -EEXIST (and no LOOKUP_EXCL), fall through to a plain
 * lookup of the now-existing inode instead of failing.
 */
 57 for (;;) {
 58 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
 59 if (!IS_ERR(inode)) {
 /* Success: end the transaction and release quota/alloc state
 * that gfs2_createi() left held (presumably acquired there —
 * the callee is outside this view). */
 60 gfs2_trans_end(sdp);
 61 if (dip->i_alloc->al_rgd)
 62 gfs2_inplace_release(dip);
 63 gfs2_quota_unlock(dip);
 64 gfs2_alloc_put(dip);
 65 gfs2_glock_dq_uninit_m(2, ghs);
 66 mark_inode_dirty(inode);
 67 break;
 68 } else if (PTR_ERR(inode) != -EEXIST ||
 69 (nd && nd->flags & LOOKUP_EXCL)) {
 /* Hard failure, or O_EXCL create of an existing name. */
 70 gfs2_holder_uninit(ghs);
 71 return PTR_ERR(inode);
 72 }
 73
 /* Name already exists: look it up and return that inode. A NULL
 * result means it vanished again — loop and retry the create. */
 74 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
 75 if (inode) {
 76 if (!IS_ERR(inode)) {
 77 gfs2_holder_uninit(ghs);
 78 break;
 79 } else {
 80 gfs2_holder_uninit(ghs);
 81 return PTR_ERR(inode);
 82 }
 83 }
 84 }
 85
 86 d_instantiate(dentry, inode);
 87
 88 return 0;
 89}
90
91/**
92 * gfs2_lookup - Look up a filename in a directory and return its inode
93 * @dir: The directory inode
94 * @dentry: The dentry of the new inode
95 * @nd: passed from Linux VFS, ignored by us
96 *
97 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
98 *
99 * Returns: errno
100 */
101
102static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
103 struct nameidata *nd)
104{
105 struct inode *inode = NULL;
106
107 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
108 if (inode && IS_ERR(inode))
109 return ERR_CAST(inode);
110
111 if (inode) {
112 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
113 struct gfs2_holder gh;
114 int error;
 /* Briefly take the inode glock shared (LM_FLAG_ANY accepts any
 * compatible mode) — NOTE(review): appears to force the inode's
 * cluster state to be validated before exposing the dentry;
 * confirm against glock documentation. */
115 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
116 if (error) {
117 iput(inode);
118 return ERR_PTR(error);
119 }
120 gfs2_glock_dq_uninit(&gh);
121 return d_splice_alias(inode, dentry);
122 }
 /* Negative lookup: cache the absence (inode is NULL here). */
123 d_add(dentry, inode);
124
125 return NULL;
126}
127
128/**
129 * gfs2_link - Link to a file
130 * @old_dentry: The inode to link
131 * @dir: Add link to this directory
132 * @dentry: The name of the link
133 *
134 * Link the inode in "old_dentry" into the directory "dir" with the
135 * name in "dentry".
136 *
137 * Returns: errno
138 */
139
140static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
141 struct dentry *dentry)
142{
143 struct gfs2_inode *dip = GFS2_I(dir);
144 struct gfs2_sbd *sdp = GFS2_SB(dir);
145 struct inode *inode = old_dentry->d_inode;
146 struct gfs2_inode *ip = GFS2_I(inode);
147 struct gfs2_holder ghs[2];
148 int alloc_required;
149 int error;
150
 /* Hard-linking directories is never allowed. */
151 if (S_ISDIR(inode->i_mode))
152 return -EPERM;
153
 /* Lock ordering: parent directory glock first, then target inode. */
154 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
155 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
156
157 error = gfs2_glock_nq(ghs); /* parent */
158 if (error)
159 goto out_parent;
160
161 error = gfs2_glock_nq(ghs + 1); /* child */
162 if (error)
163 goto out_child;
164
165 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
166 if (error)
167 goto out_gunlock;
168
 /* The new name must not already exist in the directory. */
169 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
170 switch (error) {
171 case -ENOENT:
172 break;
173 case 0:
174 error = -EEXIST;
 /* fallthrough — case 0 deliberately falls into the error exit */
175 default:
176 goto out_gunlock;
177 }
178
 /* Sanity checks: neither inode may be dead, counters must not
 * saturate, and the target must be modifiable. */
179 error = -EINVAL;
180 if (!dip->i_inode.i_nlink)
181 goto out_gunlock;
182 error = -EFBIG;
183 if (dip->i_entries == (u32)-1)
184 goto out_gunlock;
185 error = -EPERM;
186 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
187 goto out_gunlock;
188 error = -EINVAL;
189 if (!ip->i_inode.i_nlink)
190 goto out_gunlock;
191 error = -EMLINK;
192 if (ip->i_inode.i_nlink == (u32)-1)
193 goto out_gunlock;
194
 /* Positive return = new directory blocks needed for the entry;
 * negative = error. */
195 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
196 if (error < 0)
197 goto out_gunlock;
198 error = 0;
199
200 if (alloc_required) {
 /* Allocation path: quota check, block reservation, then a
 * transaction sized for dirent + rgrp + statfs + quota updates. */
201 struct gfs2_alloc *al = gfs2_alloc_get(dip);
202 if (!al) {
203 error = -ENOMEM;
204 goto out_gunlock;
205 }
206
207 error = gfs2_quota_lock_check(dip);
208 if (error)
209 goto out_alloc;
210
211 al->al_requested = sdp->sd_max_dirres;
212
213 error = gfs2_inplace_reserve(dip);
214 if (error)
215 goto out_gunlock_q;
216
217 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
218 gfs2_rg_blocks(al) +
219 2 * RES_DINODE + RES_STATFS +
220 RES_QUOTA, 0);
221 if (error)
222 goto out_ipres;
223 } else {
 /* No new blocks: a much smaller transaction suffices. */
224 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
225 if (error)
226 goto out_ipres;
227 }
228
229 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
230 if (error)
231 goto out_end_trans;
232
233 error = gfs2_change_nlink(ip, +1);
234
 /* Cleanup ladder: labels undo the acquisitions above in strict
 * reverse order; the alloc_required guards mirror the setup path. */
235out_end_trans:
236 gfs2_trans_end(sdp);
237out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240out_gunlock_q:
241 if (alloc_required)
242 gfs2_quota_unlock(dip);
243out_alloc:
244 if (alloc_required)
245 gfs2_alloc_put(dip);
246out_gunlock:
247 gfs2_glock_dq(ghs + 1);
248out_child:
249 gfs2_glock_dq(ghs);
250out_parent:
251 gfs2_holder_uninit(ghs);
252 gfs2_holder_uninit(ghs + 1);
253 if (!error) {
 /* Take a reference for the new dentry before instantiating. */
254 ihold(inode);
255 d_instantiate(dentry, inode);
256 mark_inode_dirty(inode);
257 }
258 return error;
259}
260
261/*
262 * gfs2_unlink_ok - check to see that a inode is still in a directory
263 * @dip: the directory
264 * @name: the name of the file
265 * @ip: the inode
266 *
267 * Assumes that the lock on (at least) @dip is held.
268 *
269 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
270 */
271
272static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
273 const struct gfs2_inode *ip)
274{
275 int error;
276
 /* Immutable or append-only targets can never be unlinked. */
277 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
278 return -EPERM;
279
 /* Sticky-bit directories: only the directory owner, the file owner,
 * or CAP_FOWNER may remove the entry. */
280 if ((dip->i_inode.i_mode & S_ISVTX) &&
281 dip->i_inode.i_uid != current_fsuid() &&
282 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
283 return -EPERM;
284
285 if (IS_APPEND(&dip->i_inode))
286 return -EPERM;
287
288 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
289 if (error)
290 return error;
291
 /* Verify @name in @dip still refers to @ip (it may have changed on
 * another node before we got the locks). */
292 error = gfs2_dir_check(&dip->i_inode, name, ip);
293 if (error)
294 return error;
295
296 return 0;
297}
298
299/**
300 * gfs2_unlink - Unlink a file
301 * @dir: The inode of the directory containing the file to unlink
302 * @dentry: The file itself
303 *
304 * Unlink a file. Call gfs2_unlinki()
305 *
306 * Returns: errno
307 */
308
309static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
310{
311 struct gfs2_inode *dip = GFS2_I(dir);
312 struct gfs2_sbd *sdp = GFS2_SB(dir);
313 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
314 struct gfs2_holder ghs[3];
315 struct gfs2_rgrpd *rgd;
316 struct gfs2_holder ri_gh;
317 int error;
318
 /* Pin the resource-group index so rgd below stays valid. */
319 error = gfs2_rindex_hold(sdp, &ri_gh);
320 if (error)
321 return error;
322
 /* Three glocks, taken in order: parent dir, child inode, then the
 * resource group holding the child's dinode block. */
323 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
324 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
325
326 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
327 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
328
329
330 error = gfs2_glock_nq(ghs); /* parent */
331 if (error)
332 goto out_parent;
333
334 error = gfs2_glock_nq(ghs + 1); /* child */
335 if (error)
336 goto out_child;
337
338 error = gfs2_glock_nq(ghs + 2); /* rgrp */
339 if (error)
340 goto out_rgrp;
341
 /* Re-validate the parent/child relationship under the locks. */
342 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
343 if (error)
344 goto out_gunlock;
345
346 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
347 if (error)
348 goto out_gunlock;
349
350 error = gfs2_dir_del(dip, &dentry->d_name);
351 if (error)
352 goto out_end_trans;
353
354 error = gfs2_change_nlink(ip, -1);
355
 /* Unwind in reverse acquisition order. */
356out_end_trans:
357 gfs2_trans_end(sdp);
358out_gunlock:
359 gfs2_glock_dq(ghs + 2);
360out_rgrp:
361 gfs2_holder_uninit(ghs + 2);
362 gfs2_glock_dq(ghs + 1);
363out_child:
364 gfs2_holder_uninit(ghs + 1);
365 gfs2_glock_dq(ghs);
366out_parent:
367 gfs2_holder_uninit(ghs);
368 gfs2_glock_dq_uninit(&ri_gh);
369 return error;
370}
371
372/**
373 * gfs2_symlink - Create a symlink
374 * @dir: The directory to create the symlink in
375 * @dentry: The dentry to put the symlink in
376 * @symname: The thing which the link points to
377 *
378 * Returns: errno
379 */
380
381static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
382 const char *symname)
383{
384 struct gfs2_inode *dip = GFS2_I(dir), *ip;
385 struct gfs2_sbd *sdp = GFS2_SB(dir);
386 struct gfs2_holder ghs[2];
387 struct inode *inode;
388 struct buffer_head *dibh;
389 int size;
390 int error;
391
 /* Must be stuffed with a null terminator for gfs2_follow_link();
 * the target string lives inline in the dinode block, so its length
 * is bounded by blocksize minus the dinode header and the NUL. */
392 size = strlen(symname);
393 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
394 return -ENAMETOOLONG;
395
396 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
397
 /* Allocate and create the symlink inode; on return the transaction
 * and quota/alloc state are still open (ended below). */
399 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
400 if (IS_ERR(inode)) {
401 gfs2_holder_uninit(ghs);
402 return PTR_ERR(inode);
403 }
404
 /* gfs2_createi() left the new inode's glock in ghs[1]. */
405 ip = ghs[1].gh_gl->gl_object;
406
407 i_size_write(inode, size);
408
409 error = gfs2_meta_inode_buffer(ip, &dibh);
410
 /* Copy the target string inline after the on-disk dinode header;
 * a buffer-read failure here withdraws the filesystem. */
411 if (!gfs2_assert_withdraw(sdp, !error)) {
412 gfs2_dinode_out(ip, dibh->b_data);
413 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
414 size);
415 brelse(dibh);
416 }
417
418 gfs2_trans_end(sdp);
419 if (dip->i_alloc->al_rgd)
420 gfs2_inplace_release(dip);
421 gfs2_quota_unlock(dip);
422 gfs2_alloc_put(dip);
423
424 gfs2_glock_dq_uninit_m(2, ghs);
425
426 d_instantiate(dentry, inode);
427 mark_inode_dirty(inode);
428
429 return 0;
430}
431
432/**
433 * gfs2_mkdir - Make a directory
434 * @dir: The parent directory of the new one
435 * @dentry: The dentry of the new directory
436 * @mode: The mode of the new directory
437 *
438 * Returns: errno
439 */
440
441static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct gfs2_inode *dip = GFS2_I(dir), *ip;
444 struct gfs2_sbd *sdp = GFS2_SB(dir);
445 struct gfs2_holder ghs[2];
446 struct inode *inode;
447 struct buffer_head *dibh;
448 int error;
449
450 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
451
452 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
453 if (IS_ERR(inode)) {
454 gfs2_holder_uninit(ghs);
455 return PTR_ERR(inode);
456 }
457
 /* gfs2_createi() left the new inode's glock in ghs[1]. */
458 ip = ghs[1].gh_gl->gl_object;
459
 /* A fresh directory has two links ("." and the parent's entry) and
 * two entries ("." and ".."); dirents are journalled data. */
460 ip->i_inode.i_nlink = 2;
461 i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
462 ip->i_diskflags |= GFS2_DIF_JDATA;
463 ip->i_entries = 2;
464
465 error = gfs2_meta_inode_buffer(ip, &dibh);
466
 /* Build the on-disk "." and ".." dirents directly after the dinode
 * header in the new directory's first block. */
467 if (!gfs2_assert_withdraw(sdp, !error)) {
468 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
469 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
470
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
472 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
473 dent->de_inum = di->di_num; /* already GFS2 endian */
474 dent->de_type = cpu_to_be16(DT_DIR);
475 di->di_entries = cpu_to_be32(1);
476
 /* ".." takes the remainder of the block and points at the parent. */
477 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
478 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
479
480 gfs2_inum_out(dip, dent);
481 dent->de_type = cpu_to_be16(DT_DIR);
482
483 gfs2_dinode_out(ip, di);
484
485 brelse(dibh);
486 }
487
 /* Parent gains a link from the child's "..". */
488 error = gfs2_change_nlink(dip, +1);
489 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
490
491 gfs2_trans_end(sdp);
492 if (dip->i_alloc->al_rgd)
493 gfs2_inplace_release(dip);
494 gfs2_quota_unlock(dip);
495 gfs2_alloc_put(dip);
496
497 gfs2_glock_dq_uninit_m(2, ghs);
498
499 d_instantiate(dentry, inode);
500 mark_inode_dirty(inode);
501
502 return 0;
503}
504
505/**
506 * gfs2_rmdiri - Remove a directory
507 * @dip: The parent directory of the directory to be removed
508 * @name: The name of the directory to be removed
509 * @ip: The GFS2 inode of the directory to be removed
510 *
511 * Assumes Glocks on dip and ip are held
512 *
513 * Returns: errno
514 */
515
516static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
517 struct gfs2_inode *ip)
518{
519 int error;
520
521 if (ip->i_entries != 2) {
522 if (gfs2_consist_inode(ip))
523 gfs2_dinode_print(ip);
524 return -EIO;
525 }
526
527 error = gfs2_dir_del(dip, name);
528 if (error)
529 return error;
530
531 error = gfs2_change_nlink(dip, -1);
532 if (error)
533 return error;
534
535 error = gfs2_dir_del(ip, &gfs2_qdot);
536 if (error)
537 return error;
538
539 error = gfs2_dir_del(ip, &gfs2_qdotdot);
540 if (error)
541 return error;
542
543 /* It looks odd, but it really should be done twice */
544 error = gfs2_change_nlink(ip, -1);
545 if (error)
546 return error;
547
548 error = gfs2_change_nlink(ip, -1);
549 if (error)
550 return error;
551
552 return error;
553}
554
555/**
556 * gfs2_rmdir - Remove a directory
557 * @dir: The parent directory of the directory to be removed
558 * @dentry: The dentry of the directory to remove
559 *
560 * Remove a directory. Call gfs2_rmdiri()
561 *
562 * Returns: errno
563 */
564
565static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
566{
567 struct gfs2_inode *dip = GFS2_I(dir);
568 struct gfs2_sbd *sdp = GFS2_SB(dir);
569 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
570 struct gfs2_holder ghs[3];
571 struct gfs2_rgrpd *rgd;
572 struct gfs2_holder ri_gh;
573 int error;
574
575 error = gfs2_rindex_hold(sdp, &ri_gh);
576 if (error)
577 return error;
578 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
579 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
580
581 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
582 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
583
584 error = gfs2_glock_nq(ghs); /* parent */
585 if (error)
586 goto out_parent;
587
588 error = gfs2_glock_nq(ghs + 1); /* child */
589 if (error)
590 goto out_child;
591
592 error = gfs2_glock_nq(ghs + 2); /* rgrp */
593 if (error)
594 goto out_rgrp;
595
596 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
597 if (error)
598 goto out_gunlock;
599
600 if (ip->i_entries < 2) {
601 if (gfs2_consist_inode(ip))
602 gfs2_dinode_print(ip);
603 error = -EIO;
604 goto out_gunlock;
605 }
606 if (ip->i_entries > 2) {
607 error = -ENOTEMPTY;
608 goto out_gunlock;
609 }
610
611 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
612 if (error)
613 goto out_gunlock;
614
615 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
616
617 gfs2_trans_end(sdp);
618
619out_gunlock:
620 gfs2_glock_dq(ghs + 2);
621out_rgrp:
622 gfs2_holder_uninit(ghs + 2);
623 gfs2_glock_dq(ghs + 1);
624out_child:
625 gfs2_holder_uninit(ghs + 1);
626 gfs2_glock_dq(ghs);
627out_parent:
628 gfs2_holder_uninit(ghs);
629 gfs2_glock_dq_uninit(&ri_gh);
630 return error;
631}
632
633/**
634 * gfs2_mknod - Make a special file
635 * @dir: The directory in which the special file will reside
636 * @dentry: The dentry of the special file
637 * @mode: The mode of the special file
638 * @rdev: The device specification of the special file
639 *
640 */
641
642static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
643 dev_t dev)
644{
645 struct gfs2_inode *dip = GFS2_I(dir);
646 struct gfs2_sbd *sdp = GFS2_SB(dir);
647 struct gfs2_holder ghs[2];
648 struct inode *inode;
649
650 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
651
652 inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
653 if (IS_ERR(inode)) {
654 gfs2_holder_uninit(ghs);
655 return PTR_ERR(inode);
656 }
657
658 gfs2_trans_end(sdp);
659 if (dip->i_alloc->al_rgd)
660 gfs2_inplace_release(dip);
661 gfs2_quota_unlock(dip);
662 gfs2_alloc_put(dip);
663
664 gfs2_glock_dq_uninit_m(2, ghs);
665
666 d_instantiate(dentry, inode);
667 mark_inode_dirty(inode);
668
669 return 0;
670}
671
672/*
673 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
674 * @this: move this
675 * @to: to here
676 *
677 * Follow @to back to the root and make sure we don't encounter @this
678 * Assumes we already hold the rename lock.
679 *
680 * Returns: errno
681 */
682
683static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
684{
685 struct inode *dir = &to->i_inode;
686 struct super_block *sb = dir->i_sb;
687 struct inode *tmp;
688 int error = 0;
689
690 igrab(dir);
691
692 for (;;) {
693 if (dir == &this->i_inode) {
694 error = -EINVAL;
695 break;
696 }
697 if (dir == sb->s_root->d_inode) {
698 error = 0;
699 break;
700 }
701
702 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
703 if (IS_ERR(tmp)) {
704 error = PTR_ERR(tmp);
705 break;
706 }
707
708 iput(dir);
709 dir = tmp;
710 }
711
712 iput(dir);
713
714 return error;
715}
716
/**
 * gfs2_rename - Rename a file
 * @odir: Parent directory of old file name
 * @odentry: The old dentry of the file
 * @ndir: Parent directory of new file name
 * @ndentry: The new dentry of the file
 *
 * Takes the filesystem-wide rename glock (for cross-directory moves),
 * the glocks of both parents, the inode being moved and, if the target
 * already exists, the target inode and its resource group, then shifts
 * the directory entries inside a single transaction.
 *
 * Returns: errno
 */

static int gfs2_rename(struct inode *odir, struct dentry *odentry,
		       struct inode *ndir, struct dentry *ndentry)
{
	struct gfs2_inode *odip = GFS2_I(odir);
	struct gfs2_inode *ndip = GFS2_I(ndir);
	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
	struct gfs2_inode *nip = NULL;
	struct gfs2_sbd *sdp = GFS2_SB(odir);
	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
	struct gfs2_rgrpd *nrgd;
	unsigned int num_gh;
	int dir_rename = 0;
	int alloc_required = 0;
	unsigned int x;
	int error;

	if (ndentry->d_inode) {
		nip = GFS2_I(ndentry->d_inode);
		/* Renaming an inode onto itself is a no-op */
		if (ip == nip)
			return 0;
	}

	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		return error;

	if (odip != ndip) {
		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
					   0, &r_gh);
		if (error)
			goto out;

		if (S_ISDIR(ip->i_inode.i_mode)) {
			dir_rename = 1;
			/* don't move a directory into its own subdir */
			error = gfs2_ok_to_move(ip, ndip);
			if (error)
				goto out_gunlock_r;
		}
	}

	/* Build the holder list: old parent, new parent (if different),
	   the inode being moved, then (if the target exists) the target
	   inode and its resource group. */
	num_gh = 1;
	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	if (odip != ndip) {
		gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
		num_gh++;
	}
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
	num_gh++;

	if (nip) {
		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
		num_gh++;
		/* grab the resource lock for unlink flag twiddling
		 * this is the case of the target file already existing
		 * so we unlink before doing the rename
		 */
		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
		if (nrgd)
			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
	}

	/* NOTE(review): if gfs2_glock_nq() fails at index x, out_gunlock
	   unwinds only holders 0..x-1; holders x..num_gh-1 were
	   initialized but are never uninitialized — looks like a glock
	   reference leak on this error path, confirm. */
	for (x = 0; x < num_gh; x++) {
		error = gfs2_glock_nq(ghs + x);
		if (error)
			goto out_gunlock;
	}

	/* Check out the old directory */

	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
	if (error)
		goto out_gunlock;

	/* Check out the new directory */

	if (nip) {
		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
		if (error)
			goto out_gunlock;

		/* A directory target may only be replaced if it is empty
		   (exactly "." and ".."); fewer entries means corruption */
		if (S_ISDIR(nip->i_inode.i_mode)) {
			if (nip->i_entries < 2) {
				if (gfs2_consist_inode(nip))
					gfs2_dinode_print(nip);
				error = -EIO;
				goto out_gunlock;
			}
			if (nip->i_entries > 2) {
				error = -ENOTEMPTY;
				goto out_gunlock;
			}
		}
	} else {
		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
		if (error)
			goto out_gunlock;

		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
		switch (error) {
		case -ENOENT:
			error = 0;
			break;
		case 0:
			error = -EEXIST;
			/* fall through: any non -ENOENT result aborts */
		default:
			goto out_gunlock;
		};

		if (odip != ndip) {
			if (!ndip->i_inode.i_nlink) {
				error = -EINVAL;
				goto out_gunlock;
			}
			/* Entry count and link count are stored as 32-bit
			   values on disk; refuse to overflow them */
			if (ndip->i_entries == (u32)-1) {
				error = -EFBIG;
				goto out_gunlock;
			}
			if (S_ISDIR(ip->i_inode.i_mode) &&
			    ndip->i_inode.i_nlink == (u32)-1) {
				error = -EMLINK;
				goto out_gunlock;
			}
		}
	}

	/* Check out the dir to be renamed */

	if (dir_rename) {
		error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
		if (error)
			goto out_gunlock;
	}

	/* gfs2_diradd_alloc_required() returns <0 on error, or a
	   boolean saying whether the new entry needs block allocation */
	if (nip == NULL)
		alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
	error = alloc_required;
	if (error < 0)
		goto out_gunlock;
	error = 0;

	if (alloc_required) {
		struct gfs2_alloc *al = gfs2_alloc_get(ndip);
		if (!al) {
			error = -ENOMEM;
			goto out_gunlock;
		}

		error = gfs2_quota_lock_check(ndip);
		if (error)
			goto out_alloc;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve_ri(ndip);
		if (error)
			goto out_gunlock_q;

		/* Transaction must cover the directory growth as well as
		   the dinode/leaf/statfs/quota updates */
		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
					 gfs2_rg_blocks(al) +
					 4 * RES_DINODE + 4 * RES_LEAF +
					 RES_STATFS + RES_QUOTA + 4, 0);
		if (error)
			goto out_ipreserv;
	} else {
		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
					 5 * RES_LEAF + 4, 0);
		if (error)
			goto out_gunlock;
	}

	/* Remove the target file, if it exists */

	if (nip) {
		if (S_ISDIR(nip->i_inode.i_mode))
			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
		else {
			error = gfs2_dir_del(ndip, &ndentry->d_name);
			if (error)
				goto out_end_trans;
			error = gfs2_change_nlink(nip, -1);
		}
		if (error)
			goto out_end_trans;
	}

	if (dir_rename) {
		/* The moved directory's ".." entry changes parent, so the
		   parents' link counts move with it */
		error = gfs2_change_nlink(ndip, +1);
		if (error)
			goto out_end_trans;
		error = gfs2_change_nlink(odip, -1);
		if (error)
			goto out_end_trans;

		error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
		if (error)
			goto out_end_trans;
	} else {
		/* Non-directory move: just bump ctime on the moved inode */
		struct buffer_head *dibh;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error)
			goto out_end_trans;
		ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	error = gfs2_dir_del(odip, &odentry->d_name);
	if (error)
		goto out_end_trans;

	error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
	if (error)
		goto out_end_trans;

out_end_trans:
	gfs2_trans_end(sdp);
out_ipreserv:
	if (alloc_required)
		gfs2_inplace_release(ndip);
out_gunlock_q:
	if (alloc_required)
		gfs2_quota_unlock(ndip);
out_alloc:
	if (alloc_required)
		gfs2_alloc_put(ndip);
out_gunlock:
	/* x holds the number of successfully queued glocks */
	while (x--) {
		gfs2_glock_dq(ghs + x);
		gfs2_holder_uninit(ghs + x);
	}
out_gunlock_r:
	if (r_gh.gh_gl)
		gfs2_glock_dq_uninit(&r_gh);
out:
	gfs2_glock_dq_uninit(&ri_gh);
	return error;
}
966
967/**
968 * gfs2_follow_link - Follow a symbolic link
969 * @dentry: The dentry of the link
970 * @nd: Data that we pass to vfs_follow_link()
971 *
972 * This can handle symlinks of any size.
973 *
974 * Returns: 0 on success or error code
975 */
976
977static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
978{
979 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
980 struct gfs2_holder i_gh;
981 struct buffer_head *dibh;
982 unsigned int x, size;
983 char *buf;
984 int error;
985
986 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
987 error = gfs2_glock_nq(&i_gh);
988 if (error) {
989 gfs2_holder_uninit(&i_gh);
990 nd_set_link(nd, ERR_PTR(error));
991 return NULL;
992 }
993
994 size = (unsigned int)i_size_read(&ip->i_inode);
995 if (size == 0) {
996 gfs2_consist_inode(ip);
997 buf = ERR_PTR(-EIO);
998 goto out;
999 }
1000
1001 error = gfs2_meta_inode_buffer(ip, &dibh);
1002 if (error) {
1003 buf = ERR_PTR(error);
1004 goto out;
1005 }
1006
1007 x = size + 1;
1008 buf = kmalloc(x, GFP_NOFS);
1009 if (!buf)
1010 buf = ERR_PTR(-ENOMEM);
1011 else
1012 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1013 brelse(dibh);
1014out:
1015 gfs2_glock_dq_uninit(&i_gh);
1016 nd_set_link(nd, buf);
1017 return NULL;
1018}
1019
static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
	/* Free the buffer gfs2_follow_link() stashed in @nd, unless an
	   ERR_PTR was stored there instead of an allocation */
	char *buf = nd_get_link(nd);

	if (!IS_ERR(buf))
		kfree(buf);
}
1026
1027/**
1028 * gfs2_permission -
1029 * @inode: The inode
1030 * @mask: The mask to be tested
1031 * @flags: Indicates whether this is an RCU path walk or not
1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only
1035 * lock the glock if its not already been done.
1036 *
1037 * Returns: errno
1038 */
1039
1040int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1041{
1042 struct gfs2_inode *ip;
1043 struct gfs2_holder i_gh;
1044 int error;
1045 int unlock = 0;
1046
1047
1048 ip = GFS2_I(inode);
1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error)
1054 return error;
1055 unlock = 1;
1056 }
1057
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES;
1060 else
1061 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh);
1064
1065 return error;
1066}
1067
1068static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{
1070 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 u32 ouid, ogid, nuid, ngid;
1073 int error;
1074
1075 ouid = inode->i_uid;
1076 ogid = inode->i_gid;
1077 nuid = attr->ia_uid;
1078 ngid = attr->ia_gid;
1079
1080 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1081 ouid = nuid = NO_QUOTA_CHANGE;
1082 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1083 ogid = ngid = NO_QUOTA_CHANGE;
1084
1085 if (!gfs2_alloc_get(ip))
1086 return -ENOMEM;
1087
1088 error = gfs2_quota_lock(ip, nuid, ngid);
1089 if (error)
1090 goto out_alloc;
1091
1092 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1093 error = gfs2_quota_check(ip, nuid, ngid);
1094 if (error)
1095 goto out_gunlock_q;
1096 }
1097
1098 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1099 if (error)
1100 goto out_gunlock_q;
1101
1102 error = gfs2_setattr_simple(ip, attr);
1103 if (error)
1104 goto out_end_trans;
1105
1106 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1107 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1108 gfs2_quota_change(ip, -blocks, ouid, ogid);
1109 gfs2_quota_change(ip, blocks, nuid, ngid);
1110 }
1111
1112out_end_trans:
1113 gfs2_trans_end(sdp);
1114out_gunlock_q:
1115 gfs2_quota_unlock(ip);
1116out_alloc:
1117 gfs2_alloc_put(ip);
1118 return error;
1119}
1120
1121/**
1122 * gfs2_setattr - Change attributes on an inode
1123 * @dentry: The dentry which is changing
1124 * @attr: The structure describing the change
1125 *
1126 * The VFS layer wants to change one or more of an inodes attributes. Write
1127 * that change out to disk.
1128 *
1129 * Returns: errno
1130 */
1131
1132static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1133{
1134 struct inode *inode = dentry->d_inode;
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_holder i_gh;
1137 int error;
1138
1139 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1140 if (error)
1141 return error;
1142
1143 error = -EPERM;
1144 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1145 goto out;
1146
1147 error = inode_change_ok(inode, attr);
1148 if (error)
1149 goto out;
1150
1151 if (attr->ia_valid & ATTR_SIZE)
1152 error = gfs2_setattr_size(inode, attr->ia_size);
1153 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1154 error = setattr_chown(inode, attr);
1155 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1156 error = gfs2_acl_chmod(ip, attr);
1157 else
1158 error = gfs2_setattr_simple(ip, attr);
1159
1160out:
1161 gfs2_glock_dq_uninit(&i_gh);
1162 if (!error)
1163 mark_inode_dirty(inode);
1164 return error;
1165}
1166
1167/**
1168 * gfs2_getattr - Read out an inode's attributes
1169 * @mnt: The vfsmount the inode is being accessed from
1170 * @dentry: The dentry to stat
1171 * @stat: The inode's stats
1172 *
1173 * This may be called from the VFS directly, or from within GFS2 with the
1174 * inode locked, so we look to see if the glock is already locked and only
1175 * lock the glock if its not already been done. Note that its the NFS
1176 * readdirplus operation which causes this to be called (from filldir)
1177 * with the glock already held.
1178 *
1179 * Returns: errno
1180 */
1181
1182static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1183 struct kstat *stat)
1184{
1185 struct inode *inode = dentry->d_inode;
1186 struct gfs2_inode *ip = GFS2_I(inode);
1187 struct gfs2_holder gh;
1188 int error;
1189 int unlock = 0;
1190
1191 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1192 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1193 if (error)
1194 return error;
1195 unlock = 1;
1196 }
1197
1198 generic_fillattr(inode, stat);
1199 if (unlock)
1200 gfs2_glock_dq_uninit(&gh);
1201
1202 return 0;
1203}
1204
1205static int gfs2_setxattr(struct dentry *dentry, const char *name,
1206 const void *data, size_t size, int flags)
1207{
1208 struct inode *inode = dentry->d_inode;
1209 struct gfs2_inode *ip = GFS2_I(inode);
1210 struct gfs2_holder gh;
1211 int ret;
1212
1213 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1214 ret = gfs2_glock_nq(&gh);
1215 if (ret == 0) {
1216 ret = generic_setxattr(dentry, name, data, size, flags);
1217 gfs2_glock_dq(&gh);
1218 }
1219 gfs2_holder_uninit(&gh);
1220 return ret;
1221}
1222
1223static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1224 void *data, size_t size)
1225{
1226 struct inode *inode = dentry->d_inode;
1227 struct gfs2_inode *ip = GFS2_I(inode);
1228 struct gfs2_holder gh;
1229 int ret;
1230
1231 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1232 ret = gfs2_glock_nq(&gh);
1233 if (ret == 0) {
1234 ret = generic_getxattr(dentry, name, data, size);
1235 gfs2_glock_dq(&gh);
1236 }
1237 gfs2_holder_uninit(&gh);
1238 return ret;
1239}
1240
1241static int gfs2_removexattr(struct dentry *dentry, const char *name)
1242{
1243 struct inode *inode = dentry->d_inode;
1244 struct gfs2_inode *ip = GFS2_I(inode);
1245 struct gfs2_holder gh;
1246 int ret;
1247
1248 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1249 ret = gfs2_glock_nq(&gh);
1250 if (ret == 0) {
1251 ret = generic_removexattr(dentry, name);
1252 gfs2_glock_dq(&gh);
1253 }
1254 gfs2_holder_uninit(&gh);
1255 return ret;
1256}
1257
/* Report the inode's extent mapping to userspace (FIEMAP ioctl),
 * holding i_mutex and a shared glock across the walk. */
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		       u64 start, u64 len)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	/* FIEMAP_FLAG_SYNC is the only flag supported here */
	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	if (ret)
		goto out;

	if (gfs2_is_stuffed(ip)) {
		/* Stuffed inode: the data lives inside the dinode block,
		   after the dinode header, so report a single inline,
		   unaligned, final extent clamped to the file size. */
		u64 phys = ip->i_no_addr << inode->i_blkbits;
		u64 size = i_size_read(inode);
		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
			    FIEMAP_EXTENT_DATA_INLINE;
		phys += sizeof(struct gfs2_dinode);
		phys += start;
		/* If start >= size, len underflows here, but the extent is
		   only emitted when start < size, so the value is unused */
		if (start + len > size)
			len = size - start;
		if (start < size)
			ret = fiemap_fill_next_extent(fieinfo, start, phys,
						      len, flags);
		/* fiemap_fill_next_extent() returns 1 for "stop early",
		   which is not an error */
		if (ret == 1)
			ret = 0;
	} else {
		ret = __generic_block_fiemap(inode, fieinfo, start, len,
					     gfs2_block_map);
	}

	gfs2_glock_dq_uninit(&gh);
out:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
1299
/* Inode operations for regular files */
const struct inode_operations gfs2_file_iops = {
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
	.fiemap = gfs2_fiemap,
};
1310
/* Inode operations for directories */
const struct inode_operations gfs2_dir_iops = {
	.create = gfs2_create,
	.lookup = gfs2_lookup,
	.link = gfs2_link,
	.unlink = gfs2_unlink,
	.symlink = gfs2_symlink,
	.mkdir = gfs2_mkdir,
	.rmdir = gfs2_rmdir,
	.mknod = gfs2_mknod,
	.rename = gfs2_rename,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
	.fiemap = gfs2_fiemap,
};
1330
/* Inode operations for symbolic links */
const struct inode_operations gfs2_symlink_iops = {
	.readlink = generic_readlink,
	.follow_link = gfs2_follow_link,
	.put_link = gfs2_put_link,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
	.fiemap = gfs2_fiemap,
};
1344
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index cf930cd9664a..7273ad3c85ba 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
78 78
79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, 79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
80 unsigned char *buf2, unsigned int offset, 80 unsigned char *buf2, unsigned int offset,
81 unsigned int buflen, u32 block, 81 struct gfs2_bitmap *bi, u32 block,
82 unsigned char new_state) 82 unsigned char new_state)
83{ 83{
84 unsigned char *byte1, *byte2, *end, cur_state; 84 unsigned char *byte1, *byte2, *end, cur_state;
85 unsigned int buflen = bi->bi_len;
85 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 86 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
86 87
87 byte1 = buf1 + offset + (block / GFS2_NBBY); 88 byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 93 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
93 94
94 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 95 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
96 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
97 "new_state=%d\n",
98 (unsigned long long)block, cur_state, new_state);
99 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
100 (unsigned long long)rgd->rd_addr,
101 (unsigned long)bi->bi_start);
102 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
103 (unsigned long)bi->bi_offset,
104 (unsigned long)bi->bi_len);
105 dump_stack();
95 gfs2_consist_rgrpd(rgd); 106 gfs2_consist_rgrpd(rgd);
96 return; 107 return;
97 } 108 }
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
381 392
382 if (gl) { 393 if (gl) {
383 gl->gl_object = NULL; 394 gl->gl_object = NULL;
395 gfs2_glock_add_to_lru(gl);
384 gfs2_glock_put(gl); 396 gfs2_glock_put(gl);
385 } 397 }
386 398
@@ -945,7 +957,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
945 /* rgblk_search can return a block < goal, so we need to 957 /* rgblk_search can return a block < goal, so we need to
946 keep it marching forward. */ 958 keep it marching forward. */
947 no_addr = block + rgd->rd_data0; 959 no_addr = block + rgd->rd_data0;
948 goal++; 960 goal = max(block + 1, goal + 1);
949 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 961 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
950 continue; 962 continue;
951 if (no_addr == skip) 963 if (no_addr == skip)
@@ -971,7 +983,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
971 found++; 983 found++;
972 984
973 /* Limit reclaim to sensible number of tasks */ 985 /* Limit reclaim to sensible number of tasks */
974 if (found > 2*NR_CPUS) 986 if (found > NR_CPUS)
975 return; 987 return;
976 } 988 }
977 989
@@ -1365,7 +1377,7 @@ skip:
1365 1377
1366 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1378 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1367 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1379 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1368 bi->bi_len, blk, new_state); 1380 bi, blk, new_state);
1369 goal = blk; 1381 goal = blk;
1370 while (*n < elen) { 1382 while (*n < elen) {
1371 goal++; 1383 goal++;
@@ -1375,7 +1387,7 @@ skip:
1375 GFS2_BLKST_FREE) 1387 GFS2_BLKST_FREE)
1376 break; 1388 break;
1377 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1389 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1378 bi->bi_len, goal, new_state); 1390 bi, goal, new_state);
1379 (*n)++; 1391 (*n)++;
1380 } 1392 }
1381out: 1393out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1432 } 1444 }
1433 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1445 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1434 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1446 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1435 bi->bi_len, buf_blk, new_state); 1447 bi, buf_blk, new_state);
1436 } 1448 }
1437 1449
1438 return rgd; 1450 return rgd;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ec73ed70bae1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h> 24#include <linux/wait.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -657,7 +658,7 @@ out:
657 * @sdp: the file system 658 * @sdp: the file system
658 * 659 *
659 * This function flushes data and meta data for all machines by 660 * This function flushes data and meta data for all machines by
660 * aquiring the transaction log exclusively. All journals are 661 * acquiring the transaction log exclusively. All journals are
661 * ensured to be in a clean state as well. 662 * ensured to be in a clean state as well.
662 * 663 *
663 * Returns: errno 664 * Returns: errno
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
700 mutex_unlock(&sdp->sd_freeze_lock); 701 mutex_unlock(&sdp->sd_freeze_lock);
701} 702}
702 703
704void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
705{
706 struct gfs2_dinode *str = buf;
707
708 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
709 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
710 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
711 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
712 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
713 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
714 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
715 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
716 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
717 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
718 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
719 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
720 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
721 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
722
723 str->di_goal_meta = cpu_to_be64(ip->i_goal);
724 str->di_goal_data = cpu_to_be64(ip->i_goal);
725 str->di_generation = cpu_to_be64(ip->i_generation);
726
727 str->di_flags = cpu_to_be32(ip->i_diskflags);
728 str->di_height = cpu_to_be16(ip->i_height);
729 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
730 !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
731 GFS2_FORMAT_DE : 0);
732 str->di_depth = cpu_to_be16(ip->i_depth);
733 str->di_entries = cpu_to_be32(ip->i_entries);
734
735 str->di_eattr = cpu_to_be64(ip->i_eattr);
736 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
737 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
738 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
739}
703 740
704/** 741/**
705 * gfs2_write_inode - Make sure the inode is stable on the disk 742 * gfs2_write_inode - Make sure the inode is stable on the disk
706 * @inode: The inode 743 * @inode: The inode
707 * @sync: synchronous write flag 744 * @wbc: The writeback control structure
708 * 745 *
709 * Returns: errno 746 * Returns: errno
710 */ 747 */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
713{ 750{
714 struct gfs2_inode *ip = GFS2_I(inode); 751 struct gfs2_inode *ip = GFS2_I(inode);
715 struct gfs2_sbd *sdp = GFS2_SB(inode); 752 struct gfs2_sbd *sdp = GFS2_SB(inode);
753 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
754 struct backing_dev_info *bdi = metamapping->backing_dev_info;
716 struct gfs2_holder gh; 755 struct gfs2_holder gh;
717 struct buffer_head *bh; 756 struct buffer_head *bh;
718 struct timespec atime; 757 struct timespec atime;
719 struct gfs2_dinode *di; 758 struct gfs2_dinode *di;
720 int ret = 0; 759 int ret = -EAGAIN;
721 760
722 /* Check this is a "normal" inode, etc */ 761 /* Skip timestamp update, if this is from a memalloc */
723 if (current->flags & PF_MEMALLOC) 762 if (current->flags & PF_MEMALLOC)
724 return 0; 763 goto do_flush;
725 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 764 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
726 if (ret) 765 if (ret)
727 goto do_flush; 766 goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
745do_flush: 784do_flush:
746 if (wbc->sync_mode == WB_SYNC_ALL) 785 if (wbc->sync_mode == WB_SYNC_ALL)
747 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 786 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
787 filemap_fdatawrite(metamapping);
788 if (bdi->dirty_exceeded)
789 gfs2_ail1_flush(sdp, wbc);
790 if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
791 ret = filemap_fdatawait(metamapping);
792 if (ret)
793 mark_inode_dirty_sync(inode);
748 return ret; 794 return ret;
749} 795}
750 796
@@ -874,8 +920,9 @@ restart:
874 920
875static int gfs2_sync_fs(struct super_block *sb, int wait) 921static int gfs2_sync_fs(struct super_block *sb, int wait)
876{ 922{
877 if (wait && sb->s_fs_info) 923 struct gfs2_sbd *sdp = sb->s_fs_info;
878 gfs2_log_flush(sb->s_fs_info, NULL); 924 if (wait && sdp)
925 gfs2_log_flush(sdp, NULL);
879 return 0; 926 return 0;
880} 927}
881 928
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1308 return 0; 1355 return 0;
1309} 1356}
1310 1357
1358static void gfs2_final_release_pages(struct gfs2_inode *ip)
1359{
1360 struct inode *inode = &ip->i_inode;
1361 struct gfs2_glock *gl = ip->i_gl;
1362
1363 truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
1364 truncate_inode_pages(&inode->i_data, 0);
1365
1366 if (atomic_read(&gl->gl_revokes) == 0) {
1367 clear_bit(GLF_LFLUSH, &gl->gl_flags);
1368 clear_bit(GLF_DIRTY, &gl->gl_flags);
1369 }
1370}
1371
1372static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1373{
1374 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1375 struct gfs2_alloc *al;
1376 struct gfs2_rgrpd *rgd;
1377 int error;
1378
1379 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1380 gfs2_consist_inode(ip);
1381 return -EIO;
1382 }
1383
1384 al = gfs2_alloc_get(ip);
1385 if (!al)
1386 return -ENOMEM;
1387
1388 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1389 if (error)
1390 goto out;
1391
1392 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1393 if (error)
1394 goto out_qs;
1395
1396 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1397 if (!rgd) {
1398 gfs2_consist_inode(ip);
1399 error = -EIO;
1400 goto out_rindex_relse;
1401 }
1402
1403 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1404 &al->al_rgd_gh);
1405 if (error)
1406 goto out_rindex_relse;
1407
1408 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1409 sdp->sd_jdesc->jd_blocks);
1410 if (error)
1411 goto out_rg_gunlock;
1412
1413 gfs2_free_di(rgd, ip);
1414
1415 gfs2_final_release_pages(ip);
1416
1417 gfs2_trans_end(sdp);
1418
1419out_rg_gunlock:
1420 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1421out_rindex_relse:
1422 gfs2_glock_dq_uninit(&al->al_ri_gh);
1423out_qs:
1424 gfs2_quota_unhold(ip);
1425out:
1426 gfs2_alloc_put(ip);
1427 return error;
1428}
1429
1311/* 1430/*
1312 * We have to (at the moment) hold the inodes main lock to cover 1431 * We have to (at the moment) hold the inodes main lock to cover
1313 * the gap between unlocking the shared lock on the iopen lock and 1432 * the gap between unlocking the shared lock on the iopen lock and
@@ -1318,15 +1437,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1318 1437
1319static void gfs2_evict_inode(struct inode *inode) 1438static void gfs2_evict_inode(struct inode *inode)
1320{ 1439{
1321 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 1440 struct super_block *sb = inode->i_sb;
1441 struct gfs2_sbd *sdp = sb->s_fs_info;
1322 struct gfs2_inode *ip = GFS2_I(inode); 1442 struct gfs2_inode *ip = GFS2_I(inode);
1323 struct gfs2_holder gh; 1443 struct gfs2_holder gh;
1324 int error; 1444 int error;
1325 1445
1326 if (inode->i_nlink) 1446 if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
1327 goto out; 1447 goto out;
1328 1448
1329 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1449 /* Must not read inode block until block type has been verified */
1450 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
1330 if (unlikely(error)) { 1451 if (unlikely(error)) {
1331 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1452 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1332 goto out; 1453 goto out;
@@ -1336,6 +1457,12 @@ static void gfs2_evict_inode(struct inode *inode)
1336 if (error) 1457 if (error)
1337 goto out_truncate; 1458 goto out_truncate;
1338 1459
1460 if (test_bit(GIF_INVALID, &ip->i_flags)) {
1461 error = gfs2_inode_refresh(ip);
1462 if (error)
1463 goto out_truncate;
1464 }
1465
1339 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 1466 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1340 gfs2_glock_dq_wait(&ip->i_iopen_gh); 1467 gfs2_glock_dq_wait(&ip->i_iopen_gh);
1341 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 1468 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
@@ -1363,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
1363 } 1490 }
1364 1491
1365 error = gfs2_dinode_dealloc(ip); 1492 error = gfs2_dinode_dealloc(ip);
1366 if (error) 1493 goto out_unlock;
1367 goto out_unlock;
1368 1494
1369out_truncate: 1495out_truncate:
1370 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1496 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1371 if (error) 1497 if (error)
1372 goto out_unlock; 1498 goto out_unlock;
1373 /* Needs to be done before glock release & also in a transaction */ 1499 gfs2_final_release_pages(ip);
1374 truncate_inode_pages(&inode->i_data, 0);
1375 gfs2_trans_end(sdp); 1500 gfs2_trans_end(sdp);
1376 1501
1377out_unlock: 1502out_unlock:
@@ -1386,6 +1511,7 @@ out:
1386 end_writeback(inode); 1511 end_writeback(inode);
1387 1512
1388 ip->i_gl->gl_object = NULL; 1513 ip->i_gl->gl_object = NULL;
1514 gfs2_glock_add_to_lru(ip->i_gl);
1389 gfs2_glock_put(ip->i_gl); 1515 gfs2_glock_put(ip->i_gl);
1390 ip->i_gl = NULL; 1516 ip->i_gl = NULL;
1391 if (ip->i_iopen_gh.gh_gl) { 1517 if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
81 81
82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) 82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
83{ 83{
84 const u8 *uuid = sdp->sd_sb.sb_uuid; 84 struct super_block *s = sdp->sd_vfs;
85 const u8 *uuid = s->s_uuid;
85 buf[0] = '\0'; 86 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 87 if (!gfs2_uuid_valid(uuid))
87 return 0; 88 return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
616 struct kobj_uevent_env *env) 617 struct kobj_uevent_env *env)
617{ 618{
618 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 619 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
619 const u8 *uuid = sdp->sd_sb.sb_uuid; 620 struct super_block *s = sdp->sd_vfs;
621 const u8 *uuid = s->s_uuid;
620 622
621 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 623 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
622 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 624 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h>
13#include "incore.h" 14#include "incore.h"
14#include "glock.h" 15#include "glock.h"
15 16
@@ -40,7 +41,9 @@
40 {(1UL << GLF_REPLY_PENDING), "r" }, \ 41 {(1UL << GLF_REPLY_PENDING), "r" }, \
41 {(1UL << GLF_INITIAL), "I" }, \ 42 {(1UL << GLF_INITIAL), "I" }, \
42 {(1UL << GLF_FROZEN), "F" }, \ 43 {(1UL << GLF_FROZEN), "F" }, \
43 {(1UL << GLF_QUEUED), "q" }) 44 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" })
44 47
45#ifndef NUMPTY 48#ifndef NUMPTY
46#define NUMPTY 49#define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
94 __entry->new_state = glock_trace_state(new_state); 97 __entry->new_state = glock_trace_state(new_state);
95 __entry->tgt_state = glock_trace_state(gl->gl_target); 98 __entry->tgt_state = glock_trace_state(gl->gl_target);
96 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 99 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
97 __entry->flags = gl->gl_flags; 100 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
98 ), 101 ),
99 102
100 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", 103 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
127 __entry->gltype = gl->gl_name.ln_type; 130 __entry->gltype = gl->gl_name.ln_type;
128 __entry->glnum = gl->gl_name.ln_number; 131 __entry->glnum = gl->gl_name.ln_number;
129 __entry->cur_state = glock_trace_state(gl->gl_state); 132 __entry->cur_state = glock_trace_state(gl->gl_state);
130 __entry->flags = gl->gl_flags; 133 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
131 ), 134 ),
132 135
133 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", 136 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
161 __entry->glnum = gl->gl_name.ln_number; 164 __entry->glnum = gl->gl_name.ln_number;
162 __entry->cur_state = glock_trace_state(gl->gl_state); 165 __entry->cur_state = glock_trace_state(gl->gl_state);
163 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 166 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
164 __entry->flags = gl->gl_flags; 167 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
165 ), 168 ),
166 169
167 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", 170 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
318 MINOR(__entry->dev), __entry->blocks) 321 MINOR(__entry->dev), __entry->blocks)
319); 322);
320 323
324/* Writing back the AIL */
325TRACE_EVENT(gfs2_ail_flush,
326
327 TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
328
329 TP_ARGS(sdp, wbc, start),
330
331 TP_STRUCT__entry(
332 __field( dev_t, dev )
333 __field( int, start )
334 __field( int, sync_mode )
335 __field( long, nr_to_write )
336 ),
337
338 TP_fast_assign(
339 __entry->dev = sdp->sd_vfs->s_dev;
340 __entry->start = start;
341 __entry->sync_mode = wbc->sync_mode;
342 __entry->nr_to_write = wbc->nr_to_write;
343 ),
344
345 TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
346 MINOR(__entry->dev), __entry->start ? "start" : "end",
347 __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
348 __entry->nr_to_write)
349);
350
321/* Section 3 - bmap 351/* Section 3 - bmap
322 * 352 *
323 * Objectives: 353 * Objectives:
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 0c39dc3ef7d7..56bd15c5bf6c 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,6 @@
1config HPFS_FS 1config HPFS_FS
2 tristate "OS/2 HPFS file system support" 2 tristate "OS/2 HPFS file system support"
3 depends on BLOCK 3 depends on BLOCK
4 depends on BROKEN || !PREEMPT
5 help 4 help
6 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS 5 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
7 is the file system used for organizing files on OS/2 hard disk 6 is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index 5503e2c28910..7a5eb2c718c8 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -8,8 +8,6 @@
8 8
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
12
13/* 11/*
14 * Check if a sector is allocated in bitmap 12 * Check if a sector is allocated in bitmap
15 * This is really slow. Turned on only if chk==2 13 * This is really slow. Turned on only if chk==2
@@ -18,9 +16,9 @@ static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
18static int chk_if_allocated(struct super_block *s, secno sec, char *msg) 16static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
19{ 17{
20 struct quad_buffer_head qbh; 18 struct quad_buffer_head qbh;
21 unsigned *bmp; 19 u32 *bmp;
22 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; 20 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail;
23 if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f)) & 1) { 21 if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) {
24 hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); 22 hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec);
25 goto fail1; 23 goto fail1;
26 } 24 }
@@ -28,7 +26,7 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
28 if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) { 26 if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) {
29 unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4; 27 unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4;
30 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail; 28 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail;
31 if ((bmp[ssec >> 5] >> (ssec & 0x1f)) & 1) { 29 if ((le32_to_cpu(bmp[ssec >> 5]) >> (ssec & 0x1f)) & 1) {
32 hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec); 30 hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec);
33 goto fail1; 31 goto fail1;
34 } 32 }
@@ -75,7 +73,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
75 hpfs_error(s, "Bad allocation size: %d", n); 73 hpfs_error(s, "Bad allocation size: %d", n);
76 return 0; 74 return 0;
77 } 75 }
78 lock_super(s);
79 if (bs != ~0x3fff) { 76 if (bs != ~0x3fff) {
80 if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls; 77 if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls;
81 } else { 78 } else {
@@ -85,10 +82,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
85 ret = bs + nr; 82 ret = bs + nr;
86 goto rt; 83 goto rt;
87 } 84 }
88 /*if (!tstbits(bmp, nr + n, n + forward)) {
89 ret = bs + nr + n;
90 goto rt;
91 }*/
92 q = nr + n; b = 0; 85 q = nr + n; b = 0;
93 while ((a = tstbits(bmp, q, n + forward)) != 0) { 86 while ((a = tstbits(bmp, q, n + forward)) != 0) {
94 q += a; 87 q += a;
@@ -105,14 +98,14 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
105 goto rt; 98 goto rt;
106 } 99 }
107 nr >>= 5; 100 nr >>= 5;
108 /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) {*/ 101 /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) */
109 i = nr; 102 i = nr;
110 do { 103 do {
111 if (!bmp[i]) goto cont; 104 if (!le32_to_cpu(bmp[i])) goto cont;
112 if (n + forward >= 0x3f && bmp[i] != -1) goto cont; 105 if (n + forward >= 0x3f && le32_to_cpu(bmp[i]) != 0xffffffff) goto cont;
113 q = i<<5; 106 q = i<<5;
114 if (i > 0) { 107 if (i > 0) {
115 unsigned k = bmp[i-1]; 108 unsigned k = le32_to_cpu(bmp[i-1]);
116 while (k & 0x80000000) { 109 while (k & 0x80000000) {
117 q--; k <<= 1; 110 q--; k <<= 1;
118 } 111 }
@@ -132,18 +125,17 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
132 } while (i != nr); 125 } while (i != nr);
133 rt: 126 rt:
134 if (ret) { 127 if (ret) {
135 if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (bmp[(ret & 0x3fff) >> 5] | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) { 128 if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (le32_to_cpu(bmp[(ret & 0x3fff) >> 5]) | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
136 hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret); 129 hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret);
137 ret = 0; 130 ret = 0;
138 goto b; 131 goto b;
139 } 132 }
140 bmp[(ret & 0x3fff) >> 5] &= ~(((1 << n) - 1) << (ret & 0x1f)); 133 bmp[(ret & 0x3fff) >> 5] &= cpu_to_le32(~(((1 << n) - 1) << (ret & 0x1f)));
141 hpfs_mark_4buffers_dirty(&qbh); 134 hpfs_mark_4buffers_dirty(&qbh);
142 } 135 }
143 b: 136 b:
144 hpfs_brelse4(&qbh); 137 hpfs_brelse4(&qbh);
145 uls: 138 uls:
146 unlock_super(s);
147 return ret; 139 return ret;
148} 140}
149 141
@@ -155,7 +147,7 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
155 * sectors 147 * sectors
156 */ 148 */
157 149
158secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward, int lock) 150secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward)
159{ 151{
160 secno sec; 152 secno sec;
161 int i; 153 int i;
@@ -167,7 +159,6 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
167 forward = -forward; 159 forward = -forward;
168 f_p = 1; 160 f_p = 1;
169 } 161 }
170 if (lock) hpfs_lock_creation(s);
171 n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14; 162 n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14;
172 if (near && near < sbi->sb_fs_size) { 163 if (near && near < sbi->sb_fs_size) {
173 if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret; 164 if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret;
@@ -214,18 +205,17 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
214 ret: 205 ret:
215 if (sec && f_p) { 206 if (sec && f_p) {
216 for (i = 0; i < forward; i++) { 207 for (i = 0; i < forward; i++) {
217 if (!hpfs_alloc_if_possible_nolock(s, sec + i + 1)) { 208 if (!hpfs_alloc_if_possible(s, sec + i + 1)) {
218 hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i); 209 hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i);
219 sec = 0; 210 sec = 0;
220 break; 211 break;
221 } 212 }
222 } 213 }
223 } 214 }
224 if (lock) hpfs_unlock_creation(s);
225 return sec; 215 return sec;
226} 216}
227 217
228static secno alloc_in_dirband(struct super_block *s, secno near, int lock) 218static secno alloc_in_dirband(struct super_block *s, secno near)
229{ 219{
230 unsigned nr = near; 220 unsigned nr = near;
231 secno sec; 221 secno sec;
@@ -236,49 +226,35 @@ static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
236 nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4; 226 nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4;
237 nr -= sbi->sb_dirband_start; 227 nr -= sbi->sb_dirband_start;
238 nr >>= 2; 228 nr >>= 2;
239 if (lock) hpfs_lock_creation(s);
240 sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0); 229 sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0);
241 if (lock) hpfs_unlock_creation(s);
242 if (!sec) return 0; 230 if (!sec) return 0;
243 return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start; 231 return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start;
244} 232}
245 233
246/* Alloc sector if it's free */ 234/* Alloc sector if it's free */
247 235
248static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec) 236int hpfs_alloc_if_possible(struct super_block *s, secno sec)
249{ 237{
250 struct quad_buffer_head qbh; 238 struct quad_buffer_head qbh;
251 unsigned *bmp; 239 u32 *bmp;
252 lock_super(s);
253 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; 240 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end;
254 if (bmp[(sec & 0x3fff) >> 5] & (1 << (sec & 0x1f))) { 241 if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) {
255 bmp[(sec & 0x3fff) >> 5] &= ~(1 << (sec & 0x1f)); 242 bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f)));
256 hpfs_mark_4buffers_dirty(&qbh); 243 hpfs_mark_4buffers_dirty(&qbh);
257 hpfs_brelse4(&qbh); 244 hpfs_brelse4(&qbh);
258 unlock_super(s);
259 return 1; 245 return 1;
260 } 246 }
261 hpfs_brelse4(&qbh); 247 hpfs_brelse4(&qbh);
262 end: 248 end:
263 unlock_super(s);
264 return 0; 249 return 0;
265} 250}
266 251
267int hpfs_alloc_if_possible(struct super_block *s, secno sec)
268{
269 int r;
270 hpfs_lock_creation(s);
271 r = hpfs_alloc_if_possible_nolock(s, sec);
272 hpfs_unlock_creation(s);
273 return r;
274}
275
276/* Free sectors in bitmaps */ 252/* Free sectors in bitmaps */
277 253
278void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) 254void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
279{ 255{
280 struct quad_buffer_head qbh; 256 struct quad_buffer_head qbh;
281 unsigned *bmp; 257 u32 *bmp;
282 struct hpfs_sb_info *sbi = hpfs_sb(s); 258 struct hpfs_sb_info *sbi = hpfs_sb(s);
283 /*printk("2 - ");*/ 259 /*printk("2 - ");*/
284 if (!n) return; 260 if (!n) return;
@@ -286,26 +262,22 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
286 hpfs_error(s, "Trying to free reserved sector %08x", sec); 262 hpfs_error(s, "Trying to free reserved sector %08x", sec);
287 return; 263 return;
288 } 264 }
289 lock_super(s);
290 sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n; 265 sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n;
291 if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff; 266 if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff;
292 new_map: 267 new_map:
293 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) { 268 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) {
294 unlock_super(s);
295 return; 269 return;
296 } 270 }
297 new_tst: 271 new_tst:
298 if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f) & 1)) { 272 if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f) & 1)) {
299 hpfs_error(s, "sector %08x not allocated", sec); 273 hpfs_error(s, "sector %08x not allocated", sec);
300 hpfs_brelse4(&qbh); 274 hpfs_brelse4(&qbh);
301 unlock_super(s);
302 return; 275 return;
303 } 276 }
304 bmp[(sec & 0x3fff) >> 5] |= 1 << (sec & 0x1f); 277 bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f));
305 if (!--n) { 278 if (!--n) {
306 hpfs_mark_4buffers_dirty(&qbh); 279 hpfs_mark_4buffers_dirty(&qbh);
307 hpfs_brelse4(&qbh); 280 hpfs_brelse4(&qbh);
308 unlock_super(s);
309 return; 281 return;
310 } 282 }
311 if (!(++sec & 0x3fff)) { 283 if (!(++sec & 0x3fff)) {
@@ -327,13 +299,13 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
327 int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; 299 int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14;
328 int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; 300 int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff;
329 int i, j; 301 int i, j;
330 unsigned *bmp; 302 u32 *bmp;
331 struct quad_buffer_head qbh; 303 struct quad_buffer_head qbh;
332 if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { 304 if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
333 for (j = 0; j < 512; j++) { 305 for (j = 0; j < 512; j++) {
334 unsigned k; 306 unsigned k;
335 if (!bmp[j]) continue; 307 if (!le32_to_cpu(bmp[j])) continue;
336 for (k = bmp[j]; k; k >>= 1) if (k & 1) if (!--n) { 308 for (k = le32_to_cpu(bmp[j]); k; k >>= 1) if (k & 1) if (!--n) {
337 hpfs_brelse4(&qbh); 309 hpfs_brelse4(&qbh);
338 return 0; 310 return 0;
339 } 311 }
@@ -352,10 +324,10 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
352 chk_bmp: 324 chk_bmp:
353 if (bmp) { 325 if (bmp) {
354 for (j = 0; j < 512; j++) { 326 for (j = 0; j < 512; j++) {
355 unsigned k; 327 u32 k;
356 if (!bmp[j]) continue; 328 if (!le32_to_cpu(bmp[j])) continue;
357 for (k = 0xf; k; k <<= 4) 329 for (k = 0xf; k; k <<= 4)
358 if ((bmp[j] & k) == k) { 330 if ((le32_to_cpu(bmp[j]) & k) == k) {
359 if (!--n) { 331 if (!--n) {
360 hpfs_brelse4(&qbh); 332 hpfs_brelse4(&qbh);
361 return 0; 333 return 0;
@@ -379,44 +351,40 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno)
379 hpfs_free_sectors(s, dno, 4); 351 hpfs_free_sectors(s, dno, 4);
380 } else { 352 } else {
381 struct quad_buffer_head qbh; 353 struct quad_buffer_head qbh;
382 unsigned *bmp; 354 u32 *bmp;
383 unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; 355 unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4;
384 lock_super(s);
385 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { 356 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
386 unlock_super(s);
387 return; 357 return;
388 } 358 }
389 bmp[ssec >> 5] |= 1 << (ssec & 0x1f); 359 bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f));
390 hpfs_mark_4buffers_dirty(&qbh); 360 hpfs_mark_4buffers_dirty(&qbh);
391 hpfs_brelse4(&qbh); 361 hpfs_brelse4(&qbh);
392 unlock_super(s);
393 } 362 }
394} 363}
395 364
396struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near, 365struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near,
397 dnode_secno *dno, struct quad_buffer_head *qbh, 366 dnode_secno *dno, struct quad_buffer_head *qbh)
398 int lock)
399{ 367{
400 struct dnode *d; 368 struct dnode *d;
401 if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) { 369 if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) {
402 if (!(*dno = alloc_in_dirband(s, near, lock))) 370 if (!(*dno = alloc_in_dirband(s, near)))
403 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) return NULL; 371 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL;
404 } else { 372 } else {
405 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) 373 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0)))
406 if (!(*dno = alloc_in_dirband(s, near, lock))) return NULL; 374 if (!(*dno = alloc_in_dirband(s, near))) return NULL;
407 } 375 }
408 if (!(d = hpfs_get_4sectors(s, *dno, qbh))) { 376 if (!(d = hpfs_get_4sectors(s, *dno, qbh))) {
409 hpfs_free_dnode(s, *dno); 377 hpfs_free_dnode(s, *dno);
410 return NULL; 378 return NULL;
411 } 379 }
412 memset(d, 0, 2048); 380 memset(d, 0, 2048);
413 d->magic = DNODE_MAGIC; 381 d->magic = cpu_to_le32(DNODE_MAGIC);
414 d->first_free = 52; 382 d->first_free = cpu_to_le32(52);
415 d->dirent[0] = 32; 383 d->dirent[0] = 32;
416 d->dirent[2] = 8; 384 d->dirent[2] = 8;
417 d->dirent[30] = 1; 385 d->dirent[30] = 1;
418 d->dirent[31] = 255; 386 d->dirent[31] = 255;
419 d->self = *dno; 387 d->self = cpu_to_le32(*dno);
420 return d; 388 return d;
421} 389}
422 390
@@ -424,16 +392,16 @@ struct fnode *hpfs_alloc_fnode(struct super_block *s, secno near, fnode_secno *f
424 struct buffer_head **bh) 392 struct buffer_head **bh)
425{ 393{
426 struct fnode *f; 394 struct fnode *f;
427 if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD, 1))) return NULL; 395 if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD))) return NULL;
428 if (!(f = hpfs_get_sector(s, *fno, bh))) { 396 if (!(f = hpfs_get_sector(s, *fno, bh))) {
429 hpfs_free_sectors(s, *fno, 1); 397 hpfs_free_sectors(s, *fno, 1);
430 return NULL; 398 return NULL;
431 } 399 }
432 memset(f, 0, 512); 400 memset(f, 0, 512);
433 f->magic = FNODE_MAGIC; 401 f->magic = cpu_to_le32(FNODE_MAGIC);
434 f->ea_offs = 0xc4; 402 f->ea_offs = cpu_to_le16(0xc4);
435 f->btree.n_free_nodes = 8; 403 f->btree.n_free_nodes = 8;
436 f->btree.first_free = 8; 404 f->btree.first_free = cpu_to_le16(8);
437 return f; 405 return f;
438} 406}
439 407
@@ -441,16 +409,16 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
441 struct buffer_head **bh) 409 struct buffer_head **bh)
442{ 410{
443 struct anode *a; 411 struct anode *a;
444 if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD, 1))) return NULL; 412 if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD))) return NULL;
445 if (!(a = hpfs_get_sector(s, *ano, bh))) { 413 if (!(a = hpfs_get_sector(s, *ano, bh))) {
446 hpfs_free_sectors(s, *ano, 1); 414 hpfs_free_sectors(s, *ano, 1);
447 return NULL; 415 return NULL;
448 } 416 }
449 memset(a, 0, 512); 417 memset(a, 0, 512);
450 a->magic = ANODE_MAGIC; 418 a->magic = cpu_to_le32(ANODE_MAGIC);
451 a->self = *ano; 419 a->self = cpu_to_le32(*ano);
452 a->btree.n_free_nodes = 40; 420 a->btree.n_free_nodes = 40;
453 a->btree.n_used_nodes = 0; 421 a->btree.n_used_nodes = 0;
454 a->btree.first_free = 8; 422 a->btree.first_free = cpu_to_le16(8);
455 return a; 423 return a;
456} 424}
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 6a2f04bf3df0..08b503e8ed29 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -22,8 +22,8 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
22 if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; 22 if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1;
23 if (btree->internal) { 23 if (btree->internal) {
24 for (i = 0; i < btree->n_used_nodes; i++) 24 for (i = 0; i < btree->n_used_nodes; i++)
25 if (btree->u.internal[i].file_secno > sec) { 25 if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) {
26 a = btree->u.internal[i].down; 26 a = le32_to_cpu(btree->u.internal[i].down);
27 brelse(bh); 27 brelse(bh);
28 if (!(anode = hpfs_map_anode(s, a, &bh))) return -1; 28 if (!(anode = hpfs_map_anode(s, a, &bh))) return -1;
29 btree = &anode->btree; 29 btree = &anode->btree;
@@ -34,18 +34,18 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
34 return -1; 34 return -1;
35 } 35 }
36 for (i = 0; i < btree->n_used_nodes; i++) 36 for (i = 0; i < btree->n_used_nodes; i++)
37 if (btree->u.external[i].file_secno <= sec && 37 if (le32_to_cpu(btree->u.external[i].file_secno) <= sec &&
38 btree->u.external[i].file_secno + btree->u.external[i].length > sec) { 38 le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > sec) {
39 a = btree->u.external[i].disk_secno + sec - btree->u.external[i].file_secno; 39 a = le32_to_cpu(btree->u.external[i].disk_secno) + sec - le32_to_cpu(btree->u.external[i].file_secno);
40 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) { 40 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) {
41 brelse(bh); 41 brelse(bh);
42 return -1; 42 return -1;
43 } 43 }
44 if (inode) { 44 if (inode) {
45 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); 45 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
46 hpfs_inode->i_file_sec = btree->u.external[i].file_secno; 46 hpfs_inode->i_file_sec = le32_to_cpu(btree->u.external[i].file_secno);
47 hpfs_inode->i_disk_sec = btree->u.external[i].disk_secno; 47 hpfs_inode->i_disk_sec = le32_to_cpu(btree->u.external[i].disk_secno);
48 hpfs_inode->i_n_secs = btree->u.external[i].length; 48 hpfs_inode->i_n_secs = le32_to_cpu(btree->u.external[i].length);
49 } 49 }
50 brelse(bh); 50 brelse(bh);
51 return a; 51 return a;
@@ -83,8 +83,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
83 return -1; 83 return -1;
84 } 84 }
85 if (btree->internal) { 85 if (btree->internal) {
86 a = btree->u.internal[n].down; 86 a = le32_to_cpu(btree->u.internal[n].down);
87 btree->u.internal[n].file_secno = -1; 87 btree->u.internal[n].file_secno = cpu_to_le32(-1);
88 mark_buffer_dirty(bh); 88 mark_buffer_dirty(bh);
89 brelse(bh); 89 brelse(bh);
90 if (hpfs_sb(s)->sb_chk) 90 if (hpfs_sb(s)->sb_chk)
@@ -94,15 +94,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
94 goto go_down; 94 goto go_down;
95 } 95 }
96 if (n >= 0) { 96 if (n >= 0) {
97 if (btree->u.external[n].file_secno + btree->u.external[n].length != fsecno) { 97 if (le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length) != fsecno) {
98 hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x", 98 hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x",
99 btree->u.external[n].file_secno + btree->u.external[n].length, fsecno, 99 le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length), fsecno,
100 fnod?'f':'a', node); 100 fnod?'f':'a', node);
101 brelse(bh); 101 brelse(bh);
102 return -1; 102 return -1;
103 } 103 }
104 if (hpfs_alloc_if_possible(s, se = btree->u.external[n].disk_secno + btree->u.external[n].length)) { 104 if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) {
105 btree->u.external[n].length++; 105 btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1);
106 mark_buffer_dirty(bh); 106 mark_buffer_dirty(bh);
107 brelse(bh); 107 brelse(bh);
108 return se; 108 return se;
@@ -115,20 +115,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
115 } 115 }
116 se = !fnod ? node : (node + 16384) & ~16383; 116 se = !fnod ? node : (node + 16384) & ~16383;
117 } 117 }
118 if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M, 1))) { 118 if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M))) {
119 brelse(bh); 119 brelse(bh);
120 return -1; 120 return -1;
121 } 121 }
122 fs = n < 0 ? 0 : btree->u.external[n].file_secno + btree->u.external[n].length; 122 fs = n < 0 ? 0 : le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length);
123 if (!btree->n_free_nodes) { 123 if (!btree->n_free_nodes) {
124 up = a != node ? anode->up : -1; 124 up = a != node ? le32_to_cpu(anode->up) : -1;
125 if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) { 125 if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) {
126 brelse(bh); 126 brelse(bh);
127 hpfs_free_sectors(s, se, 1); 127 hpfs_free_sectors(s, se, 1);
128 return -1; 128 return -1;
129 } 129 }
130 if (a == node && fnod) { 130 if (a == node && fnod) {
131 anode->up = node; 131 anode->up = cpu_to_le32(node);
132 anode->btree.fnode_parent = 1; 132 anode->btree.fnode_parent = 1;
133 anode->btree.n_used_nodes = btree->n_used_nodes; 133 anode->btree.n_used_nodes = btree->n_used_nodes;
134 anode->btree.first_free = btree->first_free; 134 anode->btree.first_free = btree->first_free;
@@ -137,9 +137,9 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
137 btree->internal = 1; 137 btree->internal = 1;
138 btree->n_free_nodes = 11; 138 btree->n_free_nodes = 11;
139 btree->n_used_nodes = 1; 139 btree->n_used_nodes = 1;
140 btree->first_free = (char *)&(btree->u.internal[1]) - (char *)btree; 140 btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree);
141 btree->u.internal[0].file_secno = -1; 141 btree->u.internal[0].file_secno = cpu_to_le32(-1);
142 btree->u.internal[0].down = na; 142 btree->u.internal[0].down = cpu_to_le32(na);
143 mark_buffer_dirty(bh); 143 mark_buffer_dirty(bh);
144 } else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) { 144 } else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) {
145 brelse(bh); 145 brelse(bh);
@@ -153,15 +153,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
153 btree = &anode->btree; 153 btree = &anode->btree;
154 } 154 }
155 btree->n_free_nodes--; n = btree->n_used_nodes++; 155 btree->n_free_nodes--; n = btree->n_used_nodes++;
156 btree->first_free += 12; 156 btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12);
157 btree->u.external[n].disk_secno = se; 157 btree->u.external[n].disk_secno = cpu_to_le32(se);
158 btree->u.external[n].file_secno = fs; 158 btree->u.external[n].file_secno = cpu_to_le32(fs);
159 btree->u.external[n].length = 1; 159 btree->u.external[n].length = cpu_to_le32(1);
160 mark_buffer_dirty(bh); 160 mark_buffer_dirty(bh);
161 brelse(bh); 161 brelse(bh);
162 if ((a == node && fnod) || na == -1) return se; 162 if ((a == node && fnod) || na == -1) return se;
163 c2 = 0; 163 c2 = 0;
164 while (up != -1) { 164 while (up != (anode_secno)-1) {
165 struct anode *new_anode; 165 struct anode *new_anode;
166 if (hpfs_sb(s)->sb_chk) 166 if (hpfs_sb(s)->sb_chk)
167 if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1; 167 if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1;
@@ -174,47 +174,47 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
174 } 174 }
175 if (btree->n_free_nodes) { 175 if (btree->n_free_nodes) {
176 btree->n_free_nodes--; n = btree->n_used_nodes++; 176 btree->n_free_nodes--; n = btree->n_used_nodes++;
177 btree->first_free += 8; 177 btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8);
178 btree->u.internal[n].file_secno = -1; 178 btree->u.internal[n].file_secno = cpu_to_le32(-1);
179 btree->u.internal[n].down = na; 179 btree->u.internal[n].down = cpu_to_le32(na);
180 btree->u.internal[n-1].file_secno = fs; 180 btree->u.internal[n-1].file_secno = cpu_to_le32(fs);
181 mark_buffer_dirty(bh); 181 mark_buffer_dirty(bh);
182 brelse(bh); 182 brelse(bh);
183 brelse(bh2); 183 brelse(bh2);
184 hpfs_free_sectors(s, ra, 1); 184 hpfs_free_sectors(s, ra, 1);
185 if ((anode = hpfs_map_anode(s, na, &bh))) { 185 if ((anode = hpfs_map_anode(s, na, &bh))) {
186 anode->up = up; 186 anode->up = cpu_to_le32(up);
187 anode->btree.fnode_parent = up == node && fnod; 187 anode->btree.fnode_parent = up == node && fnod;
188 mark_buffer_dirty(bh); 188 mark_buffer_dirty(bh);
189 brelse(bh); 189 brelse(bh);
190 } 190 }
191 return se; 191 return se;
192 } 192 }
193 up = up != node ? anode->up : -1; 193 up = up != node ? le32_to_cpu(anode->up) : -1;
194 btree->u.internal[btree->n_used_nodes - 1].file_secno = /*fs*/-1; 194 btree->u.internal[btree->n_used_nodes - 1].file_secno = cpu_to_le32(/*fs*/-1);
195 mark_buffer_dirty(bh); 195 mark_buffer_dirty(bh);
196 brelse(bh); 196 brelse(bh);
197 a = na; 197 a = na;
198 if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { 198 if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) {
199 anode = new_anode; 199 anode = new_anode;
200 /*anode->up = up != -1 ? up : ra;*/ 200 /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/
201 anode->btree.internal = 1; 201 anode->btree.internal = 1;
202 anode->btree.n_used_nodes = 1; 202 anode->btree.n_used_nodes = 1;
203 anode->btree.n_free_nodes = 59; 203 anode->btree.n_free_nodes = 59;
204 anode->btree.first_free = 16; 204 anode->btree.first_free = cpu_to_le16(16);
205 anode->btree.u.internal[0].down = a; 205 anode->btree.u.internal[0].down = cpu_to_le32(a);
206 anode->btree.u.internal[0].file_secno = -1; 206 anode->btree.u.internal[0].file_secno = cpu_to_le32(-1);
207 mark_buffer_dirty(bh); 207 mark_buffer_dirty(bh);
208 brelse(bh); 208 brelse(bh);
209 if ((anode = hpfs_map_anode(s, a, &bh))) { 209 if ((anode = hpfs_map_anode(s, a, &bh))) {
210 anode->up = na; 210 anode->up = cpu_to_le32(na);
211 mark_buffer_dirty(bh); 211 mark_buffer_dirty(bh);
212 brelse(bh); 212 brelse(bh);
213 } 213 }
214 } else na = a; 214 } else na = a;
215 } 215 }
216 if ((anode = hpfs_map_anode(s, na, &bh))) { 216 if ((anode = hpfs_map_anode(s, na, &bh))) {
217 anode->up = node; 217 anode->up = cpu_to_le32(node);
218 if (fnod) anode->btree.fnode_parent = 1; 218 if (fnod) anode->btree.fnode_parent = 1;
219 mark_buffer_dirty(bh); 219 mark_buffer_dirty(bh);
220 brelse(bh); 220 brelse(bh);
@@ -232,14 +232,14 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
232 } 232 }
233 btree = &fnode->btree; 233 btree = &fnode->btree;
234 } 234 }
235 ranode->up = node; 235 ranode->up = cpu_to_le32(node);
236 memcpy(&ranode->btree, btree, btree->first_free); 236 memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free));
237 if (fnod) ranode->btree.fnode_parent = 1; 237 if (fnod) ranode->btree.fnode_parent = 1;
238 ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes; 238 ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes;
239 if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { 239 if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) {
240 struct anode *unode; 240 struct anode *unode;
241 if ((unode = hpfs_map_anode(s, ranode->u.internal[n].down, &bh1))) { 241 if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) {
242 unode->up = ra; 242 unode->up = cpu_to_le32(ra);
243 unode->btree.fnode_parent = 0; 243 unode->btree.fnode_parent = 0;
244 mark_buffer_dirty(bh1); 244 mark_buffer_dirty(bh1);
245 brelse(bh1); 245 brelse(bh1);
@@ -248,11 +248,11 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
248 btree->internal = 1; 248 btree->internal = 1;
249 btree->n_free_nodes = fnod ? 10 : 58; 249 btree->n_free_nodes = fnod ? 10 : 58;
250 btree->n_used_nodes = 2; 250 btree->n_used_nodes = 2;
251 btree->first_free = (char *)&btree->u.internal[2] - (char *)btree; 251 btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree);
252 btree->u.internal[0].file_secno = fs; 252 btree->u.internal[0].file_secno = cpu_to_le32(fs);
253 btree->u.internal[0].down = ra; 253 btree->u.internal[0].down = cpu_to_le32(ra);
254 btree->u.internal[1].file_secno = -1; 254 btree->u.internal[1].file_secno = cpu_to_le32(-1);
255 btree->u.internal[1].down = na; 255 btree->u.internal[1].down = cpu_to_le32(na);
256 mark_buffer_dirty(bh); 256 mark_buffer_dirty(bh);
257 brelse(bh); 257 brelse(bh);
258 mark_buffer_dirty(bh2); 258 mark_buffer_dirty(bh2);
@@ -279,7 +279,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
279 go_down: 279 go_down:
280 d2 = 0; 280 d2 = 0;
281 while (btree1->internal) { 281 while (btree1->internal) {
282 ano = btree1->u.internal[pos].down; 282 ano = le32_to_cpu(btree1->u.internal[pos].down);
283 if (level) brelse(bh); 283 if (level) brelse(bh);
284 if (hpfs_sb(s)->sb_chk) 284 if (hpfs_sb(s)->sb_chk)
285 if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1")) 285 if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1"))
@@ -290,7 +290,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
290 pos = 0; 290 pos = 0;
291 } 291 }
292 for (i = 0; i < btree1->n_used_nodes; i++) 292 for (i = 0; i < btree1->n_used_nodes; i++)
293 hpfs_free_sectors(s, btree1->u.external[i].disk_secno, btree1->u.external[i].length); 293 hpfs_free_sectors(s, le32_to_cpu(btree1->u.external[i].disk_secno), le32_to_cpu(btree1->u.external[i].length));
294 go_up: 294 go_up:
295 if (!level) return; 295 if (!level) return;
296 brelse(bh); 296 brelse(bh);
@@ -298,13 +298,13 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
298 if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return; 298 if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return;
299 hpfs_free_sectors(s, ano, 1); 299 hpfs_free_sectors(s, ano, 1);
300 oano = ano; 300 oano = ano;
301 ano = anode->up; 301 ano = le32_to_cpu(anode->up);
302 if (--level) { 302 if (--level) {
303 if (!(anode = hpfs_map_anode(s, ano, &bh))) return; 303 if (!(anode = hpfs_map_anode(s, ano, &bh))) return;
304 btree1 = &anode->btree; 304 btree1 = &anode->btree;
305 } else btree1 = btree; 305 } else btree1 = btree;
306 for (i = 0; i < btree1->n_used_nodes; i++) { 306 for (i = 0; i < btree1->n_used_nodes; i++) {
307 if (btree1->u.internal[i].down == oano) { 307 if (le32_to_cpu(btree1->u.internal[i].down) == oano) {
308 if ((pos = i + 1) < btree1->n_used_nodes) 308 if ((pos = i + 1) < btree1->n_used_nodes)
309 goto go_down; 309 goto go_down;
310 else 310 else
@@ -411,7 +411,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
411 if (fno) { 411 if (fno) {
412 btree->n_free_nodes = 8; 412 btree->n_free_nodes = 8;
413 btree->n_used_nodes = 0; 413 btree->n_used_nodes = 0;
414 btree->first_free = 8; 414 btree->first_free = cpu_to_le16(8);
415 btree->internal = 0; 415 btree->internal = 0;
416 mark_buffer_dirty(bh); 416 mark_buffer_dirty(bh);
417 } else hpfs_free_sectors(s, f, 1); 417 } else hpfs_free_sectors(s, f, 1);
@@ -421,22 +421,22 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
421 while (btree->internal) { 421 while (btree->internal) {
422 nodes = btree->n_used_nodes + btree->n_free_nodes; 422 nodes = btree->n_used_nodes + btree->n_free_nodes;
423 for (i = 0; i < btree->n_used_nodes; i++) 423 for (i = 0; i < btree->n_used_nodes; i++)
424 if (btree->u.internal[i].file_secno >= secs) goto f; 424 if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f;
425 brelse(bh); 425 brelse(bh);
426 hpfs_error(s, "internal btree %08x doesn't end with -1", node); 426 hpfs_error(s, "internal btree %08x doesn't end with -1", node);
427 return; 427 return;
428 f: 428 f:
429 for (j = i + 1; j < btree->n_used_nodes; j++) 429 for (j = i + 1; j < btree->n_used_nodes; j++)
430 hpfs_ea_remove(s, btree->u.internal[j].down, 1, 0); 430 hpfs_ea_remove(s, le32_to_cpu(btree->u.internal[j].down), 1, 0);
431 btree->n_used_nodes = i + 1; 431 btree->n_used_nodes = i + 1;
432 btree->n_free_nodes = nodes - btree->n_used_nodes; 432 btree->n_free_nodes = nodes - btree->n_used_nodes;
433 btree->first_free = 8 + 8 * btree->n_used_nodes; 433 btree->first_free = cpu_to_le16(8 + 8 * btree->n_used_nodes);
434 mark_buffer_dirty(bh); 434 mark_buffer_dirty(bh);
435 if (btree->u.internal[i].file_secno == secs) { 435 if (btree->u.internal[i].file_secno == cpu_to_le32(secs)) {
436 brelse(bh); 436 brelse(bh);
437 return; 437 return;
438 } 438 }
439 node = btree->u.internal[i].down; 439 node = le32_to_cpu(btree->u.internal[i].down);
440 brelse(bh); 440 brelse(bh);
441 if (hpfs_sb(s)->sb_chk) 441 if (hpfs_sb(s)->sb_chk)
442 if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree")) 442 if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree"))
@@ -446,25 +446,25 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
446 } 446 }
447 nodes = btree->n_used_nodes + btree->n_free_nodes; 447 nodes = btree->n_used_nodes + btree->n_free_nodes;
448 for (i = 0; i < btree->n_used_nodes; i++) 448 for (i = 0; i < btree->n_used_nodes; i++)
449 if (btree->u.external[i].file_secno + btree->u.external[i].length >= secs) goto ff; 449 if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) >= secs) goto ff;
450 brelse(bh); 450 brelse(bh);
451 return; 451 return;
452 ff: 452 ff:
453 if (secs <= btree->u.external[i].file_secno) { 453 if (secs <= le32_to_cpu(btree->u.external[i].file_secno)) {
454 hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs); 454 hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs);
455 if (i) i--; 455 if (i) i--;
456 } 456 }
457 else if (btree->u.external[i].file_secno + btree->u.external[i].length > secs) { 457 else if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > secs) {
458 hpfs_free_sectors(s, btree->u.external[i].disk_secno + secs - 458 hpfs_free_sectors(s, le32_to_cpu(btree->u.external[i].disk_secno) + secs -
459 btree->u.external[i].file_secno, btree->u.external[i].length 459 le32_to_cpu(btree->u.external[i].file_secno), le32_to_cpu(btree->u.external[i].length)
460 - secs + btree->u.external[i].file_secno); /* I hope gcc optimizes this :-) */ 460 - secs + le32_to_cpu(btree->u.external[i].file_secno)); /* I hope gcc optimizes this :-) */
461 btree->u.external[i].length = secs - btree->u.external[i].file_secno; 461 btree->u.external[i].length = cpu_to_le32(secs - le32_to_cpu(btree->u.external[i].file_secno));
462 } 462 }
463 for (j = i + 1; j < btree->n_used_nodes; j++) 463 for (j = i + 1; j < btree->n_used_nodes; j++)
464 hpfs_free_sectors(s, btree->u.external[j].disk_secno, btree->u.external[j].length); 464 hpfs_free_sectors(s, le32_to_cpu(btree->u.external[j].disk_secno), le32_to_cpu(btree->u.external[j].length));
465 btree->n_used_nodes = i + 1; 465 btree->n_used_nodes = i + 1;
466 btree->n_free_nodes = nodes - btree->n_used_nodes; 466 btree->n_free_nodes = nodes - btree->n_used_nodes;
467 btree->first_free = 8 + 12 * btree->n_used_nodes; 467 btree->first_free = cpu_to_le16(8 + 12 * btree->n_used_nodes);
468 mark_buffer_dirty(bh); 468 mark_buffer_dirty(bh);
469 brelse(bh); 469 brelse(bh);
470} 470}
@@ -480,12 +480,12 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno)
480 struct extended_attribute *ea_end; 480 struct extended_attribute *ea_end;
481 if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; 481 if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return;
482 if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); 482 if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree);
483 else hpfs_remove_dtree(s, fnode->u.external[0].disk_secno); 483 else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno));
484 ea_end = fnode_end_ea(fnode); 484 ea_end = fnode_end_ea(fnode);
485 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) 485 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
486 if (ea->indirect) 486 if (ea->indirect)
487 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); 487 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
488 hpfs_ea_ext_remove(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l); 488 hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l));
489 brelse(bh); 489 brelse(bh);
490 hpfs_free_sectors(s, fno, 1); 490 hpfs_free_sectors(s, fno, 1);
491} 491}
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 793cb9d943d2..9ecde27d1e29 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -9,22 +9,6 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include "hpfs_fn.h" 10#include "hpfs_fn.h"
11 11
12void hpfs_lock_creation(struct super_block *s)
13{
14#ifdef DEBUG_LOCKS
15 printk("lock creation\n");
16#endif
17 mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
18}
19
20void hpfs_unlock_creation(struct super_block *s)
21{
22#ifdef DEBUG_LOCKS
23 printk("unlock creation\n");
24#endif
25 mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
26}
27
28/* Map a sector into a buffer and return pointers to it and to the buffer. */ 12/* Map a sector into a buffer and return pointers to it and to the buffer. */
29 13
30void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, 14void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp,
@@ -32,6 +16,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head
32{ 16{
33 struct buffer_head *bh; 17 struct buffer_head *bh;
34 18
19 hpfs_lock_assert(s);
20
35 cond_resched(); 21 cond_resched();
36 22
37 *bhp = bh = sb_bread(s, secno); 23 *bhp = bh = sb_bread(s, secno);
@@ -50,6 +36,8 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head
50 struct buffer_head *bh; 36 struct buffer_head *bh;
51 /*return hpfs_map_sector(s, secno, bhp, 0);*/ 37 /*return hpfs_map_sector(s, secno, bhp, 0);*/
52 38
39 hpfs_lock_assert(s);
40
53 cond_resched(); 41 cond_resched();
54 42
55 if ((*bhp = bh = sb_getblk(s, secno)) != NULL) { 43 if ((*bhp = bh = sb_getblk(s, secno)) != NULL) {
@@ -70,6 +58,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
70 struct buffer_head *bh; 58 struct buffer_head *bh;
71 char *data; 59 char *data;
72 60
61 hpfs_lock_assert(s);
62
73 cond_resched(); 63 cond_resched();
74 64
75 if (secno & 3) { 65 if (secno & 3) {
@@ -125,6 +115,8 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno,
125{ 115{
126 cond_resched(); 116 cond_resched();
127 117
118 hpfs_lock_assert(s);
119
128 if (secno & 3) { 120 if (secno & 3) {
129 printk("HPFS: hpfs_get_4sectors: unaligned read\n"); 121 printk("HPFS: hpfs_get_4sectors: unaligned read\n");
130 return NULL; 122 return NULL;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b3d7c0ddb609..f46ae025bfb5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -88,9 +88,9 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
88 hpfs_error(inode->i_sb, "not a directory, fnode %08lx", 88 hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
89 (unsigned long)inode->i_ino); 89 (unsigned long)inode->i_ino);
90 } 90 }
91 if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) { 91 if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) {
92 e = 1; 92 e = 1;
93 hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, fno->u.external[0].disk_secno); 93 hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, le32_to_cpu(fno->u.external[0].disk_secno));
94 } 94 }
95 brelse(bh); 95 brelse(bh);
96 if (e) { 96 if (e) {
@@ -156,7 +156,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
156 goto again; 156 goto again;
157 } 157 }
158 tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); 158 tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
159 if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) { 159 if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) {
160 filp->f_pos = old_pos; 160 filp->f_pos = old_pos;
161 if (tempname != de->name) kfree(tempname); 161 if (tempname != de->name) kfree(tempname);
162 hpfs_brelse4(&qbh); 162 hpfs_brelse4(&qbh);
@@ -221,7 +221,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
221 * Get inode number, what we're after. 221 * Get inode number, what we're after.
222 */ 222 */
223 223
224 ino = de->fnode; 224 ino = le32_to_cpu(de->fnode);
225 225
226 /* 226 /*
227 * Go find or make an inode. 227 * Go find or make an inode.
@@ -236,7 +236,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
236 hpfs_init_inode(result); 236 hpfs_init_inode(result);
237 if (de->directory) 237 if (de->directory)
238 hpfs_read_inode(result); 238 hpfs_read_inode(result);
239 else if (de->ea_size && hpfs_sb(dir->i_sb)->sb_eas) 239 else if (le32_to_cpu(de->ea_size) && hpfs_sb(dir->i_sb)->sb_eas)
240 hpfs_read_inode(result); 240 hpfs_read_inode(result);
241 else { 241 else {
242 result->i_mode |= S_IFREG; 242 result->i_mode |= S_IFREG;
@@ -250,8 +250,6 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
250 hpfs_result = hpfs_i(result); 250 hpfs_result = hpfs_i(result);
251 if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino; 251 if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
252 252
253 hpfs_decide_conv(result, name, len);
254
255 if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) { 253 if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
256 hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); 254 hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
257 goto bail1; 255 goto bail1;
@@ -263,19 +261,19 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
263 */ 261 */
264 262
265 if (!result->i_ctime.tv_sec) { 263 if (!result->i_ctime.tv_sec) {
266 if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, de->creation_date))) 264 if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
267 result->i_ctime.tv_sec = 1; 265 result->i_ctime.tv_sec = 1;
268 result->i_ctime.tv_nsec = 0; 266 result->i_ctime.tv_nsec = 0;
269 result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, de->write_date); 267 result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
270 result->i_mtime.tv_nsec = 0; 268 result->i_mtime.tv_nsec = 0;
271 result->i_atime.tv_sec = local_to_gmt(dir->i_sb, de->read_date); 269 result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
272 result->i_atime.tv_nsec = 0; 270 result->i_atime.tv_nsec = 0;
273 hpfs_result->i_ea_size = de->ea_size; 271 hpfs_result->i_ea_size = le32_to_cpu(de->ea_size);
274 if (!hpfs_result->i_ea_mode && de->read_only) 272 if (!hpfs_result->i_ea_mode && de->read_only)
275 result->i_mode &= ~0222; 273 result->i_mode &= ~0222;
276 if (!de->directory) { 274 if (!de->directory) {
277 if (result->i_size == -1) { 275 if (result->i_size == -1) {
278 result->i_size = de->file_size; 276 result->i_size = le32_to_cpu(de->file_size);
279 result->i_data.a_ops = &hpfs_aops; 277 result->i_data.a_ops = &hpfs_aops;
280 hpfs_i(result)->mmu_private = result->i_size; 278 hpfs_i(result)->mmu_private = result->i_size;
281 /* 279 /*
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 9b2ffadfc8c4..1e0e2ac30fd3 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -14,11 +14,11 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde)
14 struct hpfs_dirent *de_end = dnode_end_de(d); 14 struct hpfs_dirent *de_end = dnode_end_de(d);
15 int i = 1; 15 int i = 1;
16 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) { 16 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
17 if (de == fde) return ((loff_t) d->self << 4) | (loff_t)i; 17 if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i;
18 i++; 18 i++;
19 } 19 }
20 printk("HPFS: get_pos: not_found\n"); 20 printk("HPFS: get_pos: not_found\n");
21 return ((loff_t)d->self << 4) | (loff_t)1; 21 return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1;
22} 22}
23 23
24void hpfs_add_pos(struct inode *inode, loff_t *pos) 24void hpfs_add_pos(struct inode *inode, loff_t *pos)
@@ -130,29 +130,30 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
130{ 130{
131 struct hpfs_dirent *de; 131 struct hpfs_dirent *de;
132 if (!(de = dnode_last_de(d))) { 132 if (!(de = dnode_last_de(d))) {
133 hpfs_error(s, "set_last_pointer: empty dnode %08x", d->self); 133 hpfs_error(s, "set_last_pointer: empty dnode %08x", le32_to_cpu(d->self));
134 return; 134 return;
135 } 135 }
136 if (hpfs_sb(s)->sb_chk) { 136 if (hpfs_sb(s)->sb_chk) {
137 if (de->down) { 137 if (de->down) {
138 hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x", 138 hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x",
139 d->self, de_down_pointer(de)); 139 le32_to_cpu(d->self), de_down_pointer(de));
140 return; 140 return;
141 } 141 }
142 if (de->length != 32) { 142 if (le16_to_cpu(de->length) != 32) {
143 hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", d->self); 143 hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", le32_to_cpu(d->self));
144 return; 144 return;
145 } 145 }
146 } 146 }
147 if (ptr) { 147 if (ptr) {
148 if ((d->first_free += 4) > 2048) { 148 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4);
149 hpfs_error(s,"set_last_pointer: too long dnode %08x", d->self); 149 if (le32_to_cpu(d->first_free) > 2048) {
150 d->first_free -= 4; 150 hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self));
151 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4);
151 return; 152 return;
152 } 153 }
153 de->length = 36; 154 de->length = cpu_to_le16(36);
154 de->down = 1; 155 de->down = 1;
155 *(dnode_secno *)((char *)de + 32) = ptr; 156 *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr);
156 } 157 }
157} 158}
158 159
@@ -168,7 +169,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
168 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) { 169 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
169 int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last); 170 int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last);
170 if (!c) { 171 if (!c) {
171 hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, d->self); 172 hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, le32_to_cpu(d->self));
172 return NULL; 173 return NULL;
173 } 174 }
174 if (c < 0) break; 175 if (c < 0) break;
@@ -176,15 +177,14 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
176 memmove((char *)de + d_size, de, (char *)de_end - (char *)de); 177 memmove((char *)de + d_size, de, (char *)de_end - (char *)de);
177 memset(de, 0, d_size); 178 memset(de, 0, d_size);
178 if (down_ptr) { 179 if (down_ptr) {
179 *(int *)((char *)de + d_size - 4) = down_ptr; 180 *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr);
180 de->down = 1; 181 de->down = 1;
181 } 182 }
182 de->length = d_size; 183 de->length = cpu_to_le16(d_size);
183 if (down_ptr) de->down = 1;
184 de->not_8x3 = hpfs_is_name_long(name, namelen); 184 de->not_8x3 = hpfs_is_name_long(name, namelen);
185 de->namelen = namelen; 185 de->namelen = namelen;
186 memcpy(de->name, name, namelen); 186 memcpy(de->name, name, namelen);
187 d->first_free += d_size; 187 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size);
188 return de; 188 return de;
189} 189}
190 190
@@ -194,25 +194,25 @@ static void hpfs_delete_de(struct super_block *s, struct dnode *d,
194 struct hpfs_dirent *de) 194 struct hpfs_dirent *de)
195{ 195{
196 if (de->last) { 196 if (de->last) {
197 hpfs_error(s, "attempt to delete last dirent in dnode %08x", d->self); 197 hpfs_error(s, "attempt to delete last dirent in dnode %08x", le32_to_cpu(d->self));
198 return; 198 return;
199 } 199 }
200 d->first_free -= de->length; 200 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - le16_to_cpu(de->length));
201 memmove(de, de_next_de(de), d->first_free + (char *)d - (char *)de); 201 memmove(de, de_next_de(de), le32_to_cpu(d->first_free) + (char *)d - (char *)de);
202} 202}
203 203
204static void fix_up_ptrs(struct super_block *s, struct dnode *d) 204static void fix_up_ptrs(struct super_block *s, struct dnode *d)
205{ 205{
206 struct hpfs_dirent *de; 206 struct hpfs_dirent *de;
207 struct hpfs_dirent *de_end = dnode_end_de(d); 207 struct hpfs_dirent *de_end = dnode_end_de(d);
208 dnode_secno dno = d->self; 208 dnode_secno dno = le32_to_cpu(d->self);
209 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) 209 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de))
210 if (de->down) { 210 if (de->down) {
211 struct quad_buffer_head qbh; 211 struct quad_buffer_head qbh;
212 struct dnode *dd; 212 struct dnode *dd;
213 if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) { 213 if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) {
214 if (dd->up != dno || dd->root_dnode) { 214 if (le32_to_cpu(dd->up) != dno || dd->root_dnode) {
215 dd->up = dno; 215 dd->up = cpu_to_le32(dno);
216 dd->root_dnode = 0; 216 dd->root_dnode = 0;
217 hpfs_mark_4buffers_dirty(&qbh); 217 hpfs_mark_4buffers_dirty(&qbh);
218 } 218 }
@@ -262,7 +262,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
262 kfree(nname); 262 kfree(nname);
263 return 1; 263 return 1;
264 } 264 }
265 if (d->first_free + de_size(namelen, down_ptr) <= 2048) { 265 if (le32_to_cpu(d->first_free) + de_size(namelen, down_ptr) <= 2048) {
266 loff_t t; 266 loff_t t;
267 copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de); 267 copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de);
268 t = get_pos(d, de); 268 t = get_pos(d, de);
@@ -286,11 +286,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
286 kfree(nname); 286 kfree(nname);
287 return 1; 287 return 1;
288 } 288 }
289 memcpy(nd, d, d->first_free); 289 memcpy(nd, d, le32_to_cpu(d->first_free));
290 copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de); 290 copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de);
291 for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1); 291 for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1);
292 h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10; 292 h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10;
293 if (!(ad = hpfs_alloc_dnode(i->i_sb, d->up, &adno, &qbh1, 0))) { 293 if (!(ad = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &adno, &qbh1))) {
294 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted"); 294 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
295 hpfs_brelse4(&qbh); 295 hpfs_brelse4(&qbh);
296 kfree(nd); 296 kfree(nd);
@@ -313,20 +313,21 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
313 down_ptr = adno; 313 down_ptr = adno;
314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); 314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
315 de = de_next_de(de); 315 de = de_next_de(de);
316 memmove((char *)nd + 20, de, nd->first_free + (char *)nd - (char *)de); 316 memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de);
317 nd->first_free -= (char *)de - (char *)nd - 20; 317 nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20));
318 memcpy(d, nd, nd->first_free); 318 memcpy(d, nd, le32_to_cpu(nd->first_free));
319 for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); 319 for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos);
320 fix_up_ptrs(i->i_sb, ad); 320 fix_up_ptrs(i->i_sb, ad);
321 if (!d->root_dnode) { 321 if (!d->root_dnode) {
322 dno = ad->up = d->up; 322 ad->up = d->up;
323 dno = le32_to_cpu(ad->up);
323 hpfs_mark_4buffers_dirty(&qbh); 324 hpfs_mark_4buffers_dirty(&qbh);
324 hpfs_brelse4(&qbh); 325 hpfs_brelse4(&qbh);
325 hpfs_mark_4buffers_dirty(&qbh1); 326 hpfs_mark_4buffers_dirty(&qbh1);
326 hpfs_brelse4(&qbh1); 327 hpfs_brelse4(&qbh1);
327 goto go_up; 328 goto go_up;
328 } 329 }
329 if (!(rd = hpfs_alloc_dnode(i->i_sb, d->up, &rdno, &qbh2, 0))) { 330 if (!(rd = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &rdno, &qbh2))) {
330 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted"); 331 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
331 hpfs_brelse4(&qbh); 332 hpfs_brelse4(&qbh);
332 hpfs_brelse4(&qbh1); 333 hpfs_brelse4(&qbh1);
@@ -338,7 +339,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
338 i->i_blocks += 4; 339 i->i_blocks += 4;
339 rd->root_dnode = 1; 340 rd->root_dnode = 1;
340 rd->up = d->up; 341 rd->up = d->up;
341 if (!(fnode = hpfs_map_fnode(i->i_sb, d->up, &bh))) { 342 if (!(fnode = hpfs_map_fnode(i->i_sb, le32_to_cpu(d->up), &bh))) {
342 hpfs_free_dnode(i->i_sb, rdno); 343 hpfs_free_dnode(i->i_sb, rdno);
343 hpfs_brelse4(&qbh); 344 hpfs_brelse4(&qbh);
344 hpfs_brelse4(&qbh1); 345 hpfs_brelse4(&qbh1);
@@ -347,10 +348,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
347 kfree(nname); 348 kfree(nname);
348 return 1; 349 return 1;
349 } 350 }
350 fnode->u.external[0].disk_secno = rdno; 351 fnode->u.external[0].disk_secno = cpu_to_le32(rdno);
351 mark_buffer_dirty(bh); 352 mark_buffer_dirty(bh);
352 brelse(bh); 353 brelse(bh);
353 d->up = ad->up = hpfs_i(i)->i_dno = rdno; 354 hpfs_i(i)->i_dno = rdno;
355 d->up = ad->up = cpu_to_le32(rdno);
354 d->root_dnode = ad->root_dnode = 0; 356 d->root_dnode = ad->root_dnode = 0;
355 hpfs_mark_4buffers_dirty(&qbh); 357 hpfs_mark_4buffers_dirty(&qbh);
356 hpfs_brelse4(&qbh); 358 hpfs_brelse4(&qbh);
@@ -373,7 +375,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
373 375
374int hpfs_add_dirent(struct inode *i, 376int hpfs_add_dirent(struct inode *i,
375 const unsigned char *name, unsigned namelen, 377 const unsigned char *name, unsigned namelen,
376 struct hpfs_dirent *new_de, int cdepth) 378 struct hpfs_dirent *new_de)
377{ 379{
378 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 380 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
379 struct dnode *d; 381 struct dnode *d;
@@ -403,7 +405,6 @@ int hpfs_add_dirent(struct inode *i,
403 } 405 }
404 } 406 }
405 hpfs_brelse4(&qbh); 407 hpfs_brelse4(&qbh);
406 if (!cdepth) hpfs_lock_creation(i->i_sb);
407 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) { 408 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) {
408 c = 1; 409 c = 1;
409 goto ret; 410 goto ret;
@@ -411,7 +412,6 @@ int hpfs_add_dirent(struct inode *i,
411 i->i_version++; 412 i->i_version++;
412 c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0); 413 c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
413 ret: 414 ret:
414 if (!cdepth) hpfs_unlock_creation(i->i_sb);
415 return c; 415 return c;
416} 416}
417 417
@@ -437,9 +437,9 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
437 return 0; 437 return 0;
438 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0; 438 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0;
439 if (hpfs_sb(i->i_sb)->sb_chk) { 439 if (hpfs_sb(i->i_sb)->sb_chk) {
440 if (dnode->up != chk_up) { 440 if (le32_to_cpu(dnode->up) != chk_up) {
441 hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x", 441 hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x",
442 dno, chk_up, dnode->up); 442 dno, chk_up, le32_to_cpu(dnode->up));
443 hpfs_brelse4(&qbh); 443 hpfs_brelse4(&qbh);
444 return 0; 444 return 0;
445 } 445 }
@@ -455,7 +455,7 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
455 hpfs_brelse4(&qbh); 455 hpfs_brelse4(&qbh);
456 } 456 }
457 while (!(de = dnode_pre_last_de(dnode))) { 457 while (!(de = dnode_pre_last_de(dnode))) {
458 dnode_secno up = dnode->up; 458 dnode_secno up = le32_to_cpu(dnode->up);
459 hpfs_brelse4(&qbh); 459 hpfs_brelse4(&qbh);
460 hpfs_free_dnode(i->i_sb, dno); 460 hpfs_free_dnode(i->i_sb, dno);
461 i->i_size -= 2048; 461 i->i_size -= 2048;
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
474 hpfs_brelse4(&qbh); 474 hpfs_brelse4(&qbh);
475 return 0; 475 return 0;
476 } 476 }
477 dnode->first_free -= 4; 477 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
478 de->length -= 4; 478 de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
479 de->down = 0; 479 de->down = 0;
480 hpfs_mark_4buffers_dirty(&qbh); 480 hpfs_mark_4buffers_dirty(&qbh);
481 dno = up; 481 dno = up;
@@ -483,12 +483,12 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
483 t = get_pos(dnode, de); 483 t = get_pos(dnode, de);
484 for_all_poss(i, hpfs_pos_subst, t, 4); 484 for_all_poss(i, hpfs_pos_subst, t, 4);
485 for_all_poss(i, hpfs_pos_subst, t + 1, 5); 485 for_all_poss(i, hpfs_pos_subst, t + 1, 5);
486 if (!(nde = kmalloc(de->length, GFP_NOFS))) { 486 if (!(nde = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
487 hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted"); 487 hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted");
488 hpfs_brelse4(&qbh); 488 hpfs_brelse4(&qbh);
489 return 0; 489 return 0;
490 } 490 }
491 memcpy(nde, de, de->length); 491 memcpy(nde, de, le16_to_cpu(de->length));
492 ddno = de->down ? de_down_pointer(de) : 0; 492 ddno = de->down ? de_down_pointer(de) : 0;
493 hpfs_delete_de(i->i_sb, dnode, de); 493 hpfs_delete_de(i->i_sb, dnode, de);
494 set_last_pointer(i->i_sb, dnode, ddno); 494 set_last_pointer(i->i_sb, dnode, ddno);
@@ -517,11 +517,11 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
517 try_it_again: 517 try_it_again:
518 if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return; 518 if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return;
519 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return; 519 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return;
520 if (dnode->first_free > 56) goto end; 520 if (le32_to_cpu(dnode->first_free) > 56) goto end;
521 if (dnode->first_free == 52 || dnode->first_free == 56) { 521 if (le32_to_cpu(dnode->first_free) == 52 || le32_to_cpu(dnode->first_free) == 56) {
522 struct hpfs_dirent *de_end; 522 struct hpfs_dirent *de_end;
523 int root = dnode->root_dnode; 523 int root = dnode->root_dnode;
524 up = dnode->up; 524 up = le32_to_cpu(dnode->up);
525 de = dnode_first_de(dnode); 525 de = dnode_first_de(dnode);
526 down = de->down ? de_down_pointer(de) : 0; 526 down = de->down ? de_down_pointer(de) : 0;
527 if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) { 527 if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) {
@@ -545,13 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
545 return; 545 return;
546 } 546 }
547 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { 547 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
548 d1->up = up; 548 d1->up = cpu_to_le32(up);
549 d1->root_dnode = 1; 549 d1->root_dnode = 1;
550 hpfs_mark_4buffers_dirty(&qbh1); 550 hpfs_mark_4buffers_dirty(&qbh1);
551 hpfs_brelse4(&qbh1); 551 hpfs_brelse4(&qbh1);
552 } 552 }
553 if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) { 553 if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) {
554 fnode->u.external[0].disk_secno = down; 554 fnode->u.external[0].disk_secno = cpu_to_le32(down);
555 mark_buffer_dirty(bh); 555 mark_buffer_dirty(bh);
556 brelse(bh); 556 brelse(bh);
557 } 557 }
@@ -570,22 +570,22 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
570 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); 570 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p);
571 if (!down) { 571 if (!down) {
572 de->down = 0; 572 de->down = 0;
573 de->length -= 4; 573 de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
574 dnode->first_free -= 4; 574 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
575 memmove(de_next_de(de), (char *)de_next_de(de) + 4, 575 memmove(de_next_de(de), (char *)de_next_de(de) + 4,
576 (char *)dnode + dnode->first_free - (char *)de_next_de(de)); 576 (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de));
577 } else { 577 } else {
578 struct dnode *d1; 578 struct dnode *d1;
579 struct quad_buffer_head qbh1; 579 struct quad_buffer_head qbh1;
580 *(dnode_secno *) ((void *) de + de->length - 4) = down; 580 *(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4) = down;
581 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { 581 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
582 d1->up = up; 582 d1->up = cpu_to_le32(up);
583 hpfs_mark_4buffers_dirty(&qbh1); 583 hpfs_mark_4buffers_dirty(&qbh1);
584 hpfs_brelse4(&qbh1); 584 hpfs_brelse4(&qbh1);
585 } 585 }
586 } 586 }
587 } else { 587 } else {
588 hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, dnode->first_free); 588 hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, le32_to_cpu(dnode->first_free));
589 goto end; 589 goto end;
590 } 590 }
591 591
@@ -596,18 +596,18 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
596 struct quad_buffer_head qbh1; 596 struct quad_buffer_head qbh1;
597 if (!de_next->down) goto endm; 597 if (!de_next->down) goto endm;
598 ndown = de_down_pointer(de_next); 598 ndown = de_down_pointer(de_next);
599 if (!(de_cp = kmalloc(de->length, GFP_NOFS))) { 599 if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
600 printk("HPFS: out of memory for dtree balancing\n"); 600 printk("HPFS: out of memory for dtree balancing\n");
601 goto endm; 601 goto endm;
602 } 602 }
603 memcpy(de_cp, de, de->length); 603 memcpy(de_cp, de, le16_to_cpu(de->length));
604 hpfs_delete_de(i->i_sb, dnode, de); 604 hpfs_delete_de(i->i_sb, dnode, de);
605 hpfs_mark_4buffers_dirty(&qbh); 605 hpfs_mark_4buffers_dirty(&qbh);
606 hpfs_brelse4(&qbh); 606 hpfs_brelse4(&qbh);
607 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4); 607 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4);
608 for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1); 608 for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1);
609 if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) { 609 if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) {
610 d1->up = ndown; 610 d1->up = cpu_to_le32(ndown);
611 hpfs_mark_4buffers_dirty(&qbh1); 611 hpfs_mark_4buffers_dirty(&qbh1);
612 hpfs_brelse4(&qbh1); 612 hpfs_brelse4(&qbh1);
613 } 613 }
@@ -635,7 +635,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
635 struct hpfs_dirent *del = dnode_last_de(d1); 635 struct hpfs_dirent *del = dnode_last_de(d1);
636 dlp = del->down ? de_down_pointer(del) : 0; 636 dlp = del->down ? de_down_pointer(del) : 0;
637 if (!dlp && down) { 637 if (!dlp && down) {
638 if (d1->first_free > 2044) { 638 if (le32_to_cpu(d1->first_free) > 2044) {
639 if (hpfs_sb(i->i_sb)->sb_chk >= 2) { 639 if (hpfs_sb(i->i_sb)->sb_chk >= 2) {
640 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); 640 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
641 printk("HPFS: warning: terminating balancing operation\n"); 641 printk("HPFS: warning: terminating balancing operation\n");
@@ -647,38 +647,38 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
647 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); 647 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
648 printk("HPFS: warning: goin'on\n"); 648 printk("HPFS: warning: goin'on\n");
649 } 649 }
650 del->length += 4; 650 del->length = cpu_to_le16(le16_to_cpu(del->length) + 4);
651 del->down = 1; 651 del->down = 1;
652 d1->first_free += 4; 652 d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4);
653 } 653 }
654 if (dlp && !down) { 654 if (dlp && !down) {
655 del->length -= 4; 655 del->length = cpu_to_le16(le16_to_cpu(del->length) - 4);
656 del->down = 0; 656 del->down = 0;
657 d1->first_free -= 4; 657 d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4);
658 } else if (down) 658 } else if (down)
659 *(dnode_secno *) ((void *) del + del->length - 4) = down; 659 *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down);
660 } else goto endm; 660 } else goto endm;
661 if (!(de_cp = kmalloc(de_prev->length, GFP_NOFS))) { 661 if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) {
662 printk("HPFS: out of memory for dtree balancing\n"); 662 printk("HPFS: out of memory for dtree balancing\n");
663 hpfs_brelse4(&qbh1); 663 hpfs_brelse4(&qbh1);
664 goto endm; 664 goto endm;
665 } 665 }
666 hpfs_mark_4buffers_dirty(&qbh1); 666 hpfs_mark_4buffers_dirty(&qbh1);
667 hpfs_brelse4(&qbh1); 667 hpfs_brelse4(&qbh1);
668 memcpy(de_cp, de_prev, de_prev->length); 668 memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length));
669 hpfs_delete_de(i->i_sb, dnode, de_prev); 669 hpfs_delete_de(i->i_sb, dnode, de_prev);
670 if (!de_prev->down) { 670 if (!de_prev->down) {
671 de_prev->length += 4; 671 de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4);
672 de_prev->down = 1; 672 de_prev->down = 1;
673 dnode->first_free += 4; 673 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4);
674 } 674 }
675 *(dnode_secno *) ((void *) de_prev + de_prev->length - 4) = ndown; 675 *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown);
676 hpfs_mark_4buffers_dirty(&qbh); 676 hpfs_mark_4buffers_dirty(&qbh);
677 hpfs_brelse4(&qbh); 677 hpfs_brelse4(&qbh);
678 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); 678 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4);
679 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1)); 679 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1));
680 if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) { 680 if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) {
681 d1->up = ndown; 681 d1->up = cpu_to_le32(ndown);
682 hpfs_mark_4buffers_dirty(&qbh1); 682 hpfs_mark_4buffers_dirty(&qbh1);
683 hpfs_brelse4(&qbh1); 683 hpfs_brelse4(&qbh1);
684 } 684 }
@@ -701,7 +701,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
701{ 701{
702 struct dnode *dnode = qbh->data; 702 struct dnode *dnode = qbh->data;
703 dnode_secno down = 0; 703 dnode_secno down = 0;
704 int lock = 0;
705 loff_t t; 704 loff_t t;
706 if (de->first || de->last) { 705 if (de->first || de->last) {
707 hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno); 706 hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno);
@@ -710,11 +709,8 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
710 } 709 }
711 if (de->down) down = de_down_pointer(de); 710 if (de->down) down = de_down_pointer(de);
712 if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) { 711 if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) {
713 lock = 1;
714 hpfs_lock_creation(i->i_sb);
715 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) { 712 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) {
716 hpfs_brelse4(qbh); 713 hpfs_brelse4(qbh);
717 hpfs_unlock_creation(i->i_sb);
718 return 2; 714 return 2;
719 } 715 }
720 } 716 }
@@ -727,11 +723,9 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
727 dnode_secno a = move_to_top(i, down, dno); 723 dnode_secno a = move_to_top(i, down, dno);
728 for_all_poss(i, hpfs_pos_subst, 5, t); 724 for_all_poss(i, hpfs_pos_subst, 5, t);
729 if (a) delete_empty_dnode(i, a); 725 if (a) delete_empty_dnode(i, a);
730 if (lock) hpfs_unlock_creation(i->i_sb);
731 return !a; 726 return !a;
732 } 727 }
733 delete_empty_dnode(i, dno); 728 delete_empty_dnode(i, dno);
734 if (lock) hpfs_unlock_creation(i->i_sb);
735 return 0; 729 return 0;
736} 730}
737 731
@@ -751,8 +745,8 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
751 ptr = 0; 745 ptr = 0;
752 go_up: 746 go_up:
753 if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return; 747 if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return;
754 if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && dnode->up != odno) 748 if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && le32_to_cpu(dnode->up) != odno)
755 hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, dnode->up); 749 hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, le32_to_cpu(dnode->up));
756 de = dnode_first_de(dnode); 750 de = dnode_first_de(dnode);
757 if (ptr) while(1) { 751 if (ptr) while(1) {
758 if (de->down) if (de_down_pointer(de) == ptr) goto process_de; 752 if (de->down) if (de_down_pointer(de) == ptr) goto process_de;
@@ -776,7 +770,7 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
776 if (!de->first && !de->last && n_items) (*n_items)++; 770 if (!de->first && !de->last && n_items) (*n_items)++;
777 if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de; 771 if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de;
778 ptr = dno; 772 ptr = dno;
779 dno = dnode->up; 773 dno = le32_to_cpu(dnode->up);
780 if (dnode->root_dnode) { 774 if (dnode->root_dnode) {
781 hpfs_brelse4(&qbh); 775 hpfs_brelse4(&qbh);
782 return; 776 return;
@@ -824,8 +818,8 @@ dnode_secno hpfs_de_as_down_as_possible(struct super_block *s, dnode_secno dno)
824 return d; 818 return d;
825 if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno; 819 if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno;
826 if (hpfs_sb(s)->sb_chk) 820 if (hpfs_sb(s)->sb_chk)
827 if (up && ((struct dnode *)qbh.data)->up != up) 821 if (up && le32_to_cpu(((struct dnode *)qbh.data)->up) != up)
828 hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, ((struct dnode *)qbh.data)->up); 822 hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, le32_to_cpu(((struct dnode *)qbh.data)->up));
829 if (!de->down) { 823 if (!de->down) {
830 hpfs_brelse4(&qbh); 824 hpfs_brelse4(&qbh);
831 return d; 825 return d;
@@ -874,7 +868,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
874 /* Going up */ 868 /* Going up */
875 if (dnode->root_dnode) goto bail; 869 if (dnode->root_dnode) goto bail;
876 870
877 if (!(up_dnode = hpfs_map_dnode(inode->i_sb, dnode->up, &qbh0))) 871 if (!(up_dnode = hpfs_map_dnode(inode->i_sb, le32_to_cpu(dnode->up), &qbh0)))
878 goto bail; 872 goto bail;
879 873
880 end_up_de = dnode_end_de(up_dnode); 874 end_up_de = dnode_end_de(up_dnode);
@@ -882,16 +876,16 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
882 for (up_de = dnode_first_de(up_dnode); up_de < end_up_de; 876 for (up_de = dnode_first_de(up_dnode); up_de < end_up_de;
883 up_de = de_next_de(up_de)) { 877 up_de = de_next_de(up_de)) {
884 if (!(++c & 077)) hpfs_error(inode->i_sb, 878 if (!(++c & 077)) hpfs_error(inode->i_sb,
885 "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", dnode->up); 879 "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", le32_to_cpu(dnode->up));
886 if (up_de->down && de_down_pointer(up_de) == dno) { 880 if (up_de->down && de_down_pointer(up_de) == dno) {
887 *posp = ((loff_t) dnode->up << 4) + c; 881 *posp = ((loff_t) le32_to_cpu(dnode->up) << 4) + c;
888 hpfs_brelse4(&qbh0); 882 hpfs_brelse4(&qbh0);
889 return de; 883 return de;
890 } 884 }
891 } 885 }
892 886
893 hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x", 887 hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x",
894 dno, dnode->up); 888 dno, le32_to_cpu(dnode->up));
895 hpfs_brelse4(&qbh0); 889 hpfs_brelse4(&qbh0);
896 890
897 bail: 891 bail:
@@ -1017,17 +1011,17 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1017 /*name2[15] = 0xff;*/ 1011 /*name2[15] = 0xff;*/
1018 name1len = 15; name2len = 256; 1012 name1len = 15; name2len = 256;
1019 } 1013 }
1020 if (!(upf = hpfs_map_fnode(s, f->up, &bh))) { 1014 if (!(upf = hpfs_map_fnode(s, le32_to_cpu(f->up), &bh))) {
1021 kfree(name2); 1015 kfree(name2);
1022 return NULL; 1016 return NULL;
1023 } 1017 }
1024 if (!upf->dirflag) { 1018 if (!upf->dirflag) {
1025 brelse(bh); 1019 brelse(bh);
1026 hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, f->up); 1020 hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up));
1027 kfree(name2); 1021 kfree(name2);
1028 return NULL; 1022 return NULL;
1029 } 1023 }
1030 dno = upf->u.external[0].disk_secno; 1024 dno = le32_to_cpu(upf->u.external[0].disk_secno);
1031 brelse(bh); 1025 brelse(bh);
1032 go_down: 1026 go_down:
1033 downd = 0; 1027 downd = 0;
@@ -1049,7 +1043,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1049 return NULL; 1043 return NULL;
1050 } 1044 }
1051 next_de: 1045 next_de:
1052 if (de->fnode == fno) { 1046 if (le32_to_cpu(de->fnode) == fno) {
1053 kfree(name2); 1047 kfree(name2);
1054 return de; 1048 return de;
1055 } 1049 }
@@ -1065,7 +1059,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1065 goto go_down; 1059 goto go_down;
1066 } 1060 }
1067 f: 1061 f:
1068 if (de->fnode == fno) { 1062 if (le32_to_cpu(de->fnode) == fno) {
1069 kfree(name2); 1063 kfree(name2);
1070 return de; 1064 return de;
1071 } 1065 }
@@ -1074,7 +1068,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1074 if ((de = de_next_de(de)) < de_end) goto next_de; 1068 if ((de = de_next_de(de)) < de_end) goto next_de;
1075 if (d->root_dnode) goto not_found; 1069 if (d->root_dnode) goto not_found;
1076 downd = dno; 1070 downd = dno;
1077 dno = d->up; 1071 dno = le32_to_cpu(d->up);
1078 hpfs_brelse4(qbh); 1072 hpfs_brelse4(qbh);
1079 if (hpfs_sb(s)->sb_chk) 1073 if (hpfs_sb(s)->sb_chk)
1080 if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) { 1074 if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) {
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 45e53d972b42..d8b84d113c89 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -24,7 +24,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
24 } 24 }
25 if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; 25 if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return;
26 if (ea->indirect) { 26 if (ea->indirect) {
27 if (ea->valuelen != 8) { 27 if (ea_valuelen(ea) != 8) {
28 hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", 28 hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x",
29 ano ? "anode" : "sectors", a, pos); 29 ano ? "anode" : "sectors", a, pos);
30 return; 30 return;
@@ -33,7 +33,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
33 return; 33 return;
34 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); 34 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
35 } 35 }
36 pos += ea->namelen + ea->valuelen + 5; 36 pos += ea->namelen + ea_valuelen(ea) + 5;
37 } 37 }
38 if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9); 38 if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9);
39 else { 39 else {
@@ -76,24 +76,24 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
76 unsigned pos; 76 unsigned pos;
77 int ano, len; 77 int ano, len;
78 secno a; 78 secno a;
79 char ex[4 + 255 + 1 + 8];
79 struct extended_attribute *ea; 80 struct extended_attribute *ea;
80 struct extended_attribute *ea_end = fnode_end_ea(fnode); 81 struct extended_attribute *ea_end = fnode_end_ea(fnode);
81 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) 82 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
82 if (!strcmp(ea->name, key)) { 83 if (!strcmp(ea->name, key)) {
83 if (ea->indirect) 84 if (ea->indirect)
84 goto indirect; 85 goto indirect;
85 if (ea->valuelen >= size) 86 if (ea_valuelen(ea) >= size)
86 return -EINVAL; 87 return -EINVAL;
87 memcpy(buf, ea_data(ea), ea->valuelen); 88 memcpy(buf, ea_data(ea), ea_valuelen(ea));
88 buf[ea->valuelen] = 0; 89 buf[ea_valuelen(ea)] = 0;
89 return 0; 90 return 0;
90 } 91 }
91 a = fnode->ea_secno; 92 a = le32_to_cpu(fnode->ea_secno);
92 len = fnode->ea_size_l; 93 len = le32_to_cpu(fnode->ea_size_l);
93 ano = fnode->ea_anode; 94 ano = fnode->ea_anode;
94 pos = 0; 95 pos = 0;
95 while (pos < len) { 96 while (pos < len) {
96 char ex[4 + 255 + 1 + 8];
97 ea = (struct extended_attribute *)ex; 97 ea = (struct extended_attribute *)ex;
98 if (pos + 4 > len) { 98 if (pos + 4 > len) {
99 hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x", 99 hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x",
@@ -106,14 +106,14 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
106 if (!strcmp(ea->name, key)) { 106 if (!strcmp(ea->name, key)) {
107 if (ea->indirect) 107 if (ea->indirect)
108 goto indirect; 108 goto indirect;
109 if (ea->valuelen >= size) 109 if (ea_valuelen(ea) >= size)
110 return -EINVAL; 110 return -EINVAL;
111 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, buf)) 111 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), buf))
112 return -EIO; 112 return -EIO;
113 buf[ea->valuelen] = 0; 113 buf[ea_valuelen(ea)] = 0;
114 return 0; 114 return 0;
115 } 115 }
116 pos += ea->namelen + ea->valuelen + 5; 116 pos += ea->namelen + ea_valuelen(ea) + 5;
117 } 117 }
118 return -ENOENT; 118 return -ENOENT;
119indirect: 119indirect:
@@ -138,16 +138,16 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
138 if (!strcmp(ea->name, key)) { 138 if (!strcmp(ea->name, key)) {
139 if (ea->indirect) 139 if (ea->indirect)
140 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); 140 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
141 if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) { 141 if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
142 printk("HPFS: out of memory for EA\n"); 142 printk("HPFS: out of memory for EA\n");
143 return NULL; 143 return NULL;
144 } 144 }
145 memcpy(ret, ea_data(ea), ea->valuelen); 145 memcpy(ret, ea_data(ea), ea_valuelen(ea));
146 ret[ea->valuelen] = 0; 146 ret[ea_valuelen(ea)] = 0;
147 return ret; 147 return ret;
148 } 148 }
149 a = fnode->ea_secno; 149 a = le32_to_cpu(fnode->ea_secno);
150 len = fnode->ea_size_l; 150 len = le32_to_cpu(fnode->ea_size_l);
151 ano = fnode->ea_anode; 151 ano = fnode->ea_anode;
152 pos = 0; 152 pos = 0;
153 while (pos < len) { 153 while (pos < len) {
@@ -164,18 +164,18 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
164 if (!strcmp(ea->name, key)) { 164 if (!strcmp(ea->name, key)) {
165 if (ea->indirect) 165 if (ea->indirect)
166 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); 166 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
167 if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) { 167 if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
168 printk("HPFS: out of memory for EA\n"); 168 printk("HPFS: out of memory for EA\n");
169 return NULL; 169 return NULL;
170 } 170 }
171 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, ret)) { 171 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) {
172 kfree(ret); 172 kfree(ret);
173 return NULL; 173 return NULL;
174 } 174 }
175 ret[ea->valuelen] = 0; 175 ret[ea_valuelen(ea)] = 0;
176 return ret; 176 return ret;
177 } 177 }
178 pos += ea->namelen + ea->valuelen + 5; 178 pos += ea->namelen + ea_valuelen(ea) + 5;
179 } 179 }
180 return NULL; 180 return NULL;
181} 181}
@@ -202,13 +202,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
202 if (ea->indirect) { 202 if (ea->indirect) {
203 if (ea_len(ea) == size) 203 if (ea_len(ea) == size)
204 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); 204 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
205 } else if (ea->valuelen == size) { 205 } else if (ea_valuelen(ea) == size) {
206 memcpy(ea_data(ea), data, size); 206 memcpy(ea_data(ea), data, size);
207 } 207 }
208 return; 208 return;
209 } 209 }
210 a = fnode->ea_secno; 210 a = le32_to_cpu(fnode->ea_secno);
211 len = fnode->ea_size_l; 211 len = le32_to_cpu(fnode->ea_size_l);
212 ano = fnode->ea_anode; 212 ano = fnode->ea_anode;
213 pos = 0; 213 pos = 0;
214 while (pos < len) { 214 while (pos < len) {
@@ -228,68 +228,70 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
228 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); 228 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
229 } 229 }
230 else { 230 else {
231 if (ea->valuelen == size) 231 if (ea_valuelen(ea) == size)
232 hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data); 232 hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data);
233 } 233 }
234 return; 234 return;
235 } 235 }
236 pos += ea->namelen + ea->valuelen + 5; 236 pos += ea->namelen + ea_valuelen(ea) + 5;
237 } 237 }
238 if (!fnode->ea_offs) { 238 if (!le16_to_cpu(fnode->ea_offs)) {
239 /*if (fnode->ea_size_s) { 239 /*if (le16_to_cpu(fnode->ea_size_s)) {
240 hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0", 240 hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0",
241 inode->i_ino, fnode->ea_size_s); 241 inode->i_ino, le16_to_cpu(fnode->ea_size_s));
242 return; 242 return;
243 }*/ 243 }*/
244 fnode->ea_offs = 0xc4; 244 fnode->ea_offs = cpu_to_le16(0xc4);
245 } 245 }
246 if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) { 246 if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) {
247 hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", 247 hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
248 (unsigned long)inode->i_ino, 248 (unsigned long)inode->i_ino,
249 fnode->ea_offs, fnode->ea_size_s); 249 le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
250 return; 250 return;
251 } 251 }
252 if ((fnode->ea_size_s || !fnode->ea_size_l) && 252 if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) &&
253 fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s + strlen(key) + size + 5 <= 0x200) { 253 le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5 <= 0x200) {
254 ea = fnode_end_ea(fnode); 254 ea = fnode_end_ea(fnode);
255 *(char *)ea = 0; 255 *(char *)ea = 0;
256 ea->namelen = strlen(key); 256 ea->namelen = strlen(key);
257 ea->valuelen = size; 257 ea->valuelen_lo = size;
258 ea->valuelen_hi = size >> 8;
258 strcpy(ea->name, key); 259 strcpy(ea->name, key);
259 memcpy(ea_data(ea), data, size); 260 memcpy(ea_data(ea), data, size);
260 fnode->ea_size_s += strlen(key) + size + 5; 261 fnode->ea_size_s = cpu_to_le16(le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5);
261 goto ret; 262 goto ret;
262 } 263 }
263 /* Most the code here is 99.9993422% unused. I hope there are no bugs. 264 /* Most the code here is 99.9993422% unused. I hope there are no bugs.
264 But what .. HPFS.IFS has also bugs in ea management. */ 265 But what .. HPFS.IFS has also bugs in ea management. */
265 if (fnode->ea_size_s && !fnode->ea_size_l) { 266 if (le16_to_cpu(fnode->ea_size_s) && !le32_to_cpu(fnode->ea_size_l)) {
266 secno n; 267 secno n;
267 struct buffer_head *bh; 268 struct buffer_head *bh;
268 char *data; 269 char *data;
269 if (!(n = hpfs_alloc_sector(s, fno, 1, 0, 1))) return; 270 if (!(n = hpfs_alloc_sector(s, fno, 1, 0))) return;
270 if (!(data = hpfs_get_sector(s, n, &bh))) { 271 if (!(data = hpfs_get_sector(s, n, &bh))) {
271 hpfs_free_sectors(s, n, 1); 272 hpfs_free_sectors(s, n, 1);
272 return; 273 return;
273 } 274 }
274 memcpy(data, fnode_ea(fnode), fnode->ea_size_s); 275 memcpy(data, fnode_ea(fnode), le16_to_cpu(fnode->ea_size_s));
275 fnode->ea_size_l = fnode->ea_size_s; 276 fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s));
276 fnode->ea_size_s = 0; 277 fnode->ea_size_s = cpu_to_le16(0);
277 fnode->ea_secno = n; 278 fnode->ea_secno = cpu_to_le32(n);
278 fnode->ea_anode = 0; 279 fnode->ea_anode = cpu_to_le32(0);
279 mark_buffer_dirty(bh); 280 mark_buffer_dirty(bh);
280 brelse(bh); 281 brelse(bh);
281 } 282 }
282 pos = fnode->ea_size_l + 5 + strlen(key) + size; 283 pos = le32_to_cpu(fnode->ea_size_l) + 5 + strlen(key) + size;
283 len = (fnode->ea_size_l + 511) >> 9; 284 len = (le32_to_cpu(fnode->ea_size_l) + 511) >> 9;
284 if (pos >= 30000) goto bail; 285 if (pos >= 30000) goto bail;
285 while (((pos + 511) >> 9) > len) { 286 while (((pos + 511) >> 9) > len) {
286 if (!len) { 287 if (!len) {
287 if (!(fnode->ea_secno = hpfs_alloc_sector(s, fno, 1, 0, 1))) 288 secno q = hpfs_alloc_sector(s, fno, 1, 0);
288 goto bail; 289 if (!q) goto bail;
290 fnode->ea_secno = cpu_to_le32(q);
289 fnode->ea_anode = 0; 291 fnode->ea_anode = 0;
290 len++; 292 len++;
291 } else if (!fnode->ea_anode) { 293 } else if (!fnode->ea_anode) {
292 if (hpfs_alloc_if_possible(s, fnode->ea_secno + len)) { 294 if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) {
293 len++; 295 len++;
294 } else { 296 } else {
295 /* Aargh... don't know how to create ea anodes :-( */ 297 /* Aargh... don't know how to create ea anodes :-( */
@@ -298,26 +300,26 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
298 anode_secno a_s; 300 anode_secno a_s;
299 if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh))) 301 if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh)))
300 goto bail; 302 goto bail;
301 anode->up = fno; 303 anode->up = cpu_to_le32(fno);
302 anode->btree.fnode_parent = 1; 304 anode->btree.fnode_parent = 1;
303 anode->btree.n_free_nodes--; 305 anode->btree.n_free_nodes--;
304 anode->btree.n_used_nodes++; 306 anode->btree.n_used_nodes++;
305 anode->btree.first_free += 12; 307 anode->btree.first_free = cpu_to_le16(le16_to_cpu(anode->btree.first_free) + 12);
306 anode->u.external[0].disk_secno = fnode->ea_secno; 308 anode->u.external[0].disk_secno = cpu_to_le32(le32_to_cpu(fnode->ea_secno));
307 anode->u.external[0].file_secno = 0; 309 anode->u.external[0].file_secno = cpu_to_le32(0);
308 anode->u.external[0].length = len; 310 anode->u.external[0].length = cpu_to_le32(len);
309 mark_buffer_dirty(bh); 311 mark_buffer_dirty(bh);
310 brelse(bh); 312 brelse(bh);
311 fnode->ea_anode = 1; 313 fnode->ea_anode = 1;
312 fnode->ea_secno = a_s;*/ 314 fnode->ea_secno = cpu_to_le32(a_s);*/
313 secno new_sec; 315 secno new_sec;
314 int i; 316 int i;
315 if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9), 1))) 317 if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9))))
316 goto bail; 318 goto bail;
317 for (i = 0; i < len; i++) { 319 for (i = 0; i < len; i++) {
318 struct buffer_head *bh1, *bh2; 320 struct buffer_head *bh1, *bh2;
319 void *b1, *b2; 321 void *b1, *b2;
320 if (!(b1 = hpfs_map_sector(s, fnode->ea_secno + i, &bh1, len - i - 1))) { 322 if (!(b1 = hpfs_map_sector(s, le32_to_cpu(fnode->ea_secno) + i, &bh1, len - i - 1))) {
321 hpfs_free_sectors(s, new_sec, (pos + 511) >> 9); 323 hpfs_free_sectors(s, new_sec, (pos + 511) >> 9);
322 goto bail; 324 goto bail;
323 } 325 }
@@ -331,13 +333,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
331 mark_buffer_dirty(bh2); 333 mark_buffer_dirty(bh2);
332 brelse(bh2); 334 brelse(bh2);
333 } 335 }
334 hpfs_free_sectors(s, fnode->ea_secno, len); 336 hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno), len);
335 fnode->ea_secno = new_sec; 337 fnode->ea_secno = cpu_to_le32(new_sec);
336 len = (pos + 511) >> 9; 338 len = (pos + 511) >> 9;
337 } 339 }
338 } 340 }
339 if (fnode->ea_anode) { 341 if (fnode->ea_anode) {
340 if (hpfs_add_sector_to_btree(s, fnode->ea_secno, 342 if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno),
341 0, len) != -1) { 343 0, len) != -1) {
342 len++; 344 len++;
343 } else { 345 } else {
@@ -349,17 +351,17 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
349 h[1] = strlen(key); 351 h[1] = strlen(key);
350 h[2] = size & 0xff; 352 h[2] = size & 0xff;
351 h[3] = size >> 8; 353 h[3] = size >> 8;
352 if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l, 4, h)) goto bail; 354 if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail;
353 if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 4, h[1] + 1, key)) goto bail; 355 if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail;
354 if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 5 + h[1], size, data)) goto bail; 356 if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail;
355 fnode->ea_size_l = pos; 357 fnode->ea_size_l = cpu_to_le32(pos);
356 ret: 358 ret:
357 hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; 359 hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size;
358 return; 360 return;
359 bail: 361 bail:
360 if (fnode->ea_secno) 362 if (le32_to_cpu(fnode->ea_secno))
361 if (fnode->ea_anode) hpfs_truncate_btree(s, fnode->ea_secno, 1, (fnode->ea_size_l + 511) >> 9); 363 if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9);
362 else hpfs_free_sectors(s, fnode->ea_secno + ((fnode->ea_size_l + 511) >> 9), len - ((fnode->ea_size_l + 511) >> 9)); 364 else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9));
363 else fnode->ea_secno = fnode->ea_size_l = 0; 365 else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0);
364} 366}
365 367
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9b9eb6933e43..89c500ee5213 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -20,8 +20,8 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
20 20
21int hpfs_file_fsync(struct file *file, int datasync) 21int hpfs_file_fsync(struct file *file, int datasync)
22{ 22{
23 /*return file_fsync(file, datasync);*/ 23 struct inode *inode = file->f_mapping->host;
24 return 0; /* Don't fsync :-) */ 24 return sync_blockdev(inode->i_sb->s_bdev);
25} 25}
26 26
27/* 27/*
@@ -48,38 +48,46 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
48static void hpfs_truncate(struct inode *i) 48static void hpfs_truncate(struct inode *i)
49{ 49{
50 if (IS_IMMUTABLE(i)) return /*-EPERM*/; 50 if (IS_IMMUTABLE(i)) return /*-EPERM*/;
51 hpfs_lock(i->i_sb); 51 hpfs_lock_assert(i->i_sb);
52
52 hpfs_i(i)->i_n_secs = 0; 53 hpfs_i(i)->i_n_secs = 0;
53 i->i_blocks = 1 + ((i->i_size + 511) >> 9); 54 i->i_blocks = 1 + ((i->i_size + 511) >> 9);
54 hpfs_i(i)->mmu_private = i->i_size; 55 hpfs_i(i)->mmu_private = i->i_size;
55 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); 56 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
56 hpfs_write_inode(i); 57 hpfs_write_inode(i);
57 hpfs_i(i)->i_n_secs = 0; 58 hpfs_i(i)->i_n_secs = 0;
58 hpfs_unlock(i->i_sb);
59} 59}
60 60
61static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 61static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
62{ 62{
63 int r;
63 secno s; 64 secno s;
65 hpfs_lock(inode->i_sb);
64 s = hpfs_bmap(inode, iblock); 66 s = hpfs_bmap(inode, iblock);
65 if (s) { 67 if (s) {
66 map_bh(bh_result, inode->i_sb, s); 68 map_bh(bh_result, inode->i_sb, s);
67 return 0; 69 goto ret_0;
68 } 70 }
69 if (!create) return 0; 71 if (!create) goto ret_0;
70 if (iblock<<9 != hpfs_i(inode)->mmu_private) { 72 if (iblock<<9 != hpfs_i(inode)->mmu_private) {
71 BUG(); 73 BUG();
72 return -EIO; 74 r = -EIO;
75 goto ret_r;
73 } 76 }
74 if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) { 77 if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) {
75 hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1); 78 hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
76 return -ENOSPC; 79 r = -ENOSPC;
80 goto ret_r;
77 } 81 }
78 inode->i_blocks++; 82 inode->i_blocks++;
79 hpfs_i(inode)->mmu_private += 512; 83 hpfs_i(inode)->mmu_private += 512;
80 set_buffer_new(bh_result); 84 set_buffer_new(bh_result);
81 map_bh(bh_result, inode->i_sb, s); 85 map_bh(bh_result, inode->i_sb, s);
82 return 0; 86 ret_0:
87 r = 0;
88 ret_r:
89 hpfs_unlock(inode->i_sb);
90 return r;
83} 91}
84 92
85static int hpfs_writepage(struct page *page, struct writeback_control *wbc) 93static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -130,8 +138,11 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
130 ssize_t retval; 138 ssize_t retval;
131 139
132 retval = do_sync_write(file, buf, count, ppos); 140 retval = do_sync_write(file, buf, count, ppos);
133 if (retval > 0) 141 if (retval > 0) {
142 hpfs_lock(file->f_path.dentry->d_sb);
134 hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1; 143 hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
144 hpfs_unlock(file->f_path.dentry->d_sb);
145 }
135 return retval; 146 return retval;
136} 147}
137 148
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index 0e84c73cd9c4..8b0650aae328 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -19,9 +19,13 @@
19 For definitive information on HPFS, ask somebody else -- this is guesswork. 19 For definitive information on HPFS, ask somebody else -- this is guesswork.
20 There are certain to be many mistakes. */ 20 There are certain to be many mistakes. */
21 21
22#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
23#error unknown endian
24#endif
25
22/* Notation */ 26/* Notation */
23 27
24typedef unsigned secno; /* sector number, partition relative */ 28typedef u32 secno; /* sector number, partition relative */
25 29
26typedef secno dnode_secno; /* sector number of a dnode */ 30typedef secno dnode_secno; /* sector number of a dnode */
27typedef secno fnode_secno; /* sector number of an fnode */ 31typedef secno fnode_secno; /* sector number of an fnode */
@@ -38,28 +42,28 @@ typedef u32 time32_t; /* 32-bit time_t type */
38 42
39struct hpfs_boot_block 43struct hpfs_boot_block
40{ 44{
41 unsigned char jmp[3]; 45 u8 jmp[3];
42 unsigned char oem_id[8]; 46 u8 oem_id[8];
43 unsigned char bytes_per_sector[2]; /* 512 */ 47 u8 bytes_per_sector[2]; /* 512 */
44 unsigned char sectors_per_cluster; 48 u8 sectors_per_cluster;
45 unsigned char n_reserved_sectors[2]; 49 u8 n_reserved_sectors[2];
46 unsigned char n_fats; 50 u8 n_fats;
47 unsigned char n_rootdir_entries[2]; 51 u8 n_rootdir_entries[2];
48 unsigned char n_sectors_s[2]; 52 u8 n_sectors_s[2];
49 unsigned char media_byte; 53 u8 media_byte;
50 unsigned short sectors_per_fat; 54 u16 sectors_per_fat;
51 unsigned short sectors_per_track; 55 u16 sectors_per_track;
52 unsigned short heads_per_cyl; 56 u16 heads_per_cyl;
53 unsigned int n_hidden_sectors; 57 u32 n_hidden_sectors;
54 unsigned int n_sectors_l; /* size of partition */ 58 u32 n_sectors_l; /* size of partition */
55 unsigned char drive_number; 59 u8 drive_number;
56 unsigned char mbz; 60 u8 mbz;
57 unsigned char sig_28h; /* 28h */ 61 u8 sig_28h; /* 28h */
58 unsigned char vol_serno[4]; 62 u8 vol_serno[4];
59 unsigned char vol_label[11]; 63 u8 vol_label[11];
60 unsigned char sig_hpfs[8]; /* "HPFS " */ 64 u8 sig_hpfs[8]; /* "HPFS " */
61 unsigned char pad[448]; 65 u8 pad[448];
62 unsigned short magic; /* aa55 */ 66 u16 magic; /* aa55 */
63}; 67};
64 68
65 69
@@ -71,31 +75,29 @@ struct hpfs_boot_block
71 75
72struct hpfs_super_block 76struct hpfs_super_block
73{ 77{
74 unsigned magic; /* f995 e849 */ 78 u32 magic; /* f995 e849 */
75 unsigned magic1; /* fa53 e9c5, more magic? */ 79 u32 magic1; /* fa53 e9c5, more magic? */
76 /*unsigned huh202;*/ /* ?? 202 = N. of B. in 1.00390625 S.*/ 80 u8 version; /* version of a filesystem usually 2 */
77 char version; /* version of a filesystem usually 2 */ 81 u8 funcversion; /* functional version - oldest version
78 char funcversion; /* functional version - oldest version
79 of filesystem that can understand 82 of filesystem that can understand
80 this disk */ 83 this disk */
81 unsigned short int zero; /* 0 */ 84 u16 zero; /* 0 */
82 fnode_secno root; /* fnode of root directory */ 85 fnode_secno root; /* fnode of root directory */
83 secno n_sectors; /* size of filesystem */ 86 secno n_sectors; /* size of filesystem */
84 unsigned n_badblocks; /* number of bad blocks */ 87 u32 n_badblocks; /* number of bad blocks */
85 secno bitmaps; /* pointers to free space bit maps */ 88 secno bitmaps; /* pointers to free space bit maps */
86 unsigned zero1; /* 0 */ 89 u32 zero1; /* 0 */
87 secno badblocks; /* bad block list */ 90 secno badblocks; /* bad block list */
88 unsigned zero3; /* 0 */ 91 u32 zero3; /* 0 */
89 time32_t last_chkdsk; /* date last checked, 0 if never */ 92 time32_t last_chkdsk; /* date last checked, 0 if never */
90 /*unsigned zero4;*/ /* 0 */ 93 time32_t last_optimize; /* date last optimized, 0 if never */
91 time32_t last_optimize; /* date last optimized, 0 if never */
92 secno n_dir_band; /* number of sectors in dir band */ 94 secno n_dir_band; /* number of sectors in dir band */
93 secno dir_band_start; /* first sector in dir band */ 95 secno dir_band_start; /* first sector in dir band */
94 secno dir_band_end; /* last sector in dir band */ 96 secno dir_band_end; /* last sector in dir band */
95 secno dir_band_bitmap; /* free space map, 1 dnode per bit */ 97 secno dir_band_bitmap; /* free space map, 1 dnode per bit */
96 char volume_name[32]; /* not used */ 98 u8 volume_name[32]; /* not used */
97 secno user_id_table; /* 8 preallocated sectors - user id */ 99 secno user_id_table; /* 8 preallocated sectors - user id */
98 unsigned zero6[103]; /* 0 */ 100 u32 zero6[103]; /* 0 */
99}; 101};
100 102
101 103
@@ -107,44 +109,65 @@ struct hpfs_super_block
107 109
108struct hpfs_spare_block 110struct hpfs_spare_block
109{ 111{
110 unsigned magic; /* f991 1849 */ 112 u32 magic; /* f991 1849 */
111 unsigned magic1; /* fa52 29c5, more magic? */ 113 u32 magic1; /* fa52 29c5, more magic? */
112 114
113 unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */ 115#ifdef __LITTLE_ENDIAN
114 /*unsigned flag1234: 4;*/ /* unknown flags */ 116 u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */
115 unsigned sparedir_used: 1; /* spare dirblks used */ 117 u8 sparedir_used: 1; /* spare dirblks used */
116 unsigned hotfixes_used: 1; /* hotfixes used */ 118 u8 hotfixes_used: 1; /* hotfixes used */
117 unsigned bad_sector: 1; /* bad sector, corrupted disk (???) */ 119 u8 bad_sector: 1; /* bad sector, corrupted disk (???) */
118 unsigned bad_bitmap: 1; /* bad bitmap */ 120 u8 bad_bitmap: 1; /* bad bitmap */
119 unsigned fast: 1; /* partition was fast formatted */ 121 u8 fast: 1; /* partition was fast formatted */
120 unsigned old_wrote: 1; /* old version wrote to partion */ 122 u8 old_wrote: 1; /* old version wrote to partion */
121 unsigned old_wrote_1: 1; /* old version wrote to partion (?) */ 123 u8 old_wrote_1: 1; /* old version wrote to partion (?) */
122 unsigned install_dasd_limits: 1; /* HPFS386 flags */ 124#else
123 unsigned resynch_dasd_limits: 1; 125 u8 old_wrote_1: 1; /* old version wrote to partion (?) */
124 unsigned dasd_limits_operational: 1; 126 u8 old_wrote: 1; /* old version wrote to partion */
125 unsigned multimedia_active: 1; 127 u8 fast: 1; /* partition was fast formatted */
126 unsigned dce_acls_active: 1; 128 u8 bad_bitmap: 1; /* bad bitmap */
127 unsigned dasd_limits_dirty: 1; 129 u8 bad_sector: 1; /* bad sector, corrupted disk (???) */
128 unsigned flag67: 2; 130 u8 hotfixes_used: 1; /* hotfixes used */
129 unsigned char mm_contlgulty; 131 u8 sparedir_used: 1; /* spare dirblks used */
130 unsigned char unused; 132 u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */
133#endif
134
135#ifdef __LITTLE_ENDIAN
136 u8 install_dasd_limits: 1; /* HPFS386 flags */
137 u8 resynch_dasd_limits: 1;
138 u8 dasd_limits_operational: 1;
139 u8 multimedia_active: 1;
140 u8 dce_acls_active: 1;
141 u8 dasd_limits_dirty: 1;
142 u8 flag67: 2;
143#else
144 u8 flag67: 2;
145 u8 dasd_limits_dirty: 1;
146 u8 dce_acls_active: 1;
147 u8 multimedia_active: 1;
148 u8 dasd_limits_operational: 1;
149 u8 resynch_dasd_limits: 1;
150 u8 install_dasd_limits: 1; /* HPFS386 flags */
151#endif
152
153 u8 mm_contlgulty;
154 u8 unused;
131 155
132 secno hotfix_map; /* info about remapped bad sectors */ 156 secno hotfix_map; /* info about remapped bad sectors */
133 unsigned n_spares_used; /* number of hotfixes */ 157 u32 n_spares_used; /* number of hotfixes */
134 unsigned n_spares; /* number of spares in hotfix map */ 158 u32 n_spares; /* number of spares in hotfix map */
135 unsigned n_dnode_spares_free; /* spare dnodes unused */ 159 u32 n_dnode_spares_free; /* spare dnodes unused */
136 unsigned n_dnode_spares; /* length of spare_dnodes[] list, 160 u32 n_dnode_spares; /* length of spare_dnodes[] list,
137 follows in this block*/ 161 follows in this block*/
138 secno code_page_dir; /* code page directory block */ 162 secno code_page_dir; /* code page directory block */
139 unsigned n_code_pages; /* number of code pages */ 163 u32 n_code_pages; /* number of code pages */
140 /*unsigned large_numbers[2];*/ /* ?? */ 164 u32 super_crc; /* on HPFS386 and LAN Server this is
141 unsigned super_crc; /* on HPFS386 and LAN Server this is
142 checksum of superblock, on normal 165 checksum of superblock, on normal
143 OS/2 unused */ 166 OS/2 unused */
144 unsigned spare_crc; /* on HPFS386 checksum of spareblock */ 167 u32 spare_crc; /* on HPFS386 checksum of spareblock */
145 unsigned zero1[15]; /* unused */ 168 u32 zero1[15]; /* unused */
146 dnode_secno spare_dnodes[100]; /* emergency free dnode list */ 169 dnode_secno spare_dnodes[100]; /* emergency free dnode list */
147 unsigned zero2[1]; /* room for more? */ 170 u32 zero2[1]; /* room for more? */
148}; 171};
149 172
150/* The bad block list is 4 sectors long. The first word must be zero, 173/* The bad block list is 4 sectors long. The first word must be zero,
@@ -179,18 +202,18 @@ struct hpfs_spare_block
179 202
180struct code_page_directory 203struct code_page_directory
181{ 204{
182 unsigned magic; /* 4945 21f7 */ 205 u32 magic; /* 4945 21f7 */
183 unsigned n_code_pages; /* number of pointers following */ 206 u32 n_code_pages; /* number of pointers following */
184 unsigned zero1[2]; 207 u32 zero1[2];
185 struct { 208 struct {
186 unsigned short ix; /* index */ 209 u16 ix; /* index */
187 unsigned short code_page_number; /* code page number */ 210 u16 code_page_number; /* code page number */
188 unsigned bounds; /* matches corresponding word 211 u32 bounds; /* matches corresponding word
189 in data block */ 212 in data block */
190 secno code_page_data; /* sector number of a code_page_data 213 secno code_page_data; /* sector number of a code_page_data
191 containing c.p. array */ 214 containing c.p. array */
192 unsigned short index; /* index in c.p. array in that sector*/ 215 u16 index; /* index in c.p. array in that sector*/
193 unsigned short unknown; /* some unknown value; usually 0; 216 u16 unknown; /* some unknown value; usually 0;
194 2 in Japanese version */ 217 2 in Japanese version */
195 } array[31]; /* unknown length */ 218 } array[31]; /* unknown length */
196}; 219};
@@ -201,21 +224,21 @@ struct code_page_directory
201 224
202struct code_page_data 225struct code_page_data
203{ 226{
204 unsigned magic; /* 8945 21f7 */ 227 u32 magic; /* 8945 21f7 */
205 unsigned n_used; /* # elements used in c_p_data[] */ 228 u32 n_used; /* # elements used in c_p_data[] */
206 unsigned bounds[3]; /* looks a bit like 229 u32 bounds[3]; /* looks a bit like
207 (beg1,end1), (beg2,end2) 230 (beg1,end1), (beg2,end2)
208 one byte each */ 231 one byte each */
209 unsigned short offs[3]; /* offsets from start of sector 232 u16 offs[3]; /* offsets from start of sector
210 to start of c_p_data[ix] */ 233 to start of c_p_data[ix] */
211 struct { 234 struct {
212 unsigned short ix; /* index */ 235 u16 ix; /* index */
213 unsigned short code_page_number; /* code page number */ 236 u16 code_page_number; /* code page number */
214 unsigned short unknown; /* the same as in cp directory */ 237 u16 unknown; /* the same as in cp directory */
215 unsigned char map[128]; /* upcase table for chars 80..ff */ 238 u8 map[128]; /* upcase table for chars 80..ff */
216 unsigned short zero2; 239 u16 zero2;
217 } code_page[3]; 240 } code_page[3];
218 unsigned char incognita[78]; 241 u8 incognita[78];
219}; 242};
220 243
221 244
@@ -255,50 +278,84 @@ struct code_page_data
255#define DNODE_MAGIC 0x77e40aae 278#define DNODE_MAGIC 0x77e40aae
256 279
257struct dnode { 280struct dnode {
258 unsigned magic; /* 77e4 0aae */ 281 u32 magic; /* 77e4 0aae */
259 unsigned first_free; /* offset from start of dnode to 282 u32 first_free; /* offset from start of dnode to
260 first free dir entry */ 283 first free dir entry */
261 unsigned root_dnode:1; /* Is it root dnode? */ 284#ifdef __LITTLE_ENDIAN
262 unsigned increment_me:31; /* some kind of activity counter? 285 u8 root_dnode: 1; /* Is it root dnode? */
263 Neither HPFS.IFS nor CHKDSK cares 286 u8 increment_me: 7; /* some kind of activity counter? */
287 /* Neither HPFS.IFS nor CHKDSK cares
288 if you change this word */
289#else
290 u8 increment_me: 7; /* some kind of activity counter? */
291 /* Neither HPFS.IFS nor CHKDSK cares
264 if you change this word */ 292 if you change this word */
293 u8 root_dnode: 1; /* Is it root dnode? */
294#endif
295 u8 increment_me2[3];
265 secno up; /* (root dnode) directory's fnode 296 secno up; /* (root dnode) directory's fnode
266 (nonroot) parent dnode */ 297 (nonroot) parent dnode */
267 dnode_secno self; /* pointer to this dnode */ 298 dnode_secno self; /* pointer to this dnode */
268 unsigned char dirent[2028]; /* one or more dirents */ 299 u8 dirent[2028]; /* one or more dirents */
269}; 300};
270 301
271struct hpfs_dirent { 302struct hpfs_dirent {
272 unsigned short length; /* offset to next dirent */ 303 u16 length; /* offset to next dirent */
273 unsigned first: 1; /* set on phony ^A^A (".") entry */ 304
274 unsigned has_acl: 1; 305#ifdef __LITTLE_ENDIAN
275 unsigned down: 1; /* down pointer present (after name) */ 306 u8 first: 1; /* set on phony ^A^A (".") entry */
276 unsigned last: 1; /* set on phony \377 entry */ 307 u8 has_acl: 1;
277 unsigned has_ea: 1; /* entry has EA */ 308 u8 down: 1; /* down pointer present (after name) */
278 unsigned has_xtd_perm: 1; /* has extended perm list (???) */ 309 u8 last: 1; /* set on phony \377 entry */
279 unsigned has_explicit_acl: 1; 310 u8 has_ea: 1; /* entry has EA */
280 unsigned has_needea: 1; /* ?? some EA has NEEDEA set 311 u8 has_xtd_perm: 1; /* has extended perm list (???) */
312 u8 has_explicit_acl: 1;
313 u8 has_needea: 1; /* ?? some EA has NEEDEA set
314 I have no idea why this is
315 interesting in a dir entry */
316#else
317 u8 has_needea: 1; /* ?? some EA has NEEDEA set
281 I have no idea why this is 318 I have no idea why this is
282 interesting in a dir entry */ 319 interesting in a dir entry */
283 unsigned read_only: 1; /* dos attrib */ 320 u8 has_explicit_acl: 1;
284 unsigned hidden: 1; /* dos attrib */ 321 u8 has_xtd_perm: 1; /* has extended perm list (???) */
285 unsigned system: 1; /* dos attrib */ 322 u8 has_ea: 1; /* entry has EA */
286 unsigned flag11: 1; /* would be volume label dos attrib */ 323 u8 last: 1; /* set on phony \377 entry */
287 unsigned directory: 1; /* dos attrib */ 324 u8 down: 1; /* down pointer present (after name) */
288 unsigned archive: 1; /* dos attrib */ 325 u8 has_acl: 1;
289 unsigned not_8x3: 1; /* name is not 8.3 */ 326 u8 first: 1; /* set on phony ^A^A (".") entry */
290 unsigned flag15: 1; 327#endif
328
329#ifdef __LITTLE_ENDIAN
330 u8 read_only: 1; /* dos attrib */
331 u8 hidden: 1; /* dos attrib */
332 u8 system: 1; /* dos attrib */
333 u8 flag11: 1; /* would be volume label dos attrib */
334 u8 directory: 1; /* dos attrib */
335 u8 archive: 1; /* dos attrib */
336 u8 not_8x3: 1; /* name is not 8.3 */
337 u8 flag15: 1;
338#else
339 u8 flag15: 1;
340 u8 not_8x3: 1; /* name is not 8.3 */
341 u8 archive: 1; /* dos attrib */
342 u8 directory: 1; /* dos attrib */
343 u8 flag11: 1; /* would be volume label dos attrib */
344 u8 system: 1; /* dos attrib */
345 u8 hidden: 1; /* dos attrib */
346 u8 read_only: 1; /* dos attrib */
347#endif
348
291 fnode_secno fnode; /* fnode giving allocation info */ 349 fnode_secno fnode; /* fnode giving allocation info */
292 time32_t write_date; /* mtime */ 350 time32_t write_date; /* mtime */
293 unsigned file_size; /* file length, bytes */ 351 u32 file_size; /* file length, bytes */
294 time32_t read_date; /* atime */ 352 time32_t read_date; /* atime */
295 time32_t creation_date; /* ctime */ 353 time32_t creation_date; /* ctime */
296 unsigned ea_size; /* total EA length, bytes */ 354 u32 ea_size; /* total EA length, bytes */
297 unsigned char no_of_acls : 3; /* number of ACL's */ 355 u8 no_of_acls; /* number of ACL's (low 3 bits) */
298 unsigned char reserver : 5; 356 u8 ix; /* code page index (of filename), see
299 unsigned char ix; /* code page index (of filename), see
300 struct code_page_data */ 357 struct code_page_data */
301 unsigned char namelen, name[1]; /* file name */ 358 u8 namelen, name[1]; /* file name */
302 /* dnode_secno down; btree down pointer, if present, 359 /* dnode_secno down; btree down pointer, if present,
303 follows name on next word boundary, or maybe it 360 follows name on next word boundary, or maybe it
304 precedes next dirent, which is on a word boundary. */ 361 precedes next dirent, which is on a word boundary. */
@@ -318,38 +375,50 @@ struct hpfs_dirent {
318 375
319struct bplus_leaf_node 376struct bplus_leaf_node
320{ 377{
321 unsigned file_secno; /* first file sector in extent */ 378 u32 file_secno; /* first file sector in extent */
322 unsigned length; /* length, sectors */ 379 u32 length; /* length, sectors */
323 secno disk_secno; /* first corresponding disk sector */ 380 secno disk_secno; /* first corresponding disk sector */
324}; 381};
325 382
326struct bplus_internal_node 383struct bplus_internal_node
327{ 384{
328 unsigned file_secno; /* subtree maps sectors < this */ 385 u32 file_secno; /* subtree maps sectors < this */
329 anode_secno down; /* pointer to subtree */ 386 anode_secno down; /* pointer to subtree */
330}; 387};
331 388
332struct bplus_header 389struct bplus_header
333{ 390{
334 unsigned hbff: 1; /* high bit of first free entry offset */ 391#ifdef __LITTLE_ENDIAN
335 unsigned flag1: 1; 392 u8 hbff: 1; /* high bit of first free entry offset */
336 unsigned flag2: 1; 393 u8 flag1234: 4;
337 unsigned flag3: 1; 394 u8 fnode_parent: 1; /* ? we're pointed to by an fnode,
338 unsigned flag4: 1;
339 unsigned fnode_parent: 1; /* ? we're pointed to by an fnode,
340 the data btree or some ea or the 395 the data btree or some ea or the
341 main ea bootage pointer ea_secno */ 396 main ea bootage pointer ea_secno */
342 /* also can get set in fnodes, which 397 /* also can get set in fnodes, which
343 may be a chkdsk glitch or may mean 398 may be a chkdsk glitch or may mean
344 this bit is irrelevant in fnodes, 399 this bit is irrelevant in fnodes,
345 or this interpretation is all wet */ 400 or this interpretation is all wet */
346 unsigned binary_search: 1; /* suggest binary search (unused) */ 401 u8 binary_search: 1; /* suggest binary search (unused) */
347 unsigned internal: 1; /* 1 -> (internal) tree of anodes 402 u8 internal: 1; /* 1 -> (internal) tree of anodes
403 0 -> (leaf) list of extents */
404#else
405 u8 internal: 1; /* 1 -> (internal) tree of anodes
348 0 -> (leaf) list of extents */ 406 0 -> (leaf) list of extents */
349 unsigned char fill[3]; 407 u8 binary_search: 1; /* suggest binary search (unused) */
350 unsigned char n_free_nodes; /* free nodes in following array */ 408 u8 fnode_parent: 1; /* ? we're pointed to by an fnode,
351 unsigned char n_used_nodes; /* used nodes in following array */ 409 the data btree or some ea or the
352 unsigned short first_free; /* offset from start of header to 410 main ea bootage pointer ea_secno */
411 /* also can get set in fnodes, which
412 may be a chkdsk glitch or may mean
413 this bit is irrelevant in fnodes,
414 or this interpretation is all wet */
415 u8 flag1234: 4;
416 u8 hbff: 1; /* high bit of first free entry offset */
417#endif
418 u8 fill[3];
419 u8 n_free_nodes; /* free nodes in following array */
420 u8 n_used_nodes; /* used nodes in following array */
421 u16 first_free; /* offset from start of header to
353 first free node in array */ 422 first free node in array */
354 union { 423 union {
355 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving 424 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
@@ -369,37 +438,38 @@ struct bplus_header
369 438
370struct fnode 439struct fnode
371{ 440{
372 unsigned magic; /* f7e4 0aae */ 441 u32 magic; /* f7e4 0aae */
373 unsigned zero1[2]; /* read history */ 442 u32 zero1[2]; /* read history */
374 unsigned char len, name[15]; /* true length, truncated name */ 443 u8 len, name[15]; /* true length, truncated name */
375 fnode_secno up; /* pointer to file's directory fnode */ 444 fnode_secno up; /* pointer to file's directory fnode */
376 /*unsigned zero2[3];*/
377 secno acl_size_l; 445 secno acl_size_l;
378 secno acl_secno; 446 secno acl_secno;
379 unsigned short acl_size_s; 447 u16 acl_size_s;
380 char acl_anode; 448 u8 acl_anode;
381 char zero2; /* history bit count */ 449 u8 zero2; /* history bit count */
382 unsigned ea_size_l; /* length of disk-resident ea's */ 450 u32 ea_size_l; /* length of disk-resident ea's */
383 secno ea_secno; /* first sector of disk-resident ea's*/ 451 secno ea_secno; /* first sector of disk-resident ea's*/
384 unsigned short ea_size_s; /* length of fnode-resident ea's */ 452 u16 ea_size_s; /* length of fnode-resident ea's */
385 453
386 unsigned flag0: 1; 454#ifdef __LITTLE_ENDIAN
387 unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */ 455 u8 flag0: 1;
388 unsigned flag2: 1; 456 u8 ea_anode: 1; /* 1 -> ea_secno is an anode */
389 unsigned flag3: 1; 457 u8 flag234567: 6;
390 unsigned flag4: 1; 458#else
391 unsigned flag5: 1; 459 u8 flag234567: 6;
392 unsigned flag6: 1; 460 u8 ea_anode: 1; /* 1 -> ea_secno is an anode */
393 unsigned flag7: 1; 461 u8 flag0: 1;
394 unsigned dirflag: 1; /* 1 -> directory. first & only extent 462#endif
463
464#ifdef __LITTLE_ENDIAN
465 u8 dirflag: 1; /* 1 -> directory. first & only extent
395 points to dnode. */ 466 points to dnode. */
396 unsigned flag9: 1; 467 u8 flag9012345: 7;
397 unsigned flag10: 1; 468#else
398 unsigned flag11: 1; 469 u8 flag9012345: 7;
399 unsigned flag12: 1; 470 u8 dirflag: 1; /* 1 -> directory. first & only extent
400 unsigned flag13: 1; 471 points to dnode. */
401 unsigned flag14: 1; 472#endif
402 unsigned flag15: 1;
403 473
404 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ 474 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */
405 union { 475 union {
@@ -407,17 +477,16 @@ struct fnode
407 struct bplus_internal_node internal[12]; 477 struct bplus_internal_node internal[12];
408 } u; 478 } u;
409 479
410 unsigned file_size; /* file length, bytes */ 480 u32 file_size; /* file length, bytes */
411 unsigned n_needea; /* number of EA's with NEEDEA set */ 481 u32 n_needea; /* number of EA's with NEEDEA set */
412 char user_id[16]; /* unused */ 482 u8 user_id[16]; /* unused */
413 unsigned short ea_offs; /* offset from start of fnode 483 u16 ea_offs; /* offset from start of fnode
414 to first fnode-resident ea */ 484 to first fnode-resident ea */
415 char dasd_limit_treshhold; 485 u8 dasd_limit_treshhold;
416 char dasd_limit_delta; 486 u8 dasd_limit_delta;
417 unsigned dasd_limit; 487 u32 dasd_limit;
418 unsigned dasd_usage; 488 u32 dasd_usage;
419 /*unsigned zero5[2];*/ 489 u8 ea[316]; /* zero or more EA's, packed together
420 unsigned char ea[316]; /* zero or more EA's, packed together
421 with no alignment padding. 490 with no alignment padding.
422 (Do not use this name, get here 491 (Do not use this name, get here
423 via fnode + ea_offs. I think.) */ 492 via fnode + ea_offs. I think.) */
@@ -430,7 +499,7 @@ struct fnode
430 499
431struct anode 500struct anode
432{ 501{
433 unsigned magic; /* 37e4 0aae */ 502 u32 magic; /* 37e4 0aae */
434 anode_secno self; /* pointer to this anode */ 503 anode_secno self; /* pointer to this anode */
435 secno up; /* parent anode or fnode */ 504 secno up; /* parent anode or fnode */
436 505
@@ -440,7 +509,7 @@ struct anode
440 struct bplus_internal_node internal[60]; 509 struct bplus_internal_node internal[60];
441 } u; 510 } u;
442 511
443 unsigned fill[3]; /* unused */ 512 u32 fill[3]; /* unused */
444}; 513};
445 514
446 515
@@ -461,25 +530,31 @@ struct anode
461 530
462struct extended_attribute 531struct extended_attribute
463{ 532{
464 unsigned indirect: 1; /* 1 -> value gives sector number 533#ifdef __LITTLE_ENDIAN
534 u8 indirect: 1; /* 1 -> value gives sector number
465 where real value starts */ 535 where real value starts */
466 unsigned anode: 1; /* 1 -> sector is an anode 536 u8 anode: 1; /* 1 -> sector is an anode
537 that points to fragmented value */
538 u8 flag23456: 5;
539 u8 needea: 1; /* required ea */
540#else
541 u8 needea: 1; /* required ea */
542 u8 flag23456: 5;
543 u8 anode: 1; /* 1 -> sector is an anode
467 that points to fragmented value */ 544 that points to fragmented value */
468 unsigned flag2: 1; 545 u8 indirect: 1; /* 1 -> value gives sector number
469 unsigned flag3: 1; 546 where real value starts */
470 unsigned flag4: 1; 547#endif
471 unsigned flag5: 1; 548 u8 namelen; /* length of name, bytes */
472 unsigned flag6: 1; 549 u8 valuelen_lo; /* length of value, bytes */
473 unsigned needea: 1; /* required ea */ 550 u8 valuelen_hi; /* length of value, bytes */
474 unsigned char namelen; /* length of name, bytes */ 551 u8 name[0];
475 unsigned short valuelen; /* length of value, bytes */
476 unsigned char name[0];
477 /* 552 /*
478 unsigned char name[namelen]; ascii attrib name 553 u8 name[namelen]; ascii attrib name
479 unsigned char nul; terminating '\0', not counted 554 u8 nul; terminating '\0', not counted
480 unsigned char value[valuelen]; value, arbitrary 555 u8 value[valuelen]; value, arbitrary
481 if this.indirect, valuelen is 8 and the value is 556 if this.indirect, valuelen is 8 and the value is
482 unsigned length; real length of value, bytes 557 u32 length; real length of value, bytes
483 secno secno; sector address where it starts 558 secno secno; sector address where it starts
484 if this.anode, the above sector number is the root of an anode tree 559 if this.anode, the above sector number is the root of an anode tree
485 which points to the value. 560 which points to the value.
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c15adbca07ff..dd552f862c8f 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,6 +13,7 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <asm/unaligned.h>
16 17
17#include "hpfs.h" 18#include "hpfs.h"
18 19
@@ -51,18 +52,16 @@ struct hpfs_inode_info {
51 unsigned i_disk_sec; /* (files) minimalist cache of alloc info */ 52 unsigned i_disk_sec; /* (files) minimalist cache of alloc info */
52 unsigned i_n_secs; /* (files) minimalist cache of alloc info */ 53 unsigned i_n_secs; /* (files) minimalist cache of alloc info */
53 unsigned i_ea_size; /* size of extended attributes */ 54 unsigned i_ea_size; /* size of extended attributes */
54 unsigned i_conv : 2; /* (files) crlf->newline hackery */
55 unsigned i_ea_mode : 1; /* file's permission is stored in ea */ 55 unsigned i_ea_mode : 1; /* file's permission is stored in ea */
56 unsigned i_ea_uid : 1; /* file's uid is stored in ea */ 56 unsigned i_ea_uid : 1; /* file's uid is stored in ea */
57 unsigned i_ea_gid : 1; /* file's gid is stored in ea */ 57 unsigned i_ea_gid : 1; /* file's gid is stored in ea */
58 unsigned i_dirty : 1; 58 unsigned i_dirty : 1;
59 struct mutex i_mutex;
60 struct mutex i_parent_mutex;
61 loff_t **i_rddir_off; 59 loff_t **i_rddir_off;
62 struct inode vfs_inode; 60 struct inode vfs_inode;
63}; 61};
64 62
65struct hpfs_sb_info { 63struct hpfs_sb_info {
64 struct mutex hpfs_mutex; /* global hpfs lock */
66 ino_t sb_root; /* inode number of root dir */ 65 ino_t sb_root; /* inode number of root dir */
67 unsigned sb_fs_size; /* file system size, sectors */ 66 unsigned sb_fs_size; /* file system size, sectors */
68 unsigned sb_bitmaps; /* sector number of bitmap list */ 67 unsigned sb_bitmaps; /* sector number of bitmap list */
@@ -74,7 +73,6 @@ struct hpfs_sb_info {
74 uid_t sb_uid; /* uid from mount options */ 73 uid_t sb_uid; /* uid from mount options */
75 gid_t sb_gid; /* gid from mount options */ 74 gid_t sb_gid; /* gid from mount options */
76 umode_t sb_mode; /* mode from mount options */ 75 umode_t sb_mode; /* mode from mount options */
77 unsigned sb_conv : 2; /* crlf->newline hackery */
78 unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ 76 unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */
79 unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ 77 unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */
80 unsigned sb_chk : 2; /* checks: 0-no, 1-normal, 2-strict */ 78 unsigned sb_chk : 2; /* checks: 0-no, 1-normal, 2-strict */
@@ -87,20 +85,9 @@ struct hpfs_sb_info {
87 unsigned *sb_bmp_dir; /* main bitmap directory */ 85 unsigned *sb_bmp_dir; /* main bitmap directory */
88 unsigned sb_c_bitmap; /* current bitmap */ 86 unsigned sb_c_bitmap; /* current bitmap */
89 unsigned sb_max_fwd_alloc; /* max forwad allocation */ 87 unsigned sb_max_fwd_alloc; /* max forwad allocation */
90 struct mutex hpfs_creation_de; /* when creating dirents, nobody else
91 can alloc blocks */
92 /*unsigned sb_mounting : 1;*/
93 int sb_timeshift; 88 int sb_timeshift;
94}; 89};
95 90
96/*
97 * conv= options
98 */
99
100#define CONV_BINARY 0 /* no conversion */
101#define CONV_TEXT 1 /* crlf->newline */
102#define CONV_AUTO 2 /* decide based on file contents */
103
104/* Four 512-byte buffers and the 2k block obtained by concatenating them */ 91/* Four 512-byte buffers and the 2k block obtained by concatenating them */
105 92
106struct quad_buffer_head { 93struct quad_buffer_head {
@@ -113,7 +100,7 @@ struct quad_buffer_head {
113static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) 100static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
114{ 101{
115 CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); 102 CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n"));
116 return *(dnode_secno *) ((void *) de + de->length - 4); 103 return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4));
117} 104}
118 105
119/* The first dir entry in a dnode */ 106/* The first dir entry in a dnode */
@@ -127,41 +114,46 @@ static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
127 114
128static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode) 115static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
129{ 116{
130 CHKCOND(dnode->first_free>=0x14 && dnode->first_free<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %d\n",(int)dnode->first_free)); 117 CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free)));
131 return (void *) dnode + dnode->first_free; 118 return (void *) dnode + le32_to_cpu(dnode->first_free);
132} 119}
133 120
134/* The dir entry after dir entry de */ 121/* The dir entry after dir entry de */
135 122
136static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de) 123static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
137{ 124{
138 CHKCOND(de->length>=0x20 && de->length<0x800,("HPFS: de_next_de: de->length = %d\n",(int)de->length)); 125 CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length)));
139 return (void *) de + de->length; 126 return (void *) de + le16_to_cpu(de->length);
140} 127}
141 128
142static inline struct extended_attribute *fnode_ea(struct fnode *fnode) 129static inline struct extended_attribute *fnode_ea(struct fnode *fnode)
143{ 130{
144 return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s); 131 return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s));
145} 132}
146 133
147static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode) 134static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode)
148{ 135{
149 return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s); 136 return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s));
137}
138
139static unsigned ea_valuelen(struct extended_attribute *ea)
140{
141 return ea->valuelen_lo + 256 * ea->valuelen_hi;
150} 142}
151 143
152static inline struct extended_attribute *next_ea(struct extended_attribute *ea) 144static inline struct extended_attribute *next_ea(struct extended_attribute *ea)
153{ 145{
154 return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea->valuelen); 146 return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea));
155} 147}
156 148
157static inline secno ea_sec(struct extended_attribute *ea) 149static inline secno ea_sec(struct extended_attribute *ea)
158{ 150{
159 return *(secno *)((char *)ea + 9 + ea->namelen); 151 return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen)));
160} 152}
161 153
162static inline secno ea_len(struct extended_attribute *ea) 154static inline secno ea_len(struct extended_attribute *ea)
163{ 155{
164 return *(secno *)((char *)ea + 5 + ea->namelen); 156 return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen)));
165} 157}
166 158
167static inline char *ea_data(struct extended_attribute *ea) 159static inline char *ea_data(struct extended_attribute *ea)
@@ -186,13 +178,13 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src)
186 dst->not_8x3 = n; 178 dst->not_8x3 = n;
187} 179}
188 180
189static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n) 181static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n)
190{ 182{
191 int i; 183 int i;
192 if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; 184 if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n;
193 if (!((bmp[(b & 0x3fff) >> 5] >> (b & 0x1f)) & 1)) return 1; 185 if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1;
194 for (i = 1; i < n; i++) 186 for (i = 1; i < n; i++)
195 if (/*b+i < 0x4000 &&*/ !((bmp[((b+i) & 0x3fff) >> 5] >> ((b+i) & 0x1f)) & 1)) 187 if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1))
196 return i + 1; 188 return i + 1;
197 return 0; 189 return 0;
198} 190}
@@ -200,12 +192,12 @@ static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
200/* alloc.c */ 192/* alloc.c */
201 193
202int hpfs_chk_sectors(struct super_block *, secno, int, char *); 194int hpfs_chk_sectors(struct super_block *, secno, int, char *);
203secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int, int); 195secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int);
204int hpfs_alloc_if_possible(struct super_block *, secno); 196int hpfs_alloc_if_possible(struct super_block *, secno);
205void hpfs_free_sectors(struct super_block *, secno, unsigned); 197void hpfs_free_sectors(struct super_block *, secno, unsigned);
206int hpfs_check_free_dnodes(struct super_block *, int); 198int hpfs_check_free_dnodes(struct super_block *, int);
207void hpfs_free_dnode(struct super_block *, secno); 199void hpfs_free_dnode(struct super_block *, secno);
208struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *, int); 200struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
209struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); 201struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
210struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); 202struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
211 203
@@ -222,8 +214,6 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
222 214
223/* buffer.c */ 215/* buffer.c */
224 216
225void hpfs_lock_creation(struct super_block *);
226void hpfs_unlock_creation(struct super_block *);
227void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); 217void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int);
228void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); 218void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **);
229void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); 219void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int);
@@ -247,7 +237,7 @@ void hpfs_del_pos(struct inode *, loff_t *);
247struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, 237struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
248 const unsigned char *, unsigned, secno); 238 const unsigned char *, unsigned, secno);
249int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned, 239int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
250 struct hpfs_dirent *, int); 240 struct hpfs_dirent *);
251int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int); 241int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
252void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *); 242void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
253dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno); 243dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
@@ -303,7 +293,6 @@ int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
303 const unsigned char *, unsigned, int); 293 const unsigned char *, unsigned, int);
304int hpfs_is_name_long(const unsigned char *, unsigned); 294int hpfs_is_name_long(const unsigned char *, unsigned);
305void hpfs_adjust_length(const unsigned char *, unsigned *); 295void hpfs_adjust_length(const unsigned char *, unsigned *);
306void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
307 296
308/* namei.c */ 297/* namei.c */
309 298
@@ -346,21 +335,26 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
346/* 335/*
347 * Locking: 336 * Locking:
348 * 337 *
349 * hpfs_lock() is a leftover from the big kernel lock. 338 * hpfs_lock() locks the whole filesystem. It must be taken
350 * Right now, these functions are empty and only left 339 * on any method called by the VFS.
351 * for documentation purposes. The file system no longer
352 * works on SMP systems, so the lock is not needed
353 * any more.
354 * 340 *
355 * If someone is interested in making it work again, this 341 * We don't do any per-file locking anymore, it is hard to
356 * would be the place to start by adding a per-superblock 342 * review and HPFS is not performance-sensitive anyway.
357 * mutex and fixing all the bugs and performance issues
358 * caused by that.
359 */ 343 */
360static inline void hpfs_lock(struct super_block *s) 344static inline void hpfs_lock(struct super_block *s)
361{ 345{
346 struct hpfs_sb_info *sbi = hpfs_sb(s);
347 mutex_lock(&sbi->hpfs_mutex);
362} 348}
363 349
364static inline void hpfs_unlock(struct super_block *s) 350static inline void hpfs_unlock(struct super_block *s)
365{ 351{
352 struct hpfs_sb_info *sbi = hpfs_sb(s);
353 mutex_unlock(&sbi->hpfs_mutex);
354}
355
356static inline void hpfs_lock_assert(struct super_block *s)
357{
358 struct hpfs_sb_info *sbi = hpfs_sb(s);
359 WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex));
366} 360}
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 87f1f787e767..338cd8368451 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
17 i->i_uid = hpfs_sb(sb)->sb_uid; 17 i->i_uid = hpfs_sb(sb)->sb_uid;
18 i->i_gid = hpfs_sb(sb)->sb_gid; 18 i->i_gid = hpfs_sb(sb)->sb_gid;
19 i->i_mode = hpfs_sb(sb)->sb_mode; 19 i->i_mode = hpfs_sb(sb)->sb_mode;
20 hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
21 i->i_size = -1; 20 i->i_size = -1;
22 i->i_blocks = -1; 21 i->i_blocks = -1;
23 22
@@ -116,8 +115,8 @@ void hpfs_read_inode(struct inode *i)
116 i->i_mode |= S_IFDIR; 115 i->i_mode |= S_IFDIR;
117 i->i_op = &hpfs_dir_iops; 116 i->i_op = &hpfs_dir_iops;
118 i->i_fop = &hpfs_dir_ops; 117 i->i_fop = &hpfs_dir_ops;
119 hpfs_inode->i_parent_dir = fnode->up; 118 hpfs_inode->i_parent_dir = le32_to_cpu(fnode->up);
120 hpfs_inode->i_dno = fnode->u.external[0].disk_secno; 119 hpfs_inode->i_dno = le32_to_cpu(fnode->u.external[0].disk_secno);
121 if (hpfs_sb(sb)->sb_chk >= 2) { 120 if (hpfs_sb(sb)->sb_chk >= 2) {
122 struct buffer_head *bh0; 121 struct buffer_head *bh0;
123 if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0); 122 if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0);
@@ -133,7 +132,7 @@ void hpfs_read_inode(struct inode *i)
133 i->i_op = &hpfs_file_iops; 132 i->i_op = &hpfs_file_iops;
134 i->i_fop = &hpfs_file_ops; 133 i->i_fop = &hpfs_file_ops;
135 i->i_nlink = 1; 134 i->i_nlink = 1;
136 i->i_size = fnode->file_size; 135 i->i_size = le32_to_cpu(fnode->file_size);
137 i->i_blocks = ((i->i_size + 511) >> 9) + 1; 136 i->i_blocks = ((i->i_size + 511) >> 9) + 1;
138 i->i_data.a_ops = &hpfs_aops; 137 i->i_data.a_ops = &hpfs_aops;
139 hpfs_i(i)->mmu_private = i->i_size; 138 hpfs_i(i)->mmu_private = i->i_size;
@@ -144,7 +143,7 @@ void hpfs_read_inode(struct inode *i)
144static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) 143static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
145{ 144{
146 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 145 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
147 /*if (fnode->acl_size_l || fnode->acl_size_s) { 146 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 147 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 148 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 149 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
@@ -187,9 +186,7 @@ void hpfs_write_inode(struct inode *i)
187 kfree(hpfs_inode->i_rddir_off); 186 kfree(hpfs_inode->i_rddir_off);
188 hpfs_inode->i_rddir_off = NULL; 187 hpfs_inode->i_rddir_off = NULL;
189 } 188 }
190 mutex_lock(&hpfs_inode->i_parent_mutex);
191 if (!i->i_nlink) { 189 if (!i->i_nlink) {
192 mutex_unlock(&hpfs_inode->i_parent_mutex);
193 return; 190 return;
194 } 191 }
195 parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir); 192 parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir);
@@ -200,14 +197,9 @@ void hpfs_write_inode(struct inode *i)
200 hpfs_read_inode(parent); 197 hpfs_read_inode(parent);
201 unlock_new_inode(parent); 198 unlock_new_inode(parent);
202 } 199 }
203 mutex_lock(&hpfs_inode->i_mutex);
204 hpfs_write_inode_nolock(i); 200 hpfs_write_inode_nolock(i);
205 mutex_unlock(&hpfs_inode->i_mutex);
206 iput(parent); 201 iput(parent);
207 } else {
208 mark_inode_dirty(i);
209 } 202 }
210 mutex_unlock(&hpfs_inode->i_parent_mutex);
211} 203}
212 204
213void hpfs_write_inode_nolock(struct inode *i) 205void hpfs_write_inode_nolock(struct inode *i)
@@ -226,30 +218,30 @@ void hpfs_write_inode_nolock(struct inode *i)
226 } 218 }
227 } else de = NULL; 219 } else de = NULL;
228 if (S_ISREG(i->i_mode)) { 220 if (S_ISREG(i->i_mode)) {
229 fnode->file_size = i->i_size; 221 fnode->file_size = cpu_to_le32(i->i_size);
230 if (de) de->file_size = i->i_size; 222 if (de) de->file_size = cpu_to_le32(i->i_size);
231 } else if (S_ISDIR(i->i_mode)) { 223 } else if (S_ISDIR(i->i_mode)) {
232 fnode->file_size = 0; 224 fnode->file_size = cpu_to_le32(0);
233 if (de) de->file_size = 0; 225 if (de) de->file_size = cpu_to_le32(0);
234 } 226 }
235 hpfs_write_inode_ea(i, fnode); 227 hpfs_write_inode_ea(i, fnode);
236 if (de) { 228 if (de) {
237 de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec); 229 de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
238 de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec); 230 de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
239 de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec); 231 de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
240 de->read_only = !(i->i_mode & 0222); 232 de->read_only = !(i->i_mode & 0222);
241 de->ea_size = hpfs_inode->i_ea_size; 233 de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
242 hpfs_mark_4buffers_dirty(&qbh); 234 hpfs_mark_4buffers_dirty(&qbh);
243 hpfs_brelse4(&qbh); 235 hpfs_brelse4(&qbh);
244 } 236 }
245 if (S_ISDIR(i->i_mode)) { 237 if (S_ISDIR(i->i_mode)) {
246 if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) { 238 if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
247 de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec); 239 de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
248 de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec); 240 de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
249 de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec); 241 de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
250 de->read_only = !(i->i_mode & 0222); 242 de->read_only = !(i->i_mode & 0222);
251 de->ea_size = /*hpfs_inode->i_ea_size*/0; 243 de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
252 de->file_size = 0; 244 de->file_size = cpu_to_le32(0);
253 hpfs_mark_4buffers_dirty(&qbh); 245 hpfs_mark_4buffers_dirty(&qbh);
254 hpfs_brelse4(&qbh); 246 hpfs_brelse4(&qbh);
255 } else 247 } else
@@ -269,6 +261,10 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
269 hpfs_lock(inode->i_sb); 261 hpfs_lock(inode->i_sb);
270 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) 262 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
271 goto out_unlock; 263 goto out_unlock;
264 if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000)
265 goto out_unlock;
266 if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000)
267 goto out_unlock;
272 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) 268 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
273 goto out_unlock; 269 goto out_unlock;
274 270
@@ -284,7 +280,6 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
284 } 280 }
285 281
286 setattr_copy(inode, attr); 282 setattr_copy(inode, attr);
287 mark_inode_dirty(inode);
288 283
289 hpfs_write_inode(inode); 284 hpfs_write_inode(inode);
290 285
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 840d033ecee8..a790821366a7 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -21,7 +21,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
21 hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); 21 hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
22 return NULL; 22 return NULL;
23 } 23 }
24 sec = hpfs_sb(s)->sb_bmp_dir[bmp_block]; 24 sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
25 if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) { 25 if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
26 hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); 26 hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
27 return NULL; 27 return NULL;
@@ -46,18 +46,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
46 struct code_page_data *cpd; 46 struct code_page_data *cpd;
47 struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0); 47 struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
48 if (!cp) return NULL; 48 if (!cp) return NULL;
49 if (cp->magic != CP_DIR_MAGIC) { 49 if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
50 printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", cp->magic); 50 printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic));
51 brelse(bh); 51 brelse(bh);
52 return NULL; 52 return NULL;
53 } 53 }
54 if (!cp->n_code_pages) { 54 if (!le32_to_cpu(cp->n_code_pages)) {
55 printk("HPFS: n_code_pages == 0\n"); 55 printk("HPFS: n_code_pages == 0\n");
56 brelse(bh); 56 brelse(bh);
57 return NULL; 57 return NULL;
58 } 58 }
59 cpds = cp->array[0].code_page_data; 59 cpds = le32_to_cpu(cp->array[0].code_page_data);
60 cpi = cp->array[0].index; 60 cpi = le16_to_cpu(cp->array[0].index);
61 brelse(bh); 61 brelse(bh);
62 62
63 if (cpi >= 3) { 63 if (cpi >= 3) {
@@ -66,12 +66,12 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
66 } 66 }
67 67
68 if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL; 68 if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL;
69 if ((unsigned)cpd->offs[cpi] > 0x178) { 69 if (le16_to_cpu(cpd->offs[cpi]) > 0x178) {
70 printk("HPFS: Code page index out of sector\n"); 70 printk("HPFS: Code page index out of sector\n");
71 brelse(bh); 71 brelse(bh);
72 return NULL; 72 return NULL;
73 } 73 }
74 ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6; 74 ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6;
75 if (!(cp_table = kmalloc(256, GFP_KERNEL))) { 75 if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
76 printk("HPFS: out of memory for code page table\n"); 76 printk("HPFS: out of memory for code page table\n");
77 brelse(bh); 77 brelse(bh);
@@ -125,7 +125,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
125 if (hpfs_sb(s)->sb_chk) { 125 if (hpfs_sb(s)->sb_chk) {
126 struct extended_attribute *ea; 126 struct extended_attribute *ea;
127 struct extended_attribute *ea_end; 127 struct extended_attribute *ea_end;
128 if (fnode->magic != FNODE_MAGIC) { 128 if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) {
129 hpfs_error(s, "bad magic on fnode %08lx", 129 hpfs_error(s, "bad magic on fnode %08lx",
130 (unsigned long)ino); 130 (unsigned long)ino);
131 goto bail; 131 goto bail;
@@ -138,7 +138,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
138 (unsigned long)ino); 138 (unsigned long)ino);
139 goto bail; 139 goto bail;
140 } 140 }
141 if (fnode->btree.first_free != 141 if (le16_to_cpu(fnode->btree.first_free) !=
142 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { 142 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
143 hpfs_error(s, 143 hpfs_error(s,
144 "bad first_free pointer in fnode %08lx", 144 "bad first_free pointer in fnode %08lx",
@@ -146,12 +146,12 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
146 goto bail; 146 goto bail;
147 } 147 }
148 } 148 }
149 if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 || 149 if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 ||
150 (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) { 150 le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) {
151 hpfs_error(s, 151 hpfs_error(s,
152 "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x", 152 "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
153 (unsigned long)ino, 153 (unsigned long)ino,
154 fnode->ea_offs, fnode->ea_size_s); 154 le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
155 goto bail; 155 goto bail;
156 } 156 }
157 ea = fnode_ea(fnode); 157 ea = fnode_ea(fnode);
@@ -178,16 +178,20 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff
178 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL; 178 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL;
179 if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD))) 179 if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD)))
180 if (hpfs_sb(s)->sb_chk) { 180 if (hpfs_sb(s)->sb_chk) {
181 if (anode->magic != ANODE_MAGIC || anode->self != ano) { 181 if (le32_to_cpu(anode->magic) != ANODE_MAGIC) {
182 hpfs_error(s, "bad magic on anode %08x", ano); 182 hpfs_error(s, "bad magic on anode %08x", ano);
183 goto bail; 183 goto bail;
184 } 184 }
185 if (le32_to_cpu(anode->self) != ano) {
186 hpfs_error(s, "self pointer invalid on anode %08x", ano);
187 goto bail;
188 }
185 if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != 189 if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes !=
186 (anode->btree.internal ? 60 : 40)) { 190 (anode->btree.internal ? 60 : 40)) {
187 hpfs_error(s, "bad number of nodes in anode %08x", ano); 191 hpfs_error(s, "bad number of nodes in anode %08x", ano);
188 goto bail; 192 goto bail;
189 } 193 }
190 if (anode->btree.first_free != 194 if (le16_to_cpu(anode->btree.first_free) !=
191 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { 195 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) {
192 hpfs_error(s, "bad first_free pointer in anode %08x", ano); 196 hpfs_error(s, "bad first_free pointer in anode %08x", ano);
193 goto bail; 197 goto bail;
@@ -219,26 +223,26 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
219 unsigned p, pp = 0; 223 unsigned p, pp = 0;
220 unsigned char *d = (unsigned char *)dnode; 224 unsigned char *d = (unsigned char *)dnode;
221 int b = 0; 225 int b = 0;
222 if (dnode->magic != DNODE_MAGIC) { 226 if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) {
223 hpfs_error(s, "bad magic on dnode %08x", secno); 227 hpfs_error(s, "bad magic on dnode %08x", secno);
224 goto bail; 228 goto bail;
225 } 229 }
226 if (dnode->self != secno) 230 if (le32_to_cpu(dnode->self) != secno)
227 hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, dnode->self); 231 hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self));
228 /* Check dirents - bad dirents would cause infinite 232 /* Check dirents - bad dirents would cause infinite
229 loops or shooting to memory */ 233 loops or shooting to memory */
230 if (dnode->first_free > 2048/* || dnode->first_free < 84*/) { 234 if (le32_to_cpu(dnode->first_free) > 2048) {
231 hpfs_error(s, "dnode %08x has first_free == %08x", secno, dnode->first_free); 235 hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free));
232 goto bail; 236 goto bail;
233 } 237 }
234 for (p = 20; p < dnode->first_free; p += d[p] + (d[p+1] << 8)) { 238 for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) {
235 struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p); 239 struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p);
236 if (de->length > 292 || (de->length < 32) || (de->length & 3) || p + de->length > 2048) { 240 if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) {
237 hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); 241 hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
238 goto bail; 242 goto bail;
239 } 243 }
240 if (((31 + de->namelen + de->down*4 + 3) & ~3) != de->length) { 244 if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
241 if (((31 + de->namelen + de->down*4 + 3) & ~3) < de->length && s->s_flags & MS_RDONLY) goto ok; 245 if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok;
242 hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); 246 hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
243 goto bail; 247 goto bail;
244 } 248 }
@@ -251,7 +255,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
251 pp = p; 255 pp = p;
252 256
253 } 257 }
254 if (p != dnode->first_free) { 258 if (p != le32_to_cpu(dnode->first_free)) {
255 hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno); 259 hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno);
256 goto bail; 260 goto bail;
257 } 261 }
@@ -277,7 +281,7 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino)
277 if (!fnode) 281 if (!fnode)
278 return 0; 282 return 0;
279 283
280 dno = fnode->u.external[0].disk_secno; 284 dno = le32_to_cpu(fnode->u.external[0].disk_secno);
281 brelse(bh); 285 brelse(bh);
282 return dno; 286 return dno;
283} 287}
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index f24736d7a439..9acdf338def0 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,39 +8,6 @@
8 8
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static const char *text_postfix[]={
12".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
13".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
14".RC", ".TEX", ".TXT", ".Y", ""};
15
16static const char *text_prefix[]={
17"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
18"MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
19
20void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
21{
22 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
23 int i;
24 if (hpfs_inode->i_conv != CONV_AUTO) return;
25 for (i = 0; *text_postfix[i]; i++) {
26 int l = strlen(text_postfix[i]);
27 if (l <= len)
28 if (!hpfs_compare_names(inode->i_sb, text_postfix[i], l, name + len - l, l, 0))
29 goto text;
30 }
31 for (i = 0; *text_prefix[i]; i++) {
32 int l = strlen(text_prefix[i]);
33 if (l <= len)
34 if (!hpfs_compare_names(inode->i_sb, text_prefix[i], l, name, l, 0))
35 goto text;
36 }
37 hpfs_inode->i_conv = CONV_BINARY;
38 return;
39 text:
40 hpfs_inode->i_conv = CONV_TEXT;
41 return;
42}
43
44static inline int not_allowed_char(unsigned char c) 11static inline int not_allowed_char(unsigned char c)
45{ 12{
46 return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' || 13 return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' ||
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d5f8c8a19023..1f05839c27a7 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -29,7 +29,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
29 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 29 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
30 if (!fnode) 30 if (!fnode)
31 goto bail; 31 goto bail;
32 dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0, 1); 32 dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0);
33 if (!dnode) 33 if (!dnode)
34 goto bail1; 34 goto bail1;
35 memset(&dee, 0, sizeof dee); 35 memset(&dee, 0, sizeof dee);
@@ -37,8 +37,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
37 if (!(mode & 0222)) dee.read_only = 1; 37 if (!(mode & 0222)) dee.read_only = 1;
38 /*dee.archive = 0;*/ 38 /*dee.archive = 0;*/
39 dee.hidden = name[0] == '.'; 39 dee.hidden = name[0] == '.';
40 dee.fnode = fno; 40 dee.fnode = cpu_to_le32(fno);
41 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 41 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
42 result = new_inode(dir->i_sb); 42 result = new_inode(dir->i_sb);
43 if (!result) 43 if (!result)
44 goto bail2; 44 goto bail2;
@@ -46,7 +46,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
46 result->i_ino = fno; 46 result->i_ino = fno;
47 hpfs_i(result)->i_parent_dir = dir->i_ino; 47 hpfs_i(result)->i_parent_dir = dir->i_ino;
48 hpfs_i(result)->i_dno = dno; 48 hpfs_i(result)->i_dno = dno;
49 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 49 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
50 result->i_ctime.tv_nsec = 0; 50 result->i_ctime.tv_nsec = 0;
51 result->i_mtime.tv_nsec = 0; 51 result->i_mtime.tv_nsec = 0;
52 result->i_atime.tv_nsec = 0; 52 result->i_atime.tv_nsec = 0;
@@ -60,8 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
60 if (dee.read_only) 60 if (dee.read_only)
61 result->i_mode &= ~0222; 61 result->i_mode &= ~0222;
62 62
63 mutex_lock(&hpfs_i(dir)->i_mutex); 63 r = hpfs_add_dirent(dir, name, len, &dee);
64 r = hpfs_add_dirent(dir, name, len, &dee, 0);
65 if (r == 1) 64 if (r == 1)
66 goto bail3; 65 goto bail3;
67 if (r == -1) { 66 if (r == -1) {
@@ -70,21 +69,21 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
70 } 69 }
71 fnode->len = len; 70 fnode->len = len;
72 memcpy(fnode->name, name, len > 15 ? 15 : len); 71 memcpy(fnode->name, name, len > 15 ? 15 : len);
73 fnode->up = dir->i_ino; 72 fnode->up = cpu_to_le32(dir->i_ino);
74 fnode->dirflag = 1; 73 fnode->dirflag = 1;
75 fnode->btree.n_free_nodes = 7; 74 fnode->btree.n_free_nodes = 7;
76 fnode->btree.n_used_nodes = 1; 75 fnode->btree.n_used_nodes = 1;
77 fnode->btree.first_free = 0x14; 76 fnode->btree.first_free = cpu_to_le16(0x14);
78 fnode->u.external[0].disk_secno = dno; 77 fnode->u.external[0].disk_secno = cpu_to_le32(dno);
79 fnode->u.external[0].file_secno = -1; 78 fnode->u.external[0].file_secno = cpu_to_le32(-1);
80 dnode->root_dnode = 1; 79 dnode->root_dnode = 1;
81 dnode->up = fno; 80 dnode->up = cpu_to_le32(fno);
82 de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0); 81 de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0);
83 de->creation_date = de->write_date = de->read_date = gmt_to_local(dir->i_sb, get_seconds()); 82 de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
84 if (!(mode & 0222)) de->read_only = 1; 83 if (!(mode & 0222)) de->read_only = 1;
85 de->first = de->directory = 1; 84 de->first = de->directory = 1;
86 /*de->hidden = de->system = 0;*/ 85 /*de->hidden = de->system = 0;*/
87 de->fnode = fno; 86 de->fnode = cpu_to_le32(fno);
88 mark_buffer_dirty(bh); 87 mark_buffer_dirty(bh);
89 brelse(bh); 88 brelse(bh);
90 hpfs_mark_4buffers_dirty(&qbh0); 89 hpfs_mark_4buffers_dirty(&qbh0);
@@ -101,11 +100,9 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
101 hpfs_write_inode_nolock(result); 100 hpfs_write_inode_nolock(result);
102 } 101 }
103 d_instantiate(dentry, result); 102 d_instantiate(dentry, result);
104 mutex_unlock(&hpfs_i(dir)->i_mutex);
105 hpfs_unlock(dir->i_sb); 103 hpfs_unlock(dir->i_sb);
106 return 0; 104 return 0;
107bail3: 105bail3:
108 mutex_unlock(&hpfs_i(dir)->i_mutex);
109 iput(result); 106 iput(result);
110bail2: 107bail2:
111 hpfs_brelse4(&qbh0); 108 hpfs_brelse4(&qbh0);
@@ -140,8 +137,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
140 if (!(mode & 0222)) dee.read_only = 1; 137 if (!(mode & 0222)) dee.read_only = 1;
141 dee.archive = 1; 138 dee.archive = 1;
142 dee.hidden = name[0] == '.'; 139 dee.hidden = name[0] == '.';
143 dee.fnode = fno; 140 dee.fnode = cpu_to_le32(fno);
144 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 141 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
145 142
146 result = new_inode(dir->i_sb); 143 result = new_inode(dir->i_sb);
147 if (!result) 144 if (!result)
@@ -154,9 +151,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
154 result->i_op = &hpfs_file_iops; 151 result->i_op = &hpfs_file_iops;
155 result->i_fop = &hpfs_file_ops; 152 result->i_fop = &hpfs_file_ops;
156 result->i_nlink = 1; 153 result->i_nlink = 1;
157 hpfs_decide_conv(result, name, len);
158 hpfs_i(result)->i_parent_dir = dir->i_ino; 154 hpfs_i(result)->i_parent_dir = dir->i_ino;
159 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
160 result->i_ctime.tv_nsec = 0; 156 result->i_ctime.tv_nsec = 0;
161 result->i_mtime.tv_nsec = 0; 157 result->i_mtime.tv_nsec = 0;
162 result->i_atime.tv_nsec = 0; 158 result->i_atime.tv_nsec = 0;
@@ -168,8 +164,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
168 result->i_data.a_ops = &hpfs_aops; 164 result->i_data.a_ops = &hpfs_aops;
169 hpfs_i(result)->mmu_private = 0; 165 hpfs_i(result)->mmu_private = 0;
170 166
171 mutex_lock(&hpfs_i(dir)->i_mutex); 167 r = hpfs_add_dirent(dir, name, len, &dee);
172 r = hpfs_add_dirent(dir, name, len, &dee, 0);
173 if (r == 1) 168 if (r == 1)
174 goto bail2; 169 goto bail2;
175 if (r == -1) { 170 if (r == -1) {
@@ -178,7 +173,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
178 } 173 }
179 fnode->len = len; 174 fnode->len = len;
180 memcpy(fnode->name, name, len > 15 ? 15 : len); 175 memcpy(fnode->name, name, len > 15 ? 15 : len);
181 fnode->up = dir->i_ino; 176 fnode->up = cpu_to_le32(dir->i_ino);
182 mark_buffer_dirty(bh); 177 mark_buffer_dirty(bh);
183 brelse(bh); 178 brelse(bh);
184 179
@@ -193,12 +188,10 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
193 hpfs_write_inode_nolock(result); 188 hpfs_write_inode_nolock(result);
194 } 189 }
195 d_instantiate(dentry, result); 190 d_instantiate(dentry, result);
196 mutex_unlock(&hpfs_i(dir)->i_mutex);
197 hpfs_unlock(dir->i_sb); 191 hpfs_unlock(dir->i_sb);
198 return 0; 192 return 0;
199 193
200bail2: 194bail2:
201 mutex_unlock(&hpfs_i(dir)->i_mutex);
202 iput(result); 195 iput(result);
203bail1: 196bail1:
204 brelse(bh); 197 brelse(bh);
@@ -232,8 +225,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
232 if (!(mode & 0222)) dee.read_only = 1; 225 if (!(mode & 0222)) dee.read_only = 1;
233 dee.archive = 1; 226 dee.archive = 1;
234 dee.hidden = name[0] == '.'; 227 dee.hidden = name[0] == '.';
235 dee.fnode = fno; 228 dee.fnode = cpu_to_le32(fno);
236 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 229 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
237 230
238 result = new_inode(dir->i_sb); 231 result = new_inode(dir->i_sb);
239 if (!result) 232 if (!result)
@@ -242,7 +235,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
242 hpfs_init_inode(result); 235 hpfs_init_inode(result);
243 result->i_ino = fno; 236 result->i_ino = fno;
244 hpfs_i(result)->i_parent_dir = dir->i_ino; 237 hpfs_i(result)->i_parent_dir = dir->i_ino;
245 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 238 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
246 result->i_ctime.tv_nsec = 0; 239 result->i_ctime.tv_nsec = 0;
247 result->i_mtime.tv_nsec = 0; 240 result->i_mtime.tv_nsec = 0;
248 result->i_atime.tv_nsec = 0; 241 result->i_atime.tv_nsec = 0;
@@ -254,8 +247,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
254 result->i_blocks = 1; 247 result->i_blocks = 1;
255 init_special_inode(result, mode, rdev); 248 init_special_inode(result, mode, rdev);
256 249
257 mutex_lock(&hpfs_i(dir)->i_mutex); 250 r = hpfs_add_dirent(dir, name, len, &dee);
258 r = hpfs_add_dirent(dir, name, len, &dee, 0);
259 if (r == 1) 251 if (r == 1)
260 goto bail2; 252 goto bail2;
261 if (r == -1) { 253 if (r == -1) {
@@ -264,19 +256,17 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
264 } 256 }
265 fnode->len = len; 257 fnode->len = len;
266 memcpy(fnode->name, name, len > 15 ? 15 : len); 258 memcpy(fnode->name, name, len > 15 ? 15 : len);
267 fnode->up = dir->i_ino; 259 fnode->up = cpu_to_le32(dir->i_ino);
268 mark_buffer_dirty(bh); 260 mark_buffer_dirty(bh);
269 261
270 insert_inode_hash(result); 262 insert_inode_hash(result);
271 263
272 hpfs_write_inode_nolock(result); 264 hpfs_write_inode_nolock(result);
273 d_instantiate(dentry, result); 265 d_instantiate(dentry, result);
274 mutex_unlock(&hpfs_i(dir)->i_mutex);
275 brelse(bh); 266 brelse(bh);
276 hpfs_unlock(dir->i_sb); 267 hpfs_unlock(dir->i_sb);
277 return 0; 268 return 0;
278bail2: 269bail2:
279 mutex_unlock(&hpfs_i(dir)->i_mutex);
280 iput(result); 270 iput(result);
281bail1: 271bail1:
282 brelse(bh); 272 brelse(bh);
@@ -310,8 +300,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
310 memset(&dee, 0, sizeof dee); 300 memset(&dee, 0, sizeof dee);
311 dee.archive = 1; 301 dee.archive = 1;
312 dee.hidden = name[0] == '.'; 302 dee.hidden = name[0] == '.';
313 dee.fnode = fno; 303 dee.fnode = cpu_to_le32(fno);
314 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 304 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
315 305
316 result = new_inode(dir->i_sb); 306 result = new_inode(dir->i_sb);
317 if (!result) 307 if (!result)
@@ -319,7 +309,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
319 result->i_ino = fno; 309 result->i_ino = fno;
320 hpfs_init_inode(result); 310 hpfs_init_inode(result);
321 hpfs_i(result)->i_parent_dir = dir->i_ino; 311 hpfs_i(result)->i_parent_dir = dir->i_ino;
322 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 312 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
323 result->i_ctime.tv_nsec = 0; 313 result->i_ctime.tv_nsec = 0;
324 result->i_mtime.tv_nsec = 0; 314 result->i_mtime.tv_nsec = 0;
325 result->i_atime.tv_nsec = 0; 315 result->i_atime.tv_nsec = 0;
@@ -333,8 +323,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
333 result->i_op = &page_symlink_inode_operations; 323 result->i_op = &page_symlink_inode_operations;
334 result->i_data.a_ops = &hpfs_symlink_aops; 324 result->i_data.a_ops = &hpfs_symlink_aops;
335 325
336 mutex_lock(&hpfs_i(dir)->i_mutex); 326 r = hpfs_add_dirent(dir, name, len, &dee);
337 r = hpfs_add_dirent(dir, name, len, &dee, 0);
338 if (r == 1) 327 if (r == 1)
339 goto bail2; 328 goto bail2;
340 if (r == -1) { 329 if (r == -1) {
@@ -343,7 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
343 } 332 }
344 fnode->len = len; 333 fnode->len = len;
345 memcpy(fnode->name, name, len > 15 ? 15 : len); 334 memcpy(fnode->name, name, len > 15 ? 15 : len);
346 fnode->up = dir->i_ino; 335 fnode->up = cpu_to_le32(dir->i_ino);
347 hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink)); 336 hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
348 mark_buffer_dirty(bh); 337 mark_buffer_dirty(bh);
349 brelse(bh); 338 brelse(bh);
@@ -352,11 +341,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
352 341
353 hpfs_write_inode_nolock(result); 342 hpfs_write_inode_nolock(result);
354 d_instantiate(dentry, result); 343 d_instantiate(dentry, result);
355 mutex_unlock(&hpfs_i(dir)->i_mutex);
356 hpfs_unlock(dir->i_sb); 344 hpfs_unlock(dir->i_sb);
357 return 0; 345 return 0;
358bail2: 346bail2:
359 mutex_unlock(&hpfs_i(dir)->i_mutex);
360 iput(result); 347 iput(result);
361bail1: 348bail1:
362 brelse(bh); 349 brelse(bh);
@@ -374,7 +361,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
374 struct hpfs_dirent *de; 361 struct hpfs_dirent *de;
375 struct inode *inode = dentry->d_inode; 362 struct inode *inode = dentry->d_inode;
376 dnode_secno dno; 363 dnode_secno dno;
377 fnode_secno fno;
378 int r; 364 int r;
379 int rep = 0; 365 int rep = 0;
380 int err; 366 int err;
@@ -382,8 +368,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
382 hpfs_lock(dir->i_sb); 368 hpfs_lock(dir->i_sb);
383 hpfs_adjust_length(name, &len); 369 hpfs_adjust_length(name, &len);
384again: 370again:
385 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
386 mutex_lock(&hpfs_i(dir)->i_mutex);
387 err = -ENOENT; 371 err = -ENOENT;
388 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); 372 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
389 if (!de) 373 if (!de)
@@ -397,7 +381,6 @@ again:
397 if (de->directory) 381 if (de->directory)
398 goto out1; 382 goto out1;
399 383
400 fno = de->fnode;
401 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1); 384 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
402 switch (r) { 385 switch (r) {
403 case 1: 386 case 1:
@@ -410,8 +393,6 @@ again:
410 if (rep++) 393 if (rep++)
411 break; 394 break;
412 395
413 mutex_unlock(&hpfs_i(dir)->i_mutex);
414 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
415 dentry_unhash(dentry); 396 dentry_unhash(dentry);
416 if (!d_unhashed(dentry)) { 397 if (!d_unhashed(dentry)) {
417 dput(dentry); 398 dput(dentry);
@@ -445,8 +426,6 @@ again:
445out1: 426out1:
446 hpfs_brelse4(&qbh); 427 hpfs_brelse4(&qbh);
447out: 428out:
448 mutex_unlock(&hpfs_i(dir)->i_mutex);
449 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
450 hpfs_unlock(dir->i_sb); 429 hpfs_unlock(dir->i_sb);
451 return err; 430 return err;
452} 431}
@@ -459,15 +438,12 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
459 struct hpfs_dirent *de; 438 struct hpfs_dirent *de;
460 struct inode *inode = dentry->d_inode; 439 struct inode *inode = dentry->d_inode;
461 dnode_secno dno; 440 dnode_secno dno;
462 fnode_secno fno;
463 int n_items = 0; 441 int n_items = 0;
464 int err; 442 int err;
465 int r; 443 int r;
466 444
467 hpfs_adjust_length(name, &len); 445 hpfs_adjust_length(name, &len);
468 hpfs_lock(dir->i_sb); 446 hpfs_lock(dir->i_sb);
469 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
470 mutex_lock(&hpfs_i(dir)->i_mutex);
471 err = -ENOENT; 447 err = -ENOENT;
472 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); 448 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
473 if (!de) 449 if (!de)
@@ -486,7 +462,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
486 if (n_items) 462 if (n_items)
487 goto out1; 463 goto out1;
488 464
489 fno = de->fnode;
490 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1); 465 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
491 switch (r) { 466 switch (r) {
492 case 1: 467 case 1:
@@ -505,8 +480,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
505out1: 480out1:
506 hpfs_brelse4(&qbh); 481 hpfs_brelse4(&qbh);
507out: 482out:
508 mutex_unlock(&hpfs_i(dir)->i_mutex);
509 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
510 hpfs_unlock(dir->i_sb); 483 hpfs_unlock(dir->i_sb);
511 return err; 484 return err;
512} 485}
@@ -568,12 +541,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
568 541
569 hpfs_lock(i->i_sb); 542 hpfs_lock(i->i_sb);
570 /* order doesn't matter, due to VFS exclusion */ 543 /* order doesn't matter, due to VFS exclusion */
571 mutex_lock(&hpfs_i(i)->i_parent_mutex);
572 if (new_inode)
573 mutex_lock(&hpfs_i(new_inode)->i_parent_mutex);
574 mutex_lock(&hpfs_i(old_dir)->i_mutex);
575 if (new_dir != old_dir)
576 mutex_lock(&hpfs_i(new_dir)->i_mutex);
577 544
578 /* Erm? Moving over the empty non-busy directory is perfectly legal */ 545 /* Erm? Moving over the empty non-busy directory is perfectly legal */
579 if (new_inode && S_ISDIR(new_inode->i_mode)) { 546 if (new_inode && S_ISDIR(new_inode->i_mode)) {
@@ -610,9 +577,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
610 577
611 if (new_dir == old_dir) hpfs_brelse4(&qbh); 578 if (new_dir == old_dir) hpfs_brelse4(&qbh);
612 579
613 hpfs_lock_creation(i->i_sb); 580 if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de))) {
614 if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de, 1))) {
615 hpfs_unlock_creation(i->i_sb);
616 if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!"); 581 if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!");
617 err = r == 1 ? -ENOSPC : -EFSERROR; 582 err = r == 1 ? -ENOSPC : -EFSERROR;
618 if (new_dir != old_dir) hpfs_brelse4(&qbh); 583 if (new_dir != old_dir) hpfs_brelse4(&qbh);
@@ -621,20 +586,17 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
621 586
622 if (new_dir == old_dir) 587 if (new_dir == old_dir)
623 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) { 588 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
624 hpfs_unlock_creation(i->i_sb);
625 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2"); 589 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
626 err = -ENOENT; 590 err = -ENOENT;
627 goto end1; 591 goto end1;
628 } 592 }
629 593
630 if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) { 594 if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) {
631 hpfs_unlock_creation(i->i_sb);
632 hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent"); 595 hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent");
633 err = r == 2 ? -ENOSPC : -EFSERROR; 596 err = r == 2 ? -ENOSPC : -EFSERROR;
634 goto end1; 597 goto end1;
635 } 598 }
636 hpfs_unlock_creation(i->i_sb); 599
637
638 end: 600 end:
639 hpfs_i(i)->i_parent_dir = new_dir->i_ino; 601 hpfs_i(i)->i_parent_dir = new_dir->i_ino;
640 if (S_ISDIR(i->i_mode)) { 602 if (S_ISDIR(i->i_mode)) {
@@ -642,22 +604,14 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
642 drop_nlink(old_dir); 604 drop_nlink(old_dir);
643 } 605 }
644 if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) { 606 if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) {
645 fnode->up = new_dir->i_ino; 607 fnode->up = cpu_to_le32(new_dir->i_ino);
646 fnode->len = new_len; 608 fnode->len = new_len;
647 memcpy(fnode->name, new_name, new_len>15?15:new_len); 609 memcpy(fnode->name, new_name, new_len>15?15:new_len);
648 if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len); 610 if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len);
649 mark_buffer_dirty(bh); 611 mark_buffer_dirty(bh);
650 brelse(bh); 612 brelse(bh);
651 } 613 }
652 hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
653 hpfs_decide_conv(i, new_name, new_len);
654end1: 614end1:
655 if (old_dir != new_dir)
656 mutex_unlock(&hpfs_i(new_dir)->i_mutex);
657 mutex_unlock(&hpfs_i(old_dir)->i_mutex);
658 mutex_unlock(&hpfs_i(i)->i_parent_mutex);
659 if (new_inode)
660 mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
661 hpfs_unlock(i->i_sb); 615 hpfs_unlock(i->i_sb);
662 return err; 616 return err;
663} 617}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c89b40808587..98580a3b5005 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -18,15 +18,16 @@
18 18
19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
20 20
21static void mark_dirty(struct super_block *s) 21static void mark_dirty(struct super_block *s, int remount)
22{ 22{
23 if (hpfs_sb(s)->sb_chkdsk && !(s->s_flags & MS_RDONLY)) { 23 if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) {
24 struct buffer_head *bh; 24 struct buffer_head *bh;
25 struct hpfs_spare_block *sb; 25 struct hpfs_spare_block *sb;
26 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { 26 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
27 sb->dirty = 1; 27 sb->dirty = 1;
28 sb->old_wrote = 0; 28 sb->old_wrote = 0;
29 mark_buffer_dirty(bh); 29 mark_buffer_dirty(bh);
30 sync_dirty_buffer(bh);
30 brelse(bh); 31 brelse(bh);
31 } 32 }
32 } 33 }
@@ -40,10 +41,12 @@ static void unmark_dirty(struct super_block *s)
40 struct buffer_head *bh; 41 struct buffer_head *bh;
41 struct hpfs_spare_block *sb; 42 struct hpfs_spare_block *sb;
42 if (s->s_flags & MS_RDONLY) return; 43 if (s->s_flags & MS_RDONLY) return;
44 sync_blockdev(s->s_bdev);
43 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { 45 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
44 sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error; 46 sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error;
45 sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error; 47 sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error;
46 mark_buffer_dirty(bh); 48 mark_buffer_dirty(bh);
49 sync_dirty_buffer(bh);
47 brelse(bh); 50 brelse(bh);
48 } 51 }
49} 52}
@@ -63,13 +66,13 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
63 if (!hpfs_sb(s)->sb_was_error) { 66 if (!hpfs_sb(s)->sb_was_error) {
64 if (hpfs_sb(s)->sb_err == 2) { 67 if (hpfs_sb(s)->sb_err == 2) {
65 printk("; crashing the system because you wanted it\n"); 68 printk("; crashing the system because you wanted it\n");
66 mark_dirty(s); 69 mark_dirty(s, 0);
67 panic("HPFS panic"); 70 panic("HPFS panic");
68 } else if (hpfs_sb(s)->sb_err == 1) { 71 } else if (hpfs_sb(s)->sb_err == 1) {
69 if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n"); 72 if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n");
70 else { 73 else {
71 printk("; remounting read-only\n"); 74 printk("; remounting read-only\n");
72 mark_dirty(s); 75 mark_dirty(s, 0);
73 s->s_flags |= MS_RDONLY; 76 s->s_flags |= MS_RDONLY;
74 } 77 }
75 } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n"); 78 } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
@@ -102,9 +105,12 @@ static void hpfs_put_super(struct super_block *s)
102{ 105{
103 struct hpfs_sb_info *sbi = hpfs_sb(s); 106 struct hpfs_sb_info *sbi = hpfs_sb(s);
104 107
108 hpfs_lock(s);
109 unmark_dirty(s);
110 hpfs_unlock(s);
111
105 kfree(sbi->sb_cp_table); 112 kfree(sbi->sb_cp_table);
106 kfree(sbi->sb_bmp_dir); 113 kfree(sbi->sb_bmp_dir);
107 unmark_dirty(s);
108 s->s_fs_info = NULL; 114 s->s_fs_info = NULL;
109 kfree(sbi); 115 kfree(sbi);
110} 116}
@@ -129,7 +135,7 @@ static unsigned count_bitmaps(struct super_block *s)
129 n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; 135 n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
130 count = 0; 136 count = 0;
131 for (n = 0; n < n_bands; n++) 137 for (n = 0; n < n_bands; n++)
132 count += hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_bmp_dir[n]); 138 count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
133 return count; 139 return count;
134} 140}
135 141
@@ -188,8 +194,6 @@ static void init_once(void *foo)
188{ 194{
189 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 195 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
190 196
191 mutex_init(&ei->i_mutex);
192 mutex_init(&ei->i_parent_mutex);
193 inode_init_once(&ei->vfs_inode); 197 inode_init_once(&ei->vfs_inode);
194} 198}
195 199
@@ -218,7 +222,6 @@ static void destroy_inodecache(void)
218 222
219enum { 223enum {
220 Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis, 224 Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis,
221 Opt_conv_binary, Opt_conv_text, Opt_conv_auto,
222 Opt_check_none, Opt_check_normal, Opt_check_strict, 225 Opt_check_none, Opt_check_normal, Opt_check_strict,
223 Opt_err_cont, Opt_err_ro, Opt_err_panic, 226 Opt_err_cont, Opt_err_ro, Opt_err_panic,
224 Opt_eas_no, Opt_eas_ro, Opt_eas_rw, 227 Opt_eas_no, Opt_eas_ro, Opt_eas_rw,
@@ -233,9 +236,6 @@ static const match_table_t tokens = {
233 {Opt_umask, "umask=%o"}, 236 {Opt_umask, "umask=%o"},
234 {Opt_case_lower, "case=lower"}, 237 {Opt_case_lower, "case=lower"},
235 {Opt_case_asis, "case=asis"}, 238 {Opt_case_asis, "case=asis"},
236 {Opt_conv_binary, "conv=binary"},
237 {Opt_conv_text, "conv=text"},
238 {Opt_conv_auto, "conv=auto"},
239 {Opt_check_none, "check=none"}, 239 {Opt_check_none, "check=none"},
240 {Opt_check_normal, "check=normal"}, 240 {Opt_check_normal, "check=normal"},
241 {Opt_check_strict, "check=strict"}, 241 {Opt_check_strict, "check=strict"},
@@ -253,7 +253,7 @@ static const match_table_t tokens = {
253}; 253};
254 254
255static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, 255static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
256 int *lowercase, int *conv, int *eas, int *chk, int *errs, 256 int *lowercase, int *eas, int *chk, int *errs,
257 int *chkdsk, int *timeshift) 257 int *chkdsk, int *timeshift)
258{ 258{
259 char *p; 259 char *p;
@@ -295,15 +295,6 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
295 case Opt_case_asis: 295 case Opt_case_asis:
296 *lowercase = 0; 296 *lowercase = 0;
297 break; 297 break;
298 case Opt_conv_binary:
299 *conv = CONV_BINARY;
300 break;
301 case Opt_conv_text:
302 *conv = CONV_TEXT;
303 break;
304 case Opt_conv_auto:
305 *conv = CONV_AUTO;
306 break;
307 case Opt_check_none: 298 case Opt_check_none:
308 *chk = 0; 299 *chk = 0;
309 break; 300 break;
@@ -370,9 +361,6 @@ HPFS filesystem options:\n\
370 umask=xxx set mode of files that don't have mode specified in eas\n\ 361 umask=xxx set mode of files that don't have mode specified in eas\n\
371 case=lower lowercase all files\n\ 362 case=lower lowercase all files\n\
372 case=asis do not lowercase files (default)\n\ 363 case=asis do not lowercase files (default)\n\
373 conv=binary do not convert CR/LF -> LF (default)\n\
374 conv=auto convert only files with known text extensions\n\
375 conv=text convert all files\n\
376 check=none no fs checks - kernel may crash on corrupted filesystem\n\ 364 check=none no fs checks - kernel may crash on corrupted filesystem\n\
377 check=normal do some checks - it should not crash (default)\n\ 365 check=normal do some checks - it should not crash (default)\n\
378 check=strict do extra time-consuming checks, used for debugging\n\ 366 check=strict do extra time-consuming checks, used for debugging\n\
@@ -394,7 +382,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
394 uid_t uid; 382 uid_t uid;
395 gid_t gid; 383 gid_t gid;
396 umode_t umask; 384 umode_t umask;
397 int lowercase, conv, eas, chk, errs, chkdsk, timeshift; 385 int lowercase, eas, chk, errs, chkdsk, timeshift;
398 int o; 386 int o;
399 struct hpfs_sb_info *sbi = hpfs_sb(s); 387 struct hpfs_sb_info *sbi = hpfs_sb(s);
400 char *new_opts = kstrdup(data, GFP_KERNEL); 388 char *new_opts = kstrdup(data, GFP_KERNEL);
@@ -405,11 +393,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
405 lock_super(s); 393 lock_super(s);
406 uid = sbi->sb_uid; gid = sbi->sb_gid; 394 uid = sbi->sb_uid; gid = sbi->sb_gid;
407 umask = 0777 & ~sbi->sb_mode; 395 umask = 0777 & ~sbi->sb_mode;
408 lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; 396 lowercase = sbi->sb_lowercase;
409 eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk; 397 eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk;
410 errs = sbi->sb_err; timeshift = sbi->sb_timeshift; 398 errs = sbi->sb_err; timeshift = sbi->sb_timeshift;
411 399
412 if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv, 400 if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase,
413 &eas, &chk, &errs, &chkdsk, &timeshift))) { 401 &eas, &chk, &errs, &chkdsk, &timeshift))) {
414 printk("HPFS: bad mount options.\n"); 402 printk("HPFS: bad mount options.\n");
415 goto out_err; 403 goto out_err;
@@ -427,11 +415,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
427 415
428 sbi->sb_uid = uid; sbi->sb_gid = gid; 416 sbi->sb_uid = uid; sbi->sb_gid = gid;
429 sbi->sb_mode = 0777 & ~umask; 417 sbi->sb_mode = 0777 & ~umask;
430 sbi->sb_lowercase = lowercase; sbi->sb_conv = conv; 418 sbi->sb_lowercase = lowercase;
431 sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk; 419 sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
432 sbi->sb_err = errs; sbi->sb_timeshift = timeshift; 420 sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
433 421
434 if (!(*flags & MS_RDONLY)) mark_dirty(s); 422 if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
435 423
436 replace_mount_options(s, new_opts); 424 replace_mount_options(s, new_opts);
437 425
@@ -471,7 +459,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
471 uid_t uid; 459 uid_t uid;
472 gid_t gid; 460 gid_t gid;
473 umode_t umask; 461 umode_t umask;
474 int lowercase, conv, eas, chk, errs, chkdsk, timeshift; 462 int lowercase, eas, chk, errs, chkdsk, timeshift;
475 463
476 dnode_secno root_dno; 464 dnode_secno root_dno;
477 struct hpfs_dirent *de = NULL; 465 struct hpfs_dirent *de = NULL;
@@ -479,11 +467,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
479 467
480 int o; 468 int o;
481 469
482 if (num_possible_cpus() > 1) {
483 printk(KERN_ERR "HPFS is not SMP safe\n");
484 return -EINVAL;
485 }
486
487 save_mount_options(s, options); 470 save_mount_options(s, options);
488 471
489 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 472 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
@@ -495,20 +478,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
495 sbi->sb_bmp_dir = NULL; 478 sbi->sb_bmp_dir = NULL;
496 sbi->sb_cp_table = NULL; 479 sbi->sb_cp_table = NULL;
497 480
498 mutex_init(&sbi->hpfs_creation_de); 481 mutex_init(&sbi->hpfs_mutex);
482 hpfs_lock(s);
499 483
500 uid = current_uid(); 484 uid = current_uid();
501 gid = current_gid(); 485 gid = current_gid();
502 umask = current_umask(); 486 umask = current_umask();
503 lowercase = 0; 487 lowercase = 0;
504 conv = CONV_BINARY;
505 eas = 2; 488 eas = 2;
506 chk = 1; 489 chk = 1;
507 errs = 1; 490 errs = 1;
508 chkdsk = 1; 491 chkdsk = 1;
509 timeshift = 0; 492 timeshift = 0;
510 493
511 if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &conv, 494 if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase,
512 &eas, &chk, &errs, &chkdsk, &timeshift))) { 495 &eas, &chk, &errs, &chkdsk, &timeshift))) {
513 printk("HPFS: bad mount options.\n"); 496 printk("HPFS: bad mount options.\n");
514 goto bail0; 497 goto bail0;
@@ -526,9 +509,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
526 if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3; 509 if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3;
527 510
528 /* Check magics */ 511 /* Check magics */
529 if (/*bootblock->magic != BB_MAGIC 512 if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC
530 ||*/ superblock->magic != SB_MAGIC 513 ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC
531 || spareblock->magic != SP_MAGIC) { 514 || le32_to_cpu(spareblock->magic) != SP_MAGIC) {
532 if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n"); 515 if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n");
533 goto bail4; 516 goto bail4;
534 } 517 }
@@ -549,19 +532,18 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
549 s->s_op = &hpfs_sops; 532 s->s_op = &hpfs_sops;
550 s->s_d_op = &hpfs_dentry_operations; 533 s->s_d_op = &hpfs_dentry_operations;
551 534
552 sbi->sb_root = superblock->root; 535 sbi->sb_root = le32_to_cpu(superblock->root);
553 sbi->sb_fs_size = superblock->n_sectors; 536 sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors);
554 sbi->sb_bitmaps = superblock->bitmaps; 537 sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps);
555 sbi->sb_dirband_start = superblock->dir_band_start; 538 sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start);
556 sbi->sb_dirband_size = superblock->n_dir_band; 539 sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band);
557 sbi->sb_dmap = superblock->dir_band_bitmap; 540 sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap);
558 sbi->sb_uid = uid; 541 sbi->sb_uid = uid;
559 sbi->sb_gid = gid; 542 sbi->sb_gid = gid;
560 sbi->sb_mode = 0777 & ~umask; 543 sbi->sb_mode = 0777 & ~umask;
561 sbi->sb_n_free = -1; 544 sbi->sb_n_free = -1;
562 sbi->sb_n_free_dnodes = -1; 545 sbi->sb_n_free_dnodes = -1;
563 sbi->sb_lowercase = lowercase; 546 sbi->sb_lowercase = lowercase;
564 sbi->sb_conv = conv;
565 sbi->sb_eas = eas; 547 sbi->sb_eas = eas;
566 sbi->sb_chk = chk; 548 sbi->sb_chk = chk;
567 sbi->sb_chkdsk = chkdsk; 549 sbi->sb_chkdsk = chkdsk;
@@ -573,7 +555,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
573 sbi->sb_max_fwd_alloc = 0xffffff; 555 sbi->sb_max_fwd_alloc = 0xffffff;
574 556
575 /* Load bitmap directory */ 557 /* Load bitmap directory */
576 if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, superblock->bitmaps))) 558 if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps))))
577 goto bail4; 559 goto bail4;
578 560
579 /* Check for general fs errors*/ 561 /* Check for general fs errors*/
@@ -591,20 +573,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
591 mark_buffer_dirty(bh2); 573 mark_buffer_dirty(bh2);
592 } 574 }
593 575
594 if (spareblock->hotfixes_used || spareblock->n_spares_used) { 576 if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) {
595 if (errs >= 2) { 577 if (errs >= 2) {
596 printk("HPFS: Hotfixes not supported here, try chkdsk\n"); 578 printk("HPFS: Hotfixes not supported here, try chkdsk\n");
597 mark_dirty(s); 579 mark_dirty(s, 0);
598 goto bail4; 580 goto bail4;
599 } 581 }
600 hpfs_error(s, "hotfixes not supported here, try chkdsk"); 582 hpfs_error(s, "hotfixes not supported here, try chkdsk");
601 if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n"); 583 if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n");
602 else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n"); 584 else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n");
603 } 585 }
604 if (spareblock->n_dnode_spares != spareblock->n_dnode_spares_free) { 586 if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) {
605 if (errs >= 2) { 587 if (errs >= 2) {
606 printk("HPFS: Spare dnodes used, try chkdsk\n"); 588 printk("HPFS: Spare dnodes used, try chkdsk\n");
607 mark_dirty(s); 589 mark_dirty(s, 0);
608 goto bail4; 590 goto bail4;
609 } 591 }
610 hpfs_error(s, "warning: spare dnodes used, try chkdsk"); 592 hpfs_error(s, "warning: spare dnodes used, try chkdsk");
@@ -612,26 +594,26 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
612 } 594 }
613 if (chk) { 595 if (chk) {
614 unsigned a; 596 unsigned a;
615 if (superblock->dir_band_end - superblock->dir_band_start + 1 != superblock->n_dir_band || 597 if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) ||
616 superblock->dir_band_end < superblock->dir_band_start || superblock->n_dir_band > 0x4000) { 598 le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) {
617 hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x", 599 hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x",
618 superblock->dir_band_start, superblock->dir_band_end, superblock->n_dir_band); 600 le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band));
619 goto bail4; 601 goto bail4;
620 } 602 }
621 a = sbi->sb_dirband_size; 603 a = sbi->sb_dirband_size;
622 sbi->sb_dirband_size = 0; 604 sbi->sb_dirband_size = 0;
623 if (hpfs_chk_sectors(s, superblock->dir_band_start, superblock->n_dir_band, "dir_band") || 605 if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") ||
624 hpfs_chk_sectors(s, superblock->dir_band_bitmap, 4, "dir_band_bitmap") || 606 hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") ||
625 hpfs_chk_sectors(s, superblock->bitmaps, 4, "bitmaps")) { 607 hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) {
626 mark_dirty(s); 608 mark_dirty(s, 0);
627 goto bail4; 609 goto bail4;
628 } 610 }
629 sbi->sb_dirband_size = a; 611 sbi->sb_dirband_size = a;
630 } else printk("HPFS: You really don't want any checks? You are crazy...\n"); 612 } else printk("HPFS: You really don't want any checks? You are crazy...\n");
631 613
632 /* Load code page table */ 614 /* Load code page table */
633 if (spareblock->n_code_pages) 615 if (le32_to_cpu(spareblock->n_code_pages))
634 if (!(sbi->sb_cp_table = hpfs_load_code_page(s, spareblock->code_page_dir))) 616 if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir))))
635 printk("HPFS: Warning: code page support is disabled\n"); 617 printk("HPFS: Warning: code page support is disabled\n");
636 618
637 brelse(bh2); 619 brelse(bh2);
@@ -660,13 +642,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
660 if (!de) 642 if (!de)
661 hpfs_error(s, "unable to find root dir"); 643 hpfs_error(s, "unable to find root dir");
662 else { 644 else {
663 root->i_atime.tv_sec = local_to_gmt(s, de->read_date); 645 root->i_atime.tv_sec = local_to_gmt(s, le32_to_cpu(de->read_date));
664 root->i_atime.tv_nsec = 0; 646 root->i_atime.tv_nsec = 0;
665 root->i_mtime.tv_sec = local_to_gmt(s, de->write_date); 647 root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
666 root->i_mtime.tv_nsec = 0; 648 root->i_mtime.tv_nsec = 0;
667 root->i_ctime.tv_sec = local_to_gmt(s, de->creation_date); 649 root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
668 root->i_ctime.tv_nsec = 0; 650 root->i_ctime.tv_nsec = 0;
669 hpfs_i(root)->i_ea_size = de->ea_size; 651 hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size);
670 hpfs_i(root)->i_parent_dir = root->i_ino; 652 hpfs_i(root)->i_parent_dir = root->i_ino;
671 if (root->i_size == -1) 653 if (root->i_size == -1)
672 root->i_size = 2048; 654 root->i_size = 2048;
@@ -674,6 +656,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
674 root->i_blocks = 5; 656 root->i_blocks = 5;
675 hpfs_brelse4(&qbh); 657 hpfs_brelse4(&qbh);
676 } 658 }
659 hpfs_unlock(s);
677 return 0; 660 return 0;
678 661
679bail4: brelse(bh2); 662bail4: brelse(bh2);
@@ -681,6 +664,7 @@ bail3: brelse(bh1);
681bail2: brelse(bh0); 664bail2: brelse(bh0);
682bail1: 665bail1:
683bail0: 666bail0:
667 hpfs_unlock(s);
684 kfree(sbi->sb_bmp_dir); 668 kfree(sbi->sb_bmp_dir);
685 kfree(sbi->sb_cp_table); 669 kfree(sbi->sb_cp_table);
686 s->s_fs_info = NULL; 670 s->s_fs_info = NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 5f4e11aaeb5c..33c963d08ab4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -125,6 +125,14 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
125static DECLARE_RWSEM(iprune_sem); 125static DECLARE_RWSEM(iprune_sem);
126 126
127/* 127/*
128 * Empty aops. Can be used for the cases where the user does not
129 * define any of the address_space operations.
130 */
131const struct address_space_operations empty_aops = {
132};
133EXPORT_SYMBOL(empty_aops);
134
135/*
128 * Statistics gathering.. 136 * Statistics gathering..
129 */ 137 */
130struct inodes_stat_t inodes_stat; 138struct inodes_stat_t inodes_stat;
@@ -176,7 +184,6 @@ int proc_nr_inodes(ctl_table *table, int write,
176 */ 184 */
177int inode_init_always(struct super_block *sb, struct inode *inode) 185int inode_init_always(struct super_block *sb, struct inode *inode)
178{ 186{
179 static const struct address_space_operations empty_aops;
180 static const struct inode_operations empty_iops; 187 static const struct inode_operations empty_iops;
181 static const struct file_operations empty_fops; 188 static const struct file_operations empty_fops;
182 struct address_space *const mapping = &inode->i_data; 189 struct address_space *const mapping = &inode->i_data;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index da871ee084d3..69b180459463 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -362,7 +362,7 @@ void journal_commit_transaction(journal_t *journal)
362 * we do not require it to remember exactly which old buffers it 362 * we do not require it to remember exactly which old buffers it
363 * has reserved. This is consistent with the existing behaviour 363 * has reserved. This is consistent with the existing behaviour
364 * that multiple journal_get_write_access() calls to the same 364 * that multiple journal_get_write_access() calls to the same
365 * buffer are perfectly permissable. 365 * buffer are perfectly permissible.
366 */ 366 */
367 while (commit_transaction->t_reserved_list) { 367 while (commit_transaction->t_reserved_list) {
368 jh = commit_transaction->t_reserved_list; 368 jh = commit_transaction->t_reserved_list;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index eb11601f2e00..b3713afaaa9e 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -770,7 +770,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
770 journal->j_wbufsize = n; 770 journal->j_wbufsize = n;
771 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 771 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
772 if (!journal->j_wbuf) { 772 if (!journal->j_wbuf) {
773 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 773 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
774 __func__); 774 __func__);
775 goto out_err; 775 goto out_err;
776 } 776 }
@@ -831,7 +831,7 @@ journal_t * journal_init_inode (struct inode *inode)
831 journal->j_wbufsize = n; 831 journal->j_wbufsize = n;
832 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 832 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
833 if (!journal->j_wbuf) { 833 if (!journal->j_wbuf) {
834 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 834 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
835 __func__); 835 __func__);
836 goto out_err; 836 goto out_err;
837 } 837 }
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d29018307e2e..305a90763154 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -71,7 +71,7 @@
71 * switching hash tables under them. For operations on the lists of entries in 71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used. 72 * the hash table j_revoke_lock is used.
73 * 73 *
74 * Finally, also replay code uses the hash tables but at this moment noone else 74 * Finally, also replay code uses the hash tables but at this moment no one else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is 75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed. 76 * needed.
77 */ 77 */
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5b2e4c30a2a1..60d2319651b2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1392,7 +1392,7 @@ int journal_stop(handle_t *handle)
1392 * by 30x or more... 1392 * by 30x or more...
1393 * 1393 *
1394 * We try and optimize the sleep time against what the underlying disk 1394 * We try and optimize the sleep time against what the underlying disk
1395 * can do, instead of having a static sleep time. This is usefull for 1395 * can do, instead of having a static sleep time. This is useful for
1396 * the case where our storage is so fast that it is more optimal to go 1396 * the case where our storage is so fast that it is more optimal to go
1397 * ahead and force a flush and wait for the transaction to be committed 1397 * ahead and force a flush and wait for the transaction to be committed
1398 * than it is to wait for an arbitrary amount of time for new writers to 1398 * than it is to wait for an arbitrary amount of time for new writers to
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index fa36d7662b21..6e28000a4b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
105 int ret; 105 int ret;
106 struct timespec now = current_kernel_time(); 106 struct timespec now = current_kernel_time();
107 107
108 *cbh = NULL;
109
108 if (is_journal_aborted(journal)) 110 if (is_journal_aborted(journal))
109 return 0; 111 return 0;
110 112
@@ -403,7 +405,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
403 * we do not require it to remember exactly which old buffers it 405 * we do not require it to remember exactly which old buffers it
404 * has reserved. This is consistent with the existing behaviour 406 * has reserved. This is consistent with the existing behaviour
405 * that multiple jbd2_journal_get_write_access() calls to the same 407 * that multiple jbd2_journal_get_write_access() calls to the same
406 * buffer are perfectly permissable. 408 * buffer are perfectly permissible.
407 */ 409 */
408 while (commit_transaction->t_reserved_list) { 410 while (commit_transaction->t_reserved_list) {
409 jh = commit_transaction->t_reserved_list; 411 jh = commit_transaction->t_reserved_list;
@@ -806,7 +808,7 @@ wait_for_iobuf:
806 if (err) 808 if (err)
807 __jbd2_journal_abort_hard(journal); 809 __jbd2_journal_abort_hard(journal);
808 } 810 }
809 if (!err && !is_journal_aborted(journal)) 811 if (cbh)
810 err = journal_wait_on_commit_record(journal, cbh); 812 err = journal_wait_on_commit_record(journal, cbh);
811 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 813 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
812 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && 814 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 90407b8fece7..e0ec3db1c395 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -917,7 +917,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
917 journal->j_wbufsize = n; 917 journal->j_wbufsize = n;
918 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 918 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
919 if (!journal->j_wbuf) { 919 if (!journal->j_wbuf) {
920 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 920 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
921 __func__); 921 __func__);
922 goto out_err; 922 goto out_err;
923 } 923 }
@@ -983,7 +983,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
983 journal->j_wbufsize = n; 983 journal->j_wbufsize = n;
984 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 984 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
985 if (!journal->j_wbuf) { 985 if (!journal->j_wbuf) {
986 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 986 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
987 __func__); 987 __func__);
988 goto out_err; 988 goto out_err;
989 } 989 }
@@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)
2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2413 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2414 if (!new_dev) 2414 if (!new_dev)
2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ 2415 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2416 bd = bdget(device);
2416 spin_lock(&devname_cache_lock); 2417 spin_lock(&devname_cache_lock);
2417 if (devcache[i]) { 2418 if (devcache[i]) {
2418 if (devcache[i]->device == device) { 2419 if (devcache[i]->device == device) {
2419 kfree(new_dev); 2420 kfree(new_dev);
2421 bdput(bd);
2420 ret = devcache[i]->devname; 2422 ret = devcache[i]->devname;
2421 spin_unlock(&devname_cache_lock); 2423 spin_unlock(&devname_cache_lock);
2422 return ret; 2424 return ret;
@@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)
2425 } 2427 }
2426 devcache[i] = new_dev; 2428 devcache[i] = new_dev;
2427 devcache[i]->device = device; 2429 devcache[i]->device = device;
2428 bd = bdget(device);
2429 if (bd) { 2430 if (bd) {
2430 bdevname(bd, devcache[i]->devname); 2431 bdevname(bd, devcache[i]->devname);
2431 bdput(bd); 2432 bdput(bd);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 9ad321fd63fd..69fd93588118 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -71,7 +71,7 @@
71 * switching hash tables under them. For operations on the lists of entries in 71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used. 72 * the hash table j_revoke_lock is used.
73 * 73 *
74 * Finally, also replay code uses the hash tables but at this moment noone else 74 * Finally, also replay code uses the hash tables but at this moment no one else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is 75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed. 76 * needed.
77 */ 77 */
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 1d1191050f99..05fa77a23711 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1403,7 +1403,7 @@ int jbd2_journal_stop(handle_t *handle)
1403 1403
1404 /* 1404 /*
1405 * Once we drop t_updates, if it goes to zero the transaction 1405 * Once we drop t_updates, if it goes to zero the transaction
1406 * could start commiting on us and eventually disappear. So 1406 * could start committing on us and eventually disappear. So
1407 * once we do this, we must not dereference transaction 1407 * once we do this, we must not dereference transaction
1408 * pointer again. 1408 * pointer again.
1409 */ 1409 */
diff --git a/fs/jffs2/TODO b/fs/jffs2/TODO
index 5d3ea4070f01..ca28964abd4b 100644
--- a/fs/jffs2/TODO
+++ b/fs/jffs2/TODO
@@ -11,7 +11,7 @@
11 - checkpointing (do we need this? scan is quite fast) 11 - checkpointing (do we need this? scan is quite fast)
12 - make the scan code populate real inodes so read_inode just after 12 - make the scan code populate real inodes so read_inode just after
13 mount doesn't have to read the flash twice for large files. 13 mount doesn't have to read the flash twice for large files.
14 Make this a per-inode option, changable with chattr, so you can 14 Make this a per-inode option, changeable with chattr, so you can
15 decide which inodes should be in-core immediately after mount. 15 decide which inodes should be in-core immediately after mount.
16 - test, test, test 16 - test, test, test
17 17
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index d32ee9412cb9..2ab1a0d91210 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -24,7 +24,7 @@
24 * 24 *
25 * Returns: 0 if the data CRC is correct; 25 * Returns: 0 if the data CRC is correct;
26 * 1 - if incorrect; 26 * 1 - if incorrect;
27 * error code if an error occured. 27 * error code if an error occurred.
28 */ 28 */
29static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) 29static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
30{ 30{
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 800171dca53b..e537fb0e0184 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -121,7 +121,7 @@ int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri,
121 temp->nodetype = ri->nodetype; 121 temp->nodetype = ri->nodetype;
122 temp->inode = ri->ino; 122 temp->inode = ri->ino;
123 temp->version = ri->version; 123 temp->version = ri->version;
124 temp->offset = cpu_to_je32(ofs); /* relative offset from the begining of the jeb */ 124 temp->offset = cpu_to_je32(ofs); /* relative offset from the beginning of the jeb */
125 temp->totlen = ri->totlen; 125 temp->totlen = ri->totlen;
126 temp->next = NULL; 126 temp->next = NULL;
127 127
@@ -139,7 +139,7 @@ int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *r
139 139
140 temp->nodetype = rd->nodetype; 140 temp->nodetype = rd->nodetype;
141 temp->totlen = rd->totlen; 141 temp->totlen = rd->totlen;
142 temp->offset = cpu_to_je32(ofs); /* relative from the begining of the jeb */ 142 temp->offset = cpu_to_je32(ofs); /* relative from the beginning of the jeb */
143 temp->pino = rd->pino; 143 temp->pino = rd->pino;
144 temp->version = rd->version; 144 temp->version = rd->version;
145 temp->ino = rd->ino; 145 temp->ino = rd->ino;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 07ee1546b2fa..4515bea0268f 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1116,7 +1116,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
1116 1116
1117/* 1117/*
1118 * On NAND we try to mark this block bad. If the block was erased more 1118 * On NAND we try to mark this block bad. If the block was erased more
1119 * than MAX_ERASE_FAILURES we mark it finaly bad. 1119 * than MAX_ERASE_FAILURES we mark it finally bad.
1120 * Don't care about failures. This block remains on the erase-pending 1120 * Don't care about failures. This block remains on the erase-pending
1121 * or badblock list as long as nobody manipulates the flash with 1121 * or badblock list as long as nobody manipulates the flash with
1122 * a bootloader or something like that. 1122 * a bootloader or something like that.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index c92ea3b3ea5e..4496872cf4e7 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1649,7 +1649,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1649 } 1649 }
1650 1650
1651 /* search the tree within the dmap control page for 1651 /* search the tree within the dmap control page for
1652 * sufficent free space. if sufficient free space is found, 1652 * sufficient free space. if sufficient free space is found,
1653 * dbFindLeaf() returns the index of the leaf at which 1653 * dbFindLeaf() returns the index of the leaf at which
1654 * free space was found. 1654 * free space was found.
1655 */ 1655 */
@@ -2744,7 +2744,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
2744 /* check which (leafno or buddy) is the left buddy. 2744 /* check which (leafno or buddy) is the left buddy.
2745 * the left buddy gets to claim the blocks resulting 2745 * the left buddy gets to claim the blocks resulting
2746 * from the join while the right gets to claim none. 2746 * from the join while the right gets to claim none.
2747 * the left buddy is also eligable to participate in 2747 * the left buddy is also eligible to participate in
2748 * a join at the next higher level while the right 2748 * a join at the next higher level while the right
2749 * is not. 2749 * is not.
2750 * 2750 *
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 5d3bbd10f8db..e5fe8506ed16 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
126 126
127 /* allocate the disk blocks for the extent. initially, extBalloc() 127 /* allocate the disk blocks for the extent. initially, extBalloc()
128 * will try to allocate disk blocks for the requested size (xlen). 128 * will try to allocate disk blocks for the requested size (xlen).
129 * if this fails (xlen contiguous free blocks not avaliable), it'll 129 * if this fails (xlen contiguous free blocks not available), it'll
130 * try to allocate a smaller number of blocks (producing a smaller 130 * try to allocate a smaller number of blocks (producing a smaller
131 * extent), with this smaller number of blocks consisting of the 131 * extent), with this smaller number of blocks consisting of the
132 * requested number of blocks rounded down to the next smaller 132 * requested number of blocks rounded down to the next smaller
@@ -481,7 +481,7 @@ int extFill(struct inode *ip, xad_t * xp)
481 * 481 *
482 * initially, we will try to allocate disk blocks for the 482 * initially, we will try to allocate disk blocks for the
483 * requested size (nblocks). if this fails (nblocks 483 * requested size (nblocks). if this fails (nblocks
484 * contiguous free blocks not avaliable), we'll try to allocate 484 * contiguous free blocks not available), we'll try to allocate
485 * a smaller number of blocks (producing a smaller extent), with 485 * a smaller number of blocks (producing a smaller extent), with
486 * this smaller number of blocks consisting of the requested 486 * this smaller number of blocks consisting of the requested
487 * number of blocks rounded down to the next smaller power of 2 487 * number of blocks rounded down to the next smaller power of 2
@@ -575,7 +575,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
575 * to a new set of blocks. If moving the extent, we initially 575 * to a new set of blocks. If moving the extent, we initially
576 * will try to allocate disk blocks for the requested size 576 * will try to allocate disk blocks for the requested size
577 * (newnblks). if this fails (new contiguous free blocks not 577 * (newnblks). if this fails (new contiguous free blocks not
578 * avaliable), we'll try to allocate a smaller number of 578 * available), we'll try to allocate a smaller number of
579 * blocks (producing a smaller extent), with this smaller 579 * blocks (producing a smaller extent), with this smaller
580 * number of blocks consisting of the requested number of 580 * number of blocks consisting of the requested number of
581 * blocks rounded down to the next smaller power of 2 581 * blocks rounded down to the next smaller power of 2
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 3a09423b6c22..ed53a4740168 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1069,7 +1069,7 @@ int diFree(struct inode *ip)
1069 */ 1069 */
1070 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1070 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1071 /* in preparation for removing the iag from the 1071 /* in preparation for removing the iag from the
1072 * ag extent free list, read the iags preceeding 1072 * ag extent free list, read the iags preceding
1073 * and following the iag on the ag extent free 1073 * and following the iag on the ag extent free
1074 * list. 1074 * list.
1075 */ 1075 */
@@ -1095,7 +1095,7 @@ int diFree(struct inode *ip)
1095 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1095 int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1096 1096
1097 /* in preparation for removing the iag from the 1097 /* in preparation for removing the iag from the
1098 * ag inode free list, read the iags preceeding 1098 * ag inode free list, read the iags preceding
1099 * and following the iag on the ag inode free 1099 * and following the iag on the ag inode free
1100 * list. before reading these iags, we must make 1100 * list. before reading these iags, we must make
1101 * sure that we already don't have them in hand 1101 * sure that we already don't have them in hand
@@ -1681,7 +1681,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1681 * try to allocate a new extent of free inodes. 1681 * try to allocate a new extent of free inodes.
1682 */ 1682 */
1683 if (addext) { 1683 if (addext) {
1684 /* if free space is not avaliable for this new extent, try 1684 /* if free space is not available for this new extent, try
1685 * below to allocate a free and existing (already backed) 1685 * below to allocate a free and existing (already backed)
1686 * inode from the ag. 1686 * inode from the ag.
1687 */ 1687 */
@@ -2036,7 +2036,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2036 2036
2037 /* check if this is the last free inode within the iag. 2037 /* check if this is the last free inode within the iag.
2038 * if so, it will have to be removed from the ag free 2038 * if so, it will have to be removed from the ag free
2039 * inode list, so get the iags preceeding and following 2039 * inode list, so get the iags preceding and following
2040 * it on the list. 2040 * it on the list.
2041 */ 2041 */
2042 if (iagp->nfreeinos == cpu_to_le32(1)) { 2042 if (iagp->nfreeinos == cpu_to_le32(1)) {
@@ -2208,7 +2208,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2208 2208
2209 /* check if this is the last free extent within the 2209 /* check if this is the last free extent within the
2210 * iag. if so, the iag must be removed from the ag 2210 * iag. if so, the iag must be removed from the ag
2211 * free extent list, so get the iags preceeding and 2211 * free extent list, so get the iags preceding and
2212 * following the iag on this list. 2212 * following the iag on this list.
2213 */ 2213 */
2214 if (iagp->nfreeexts == cpu_to_le32(1)) { 2214 if (iagp->nfreeexts == cpu_to_le32(1)) {
@@ -2504,7 +2504,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2504 } 2504 }
2505 2505
2506 2506
2507 /* get the next avaliable iag number */ 2507 /* get the next available iag number */
2508 iagno = imap->im_nextiag; 2508 iagno = imap->im_nextiag;
2509 2509
2510 /* make sure that we have not exceeded the maximum inode 2510 /* make sure that we have not exceeded the maximum inode
@@ -2615,7 +2615,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2615 2615
2616 duplicateIXtree(sb, blkno, xlen, &xaddr); 2616 duplicateIXtree(sb, blkno, xlen, &xaddr);
2617 2617
2618 /* update the next avaliable iag number */ 2618 /* update the next available iag number */
2619 imap->im_nextiag += 1; 2619 imap->im_nextiag += 1;
2620 2620
2621 /* Add the iag to the iag free list so we don't lose the iag 2621 /* Add the iag to the iag free list so we don't lose the iag
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 9236bc49ae7f..e38c21598850 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -288,7 +288,7 @@ struct lrd {
288 /* 288 /*
289 * SYNCPT: log sync point 289 * SYNCPT: log sync point
290 * 290 *
291 * replay log upto syncpt address specified; 291 * replay log up to syncpt address specified;
292 */ 292 */
293 struct { 293 struct {
294 __le32 sync; /* 4: syncpt address (0 = here) */ 294 __le32 sync; /* 4: syncpt address (0 = here) */
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index d94f8d9e87d7..a78beda85f68 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -75,7 +75,7 @@ extern void grab_metapage(struct metapage *);
75extern void force_metapage(struct metapage *); 75extern void force_metapage(struct metapage *);
76 76
77/* 77/*
78 * hold_metapage and put_metapage are used in conjuction. The page lock 78 * hold_metapage and put_metapage are used in conjunction. The page lock
79 * is not dropped between the two, so no other threads can get or release 79 * is not dropped between the two, so no other threads can get or release
80 * the metapage 80 * the metapage
81 */ 81 */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 9466957ec841..f6cc0c09ec63 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -636,7 +636,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
636 * the inode of the page and available to all anonymous 636 * the inode of the page and available to all anonymous
637 * transactions until txCommit() time at which point 637 * transactions until txCommit() time at which point
638 * they are transferred to the transaction tlock list of 638 * they are transferred to the transaction tlock list of
639 * the commiting transaction of the inode) 639 * the committing transaction of the inode)
640 */ 640 */
641 if (xtid == 0) { 641 if (xtid == 0) {
642 tlck->tid = tid; 642 tlck->tid = tid;
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 1aba0039f1c9..8ea5efb5a34e 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -57,7 +57,7 @@
57 * 2. compute new FSCKSize from new LVSize; 57 * 2. compute new FSCKSize from new LVSize;
58 * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where 58 * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
59 * assert(new FSSize >= old FSSize), 59 * assert(new FSSize >= old FSSize),
60 * i.e., file system must not be shrinked; 60 * i.e., file system must not be shrunk;
61 */ 61 */
62int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) 62int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
63{ 63{
@@ -182,7 +182,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
182 */ 182 */
183 newFSSize = newLVSize - newLogSize - newFSCKSize; 183 newFSSize = newLVSize - newLogSize - newFSCKSize;
184 184
185 /* file system cannot be shrinked */ 185 /* file system cannot be shrunk */
186 if (newFSSize < bmp->db_mapsize) { 186 if (newFSSize < bmp->db_mapsize) {
187 rc = -EINVAL; 187 rc = -EINVAL;
188 goto out; 188 goto out;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index eeca48a031ab..06c8a67cbe76 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -644,7 +644,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
644 644
645/* Read data from quotafile - avoid pagecache and such because we cannot afford 645/* Read data from quotafile - avoid pagecache and such because we cannot afford
646 * acquiring the locks... As quota files are never truncated and quota code 646 * acquiring the locks... As quota files are never truncated and quota code
647 * itself serializes the operations (and noone else should touch the files) 647 * itself serializes the operations (and no one else should touch the files)
648 * we don't have to be afraid of races */ 648 * we don't have to be afraid of races */
649static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, 649static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
650 size_t len, loff_t off) 650 size_t len, loff_t off)
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/prefetch.h>
13 14
14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
15 16
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 7466e9dcc8c5..339e17e9133d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -60,7 +60,7 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf)
60 * asynchronous properties. So just to prevent the first implementor of such 60 * asynchronous properties. So just to prevent the first implementor of such
61 * a thing from breaking logfs in 2350, we do the usual pointless dance to 61 * a thing from breaking logfs in 2350, we do the usual pointless dance to
62 * declare a completion variable and wait for completion before returning 62 * declare a completion variable and wait for completion before returning
63 * from mtd_erase(). What an excercise in futility! 63 * from mtd_erase(). What an exercise in futility!
64 */ 64 */
65static void logfs_erase_callback(struct erase_info *ei) 65static void logfs_erase_callback(struct erase_info *ei)
66{ 66{
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9ddf0c388c8..9ed89d1663f8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -92,7 +92,7 @@ static int beyond_eof(struct inode *inode, loff_t bix)
92 * so short names (len <= 9) don't even occupy the complete 32bit name 92 * so short names (len <= 9) don't even occupy the complete 32bit name
93 * space. A prime >256 ensures short names quickly spread the 32bit 93 * space. A prime >256 ensures short names quickly spread the 32bit
94 * name space. Add about 26 for the estimated amount of information 94 * name space. Add about 26 for the estimated amount of information
95 * of each character and pick a prime nearby, preferrably a bit-sparse 95 * of each character and pick a prime nearby, preferably a bit-sparse
96 * one. 96 * one.
97 */ 97 */
98static u32 hash_32(const char *s, int len, u32 seed) 98static u32 hash_32(const char *s, int len, u32 seed)
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index ee99a9f5dfd3..9e22085231b3 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1616,7 +1616,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
1616 err = logfs_write_buf(inode, page, flags); 1616 err = logfs_write_buf(inode, page, flags);
1617 if (!err && shrink_level(gc_level) == 0) { 1617 if (!err && shrink_level(gc_level) == 0) {
1618 /* Rewrite cannot mark the inode dirty but has to 1618 /* Rewrite cannot mark the inode dirty but has to
1619 * write it immediatly. 1619 * write it immediately.
1620 * Q: Can't we just create an alias for the inode 1620 * Q: Can't we just create an alias for the inode
1621 * instead? And if not, why not? 1621 * instead? And if not, why not?
1622 */ 1622 */
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 33435e4b14d2..ce03a182c771 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -480,10 +480,6 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
480 !read_only) 480 !read_only)
481 return -EIO; 481 return -EIO;
482 482
483 mutex_init(&super->s_dirop_mutex);
484 mutex_init(&super->s_object_alias_mutex);
485 INIT_LIST_HEAD(&super->s_freeing_list);
486
487 ret = logfs_init_rw(sb); 483 ret = logfs_init_rw(sb);
488 if (ret) 484 if (ret)
489 return ret; 485 return ret;
@@ -601,6 +597,10 @@ static struct dentry *logfs_mount(struct file_system_type *type, int flags,
601 if (!super) 597 if (!super)
602 return ERR_PTR(-ENOMEM); 598 return ERR_PTR(-ENOMEM);
603 599
600 mutex_init(&super->s_dirop_mutex);
601 mutex_init(&super->s_object_alias_mutex);
602 INIT_LIST_HEAD(&super->s_freeing_list);
603
604 if (!devname) 604 if (!devname)
605 err = logfs_get_sb_bdev(super, type, devname); 605 err = logfs_get_sb_bdev(super, type, devname);
606 else if (strncmp(devname, "mtd", 3)) 606 else if (strncmp(devname, "mtd", 3))
diff --git a/fs/mbcache.c b/fs/mbcache.c
index a25444ab2baf..2f174be06555 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -542,7 +542,7 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
542 * mb_cache_entry_find_first() 542 * mb_cache_entry_find_first()
543 * 543 *
544 * Find the first cache entry on a given device with a certain key in 544 * Find the first cache entry on a given device with a certain key in
545 * an additional index. Additonal matches can be found with 545 * an additional index. Additional matches can be found with
546 * mb_cache_entry_find_next(). Returns NULL if no match was found. The 546 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
547 * returned cache entry is locked for shared access ("multiple readers"). 547 * returned cache entry is locked for shared access ("multiple readers").
548 * 548 *
diff --git a/fs/namei.c b/fs/namei.c
index 3cb616d38d9c..e3c4f112ebf7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -70,7 +70,7 @@
70 * name indicated by the symlink. The old code always complained that the 70 * name indicated by the symlink. The old code always complained that the
71 * name already exists, due to not following the symlink even if its target 71 * name already exists, due to not following the symlink even if its target
72 * is nonexistent. The new semantics affects also mknod() and link() when 72 * is nonexistent. The new semantics affects also mknod() and link() when
73 * the name is a symlink pointing to a non-existant name. 73 * the name is a symlink pointing to a non-existent name.
74 * 74 *
75 * I don't know which semantics is the right one, since I have no access 75 * I don't know which semantics is the right one, since I have no access
76 * to standards. But I found by trial that HP-UX 9.0 has the full "new" 76 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, 179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) 180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
181{ 181{
182 umode_t mode = inode->i_mode; 182 unsigned int mode = inode->i_mode;
183 183
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
185 185
@@ -697,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
697 do { 697 do {
698 seq = read_seqcount_begin(&fs->seq); 698 seq = read_seqcount_begin(&fs->seq);
699 nd->root = fs->root; 699 nd->root = fs->root;
700 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
700 } while (read_seqcount_retry(&fs->seq, seq)); 701 } while (read_seqcount_retry(&fs->seq, seq));
701 } 702 }
702} 703}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7dba2ed03429..d99bcf59e4c2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = {
1030 .show = show_vfsmnt 1030 .show = show_vfsmnt
1031}; 1031};
1032 1032
1033static int uuid_is_nil(u8 *uuid)
1034{
1035 int i;
1036 u8 *cp = (u8 *)uuid;
1037
1038 for (i = 0; i < 16; i++) {
1039 if (*cp++)
1040 return 0;
1041 }
1042 return 1;
1043}
1044
1045static int show_mountinfo(struct seq_file *m, void *v) 1033static int show_mountinfo(struct seq_file *m, void *v)
1046{ 1034{
1047 struct proc_mounts *p = m->private; 1035 struct proc_mounts *p = m->private;
@@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v)
1085 if (IS_MNT_UNBINDABLE(mnt)) 1073 if (IS_MNT_UNBINDABLE(mnt))
1086 seq_puts(m, " unbindable"); 1074 seq_puts(m, " unbindable");
1087 1075
1088 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1089 /* print the uuid */
1090 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1091
1092 /* Filesystem specific data */ 1076 /* Filesystem specific data */
1093 seq_puts(m, " - "); 1077 seq_puts(m, " - ");
1094 show_type(m, sb); 1078 show_type(m, sb);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 00a1d1c3d3a4..0250e4ce4893 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -596,7 +596,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
596/* server->priv.data = NULL; */ 596/* server->priv.data = NULL; */
597 597
598 server->m = data; 598 server->m = data;
599 /* Althought anything producing this is buggy, it happens 599 /* Although anything producing this is buggy, it happens
600 now because of PATH_MAX changes.. */ 600 now because of PATH_MAX changes.. */
601 if (server->m.time_out < 1) { 601 if (server->m.time_out < 1) {
602 server->m.time_out = 10; 602 server->m.time_out = 10;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 14e0f9371d14..00ecf62ce7c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -241,7 +241,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
241 241
242 args->cbl_layout_type = ntohl(*p++); 242 args->cbl_layout_type = ntohl(*p++);
243 /* Depite the spec's xdr, iomode really belongs in the FILE switch, 243 /* Depite the spec's xdr, iomode really belongs in the FILE switch,
244 * as it is unuseable and ignored with the other types. 244 * as it is unusable and ignored with the other types.
245 */ 245 */
246 iomode = ntohl(*p++); 246 iomode = ntohl(*p++);
247 args->cbl_layoutchanged = ntohl(*p++); 247 args->cbl_layoutchanged = ntohl(*p++);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3ac5bd695e5e..2f093ed16980 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -301,7 +301,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
301 * disk, but it retrieves and clears ctx->error after synching, despite 301 * disk, but it retrieves and clears ctx->error after synching, despite
302 * the two being set at the same time in nfs_context_set_write_error(). 302 * the two being set at the same time in nfs_context_set_write_error().
303 * This is because the former is used to notify the _next_ call to 303 * This is because the former is used to notify the _next_ call to
304 * nfs_file_write() that a write error occured, and hence cause it to 304 * nfs_file_write() that a write error occurred, and hence cause it to
305 * fall back to doing a synchronous write. 305 * fall back to doing a synchronous write.
306 */ 306 */
307static int 307static int
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 9166fcb66da2..1f063bacd285 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
119} 119}
120 120
121#ifdef CONFIG_NFS_V4 121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode) 122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{ 123{
124 struct gss_api_mech *mech; 124 struct gss_api_mech *mech;
125 struct xdr_netobj oid; 125 struct xdr_netobj oid;
@@ -148,67 +148,64 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors,
148 return pseudoflavor; 148 return pseudoflavor;
149} 149}
150 150
151static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry) 151static int nfs_negotiate_security(const struct dentry *parent,
152 const struct dentry *dentry,
153 rpc_authflavor_t *flavor)
152{ 154{
153 int status = 0;
154 struct page *page; 155 struct page *page;
155 struct nfs4_secinfo_flavors *flavors; 156 struct nfs4_secinfo_flavors *flavors;
156 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); 157 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
157 rpc_authflavor_t flavor = RPC_AUTH_UNIX; 158 int ret = -EPERM;
158 159
159 secinfo = NFS_PROTO(parent->d_inode)->secinfo; 160 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
160 if (secinfo != NULL) { 161 if (secinfo != NULL) {
161 page = alloc_page(GFP_KERNEL); 162 page = alloc_page(GFP_KERNEL);
162 if (!page) { 163 if (!page) {
163 status = -ENOMEM; 164 ret = -ENOMEM;
164 goto out; 165 goto out;
165 } 166 }
166 flavors = page_address(page); 167 flavors = page_address(page);
167 status = secinfo(parent->d_inode, &dentry->d_name, flavors); 168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
168 flavor = nfs_find_best_sec(flavors, dentry->d_inode); 169 *flavor = nfs_find_best_sec(flavors);
169 put_page(page); 170 put_page(page);
170 } 171 }
171 172
172 return flavor;
173
174out: 173out:
175 status = -ENOMEM; 174 return ret;
176 return status;
177} 175}
178 176
179static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, 177static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
180 struct dentry *dentry, struct path *path, 178 struct dentry *dentry, struct path *path,
181 struct nfs_fh *fh, struct nfs_fattr *fattr) 179 struct nfs_fh *fh, struct nfs_fattr *fattr,
180 rpc_authflavor_t *flavor)
182{ 181{
183 rpc_authflavor_t flavor;
184 struct rpc_clnt *clone; 182 struct rpc_clnt *clone;
185 struct rpc_auth *auth; 183 struct rpc_auth *auth;
186 int err; 184 int err;
187 185
188 flavor = nfs_negotiate_security(parent, path->dentry); 186 err = nfs_negotiate_security(parent, path->dentry, flavor);
189 if (flavor < 0) 187 if (err < 0)
190 goto out; 188 goto out;
191 clone = rpc_clone_client(server->client); 189 clone = rpc_clone_client(server->client);
192 auth = rpcauth_create(flavor, clone); 190 auth = rpcauth_create(*flavor, clone);
193 if (!auth) { 191 if (!auth) {
194 flavor = -EIO; 192 err = -EIO;
195 goto out_shutdown; 193 goto out_shutdown;
196 } 194 }
197 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, 195 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
198 &path->dentry->d_name, 196 &path->dentry->d_name,
199 fh, fattr); 197 fh, fattr);
200 if (err < 0)
201 flavor = err;
202out_shutdown: 198out_shutdown:
203 rpc_shutdown_client(clone); 199 rpc_shutdown_client(clone);
204out: 200out:
205 return flavor; 201 return err;
206} 202}
207#else /* CONFIG_NFS_V4 */ 203#else /* CONFIG_NFS_V4 */
208static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, 204static inline int nfs_lookup_with_sec(struct nfs_server *server,
209 struct dentry *parent, struct dentry *dentry, 205 struct dentry *parent, struct dentry *dentry,
210 struct path *path, struct nfs_fh *fh, 206 struct path *path, struct nfs_fh *fh,
211 struct nfs_fattr *fattr) 207 struct nfs_fattr *fattr,
208 rpc_authflavor_t *flavor)
212{ 209{
213 return -EPERM; 210 return -EPERM;
214} 211}
@@ -234,7 +231,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
234 struct nfs_fh *fh = NULL; 231 struct nfs_fh *fh = NULL;
235 struct nfs_fattr *fattr = NULL; 232 struct nfs_fattr *fattr = NULL;
236 int err; 233 int err;
237 rpc_authflavor_t flavor = 1; 234 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
238 235
239 dprintk("--> nfs_d_automount()\n"); 236 dprintk("--> nfs_d_automount()\n");
240 237
@@ -255,13 +252,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
255 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, 252 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
256 &path->dentry->d_name, 253 &path->dentry->d_name,
257 fh, fattr); 254 fh, fattr);
258 if (err == -EPERM) { 255 if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
259 flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr); 256 err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
260 if (flavor < 0)
261 err = flavor;
262 else
263 err = 0;
264 }
265 dput(parent); 257 dput(parent);
266 if (err != 0) { 258 if (err != 0) {
267 mnt = ERR_PTR(err); 259 mnt = ERR_PTR(err);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1c261ddd65d..c4a69833dd0d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -47,6 +47,7 @@ enum nfs4_client_state {
47 NFS4CLNT_LAYOUTRECALL, 47 NFS4CLNT_LAYOUTRECALL,
48 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
49 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
50 NFS4CLNT_LEASE_CONFIRM,
50}; 51};
51 52
52enum nfs4_session_state { 53enum nfs4_session_state {
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 6f8192f4cfc7..be79dc9f386d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
117 case -EKEYEXPIRED: 117 case -EKEYEXPIRED:
118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); 118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
119 break; 119 break;
120 case -NFS4ERR_RETRY_UNCACHED_REP:
121 break;
120 default: 122 default:
121 dprintk("%s DS error. Retry through MDS %d\n", __func__, 123 dprintk("%s DS error. Retry through MDS %d\n", __func__,
122 task->tk_status); 124 task->tk_status);
@@ -416,7 +418,8 @@ static int
416filelayout_check_layout(struct pnfs_layout_hdr *lo, 418filelayout_check_layout(struct pnfs_layout_hdr *lo,
417 struct nfs4_filelayout_segment *fl, 419 struct nfs4_filelayout_segment *fl,
418 struct nfs4_layoutget_res *lgr, 420 struct nfs4_layoutget_res *lgr,
419 struct nfs4_deviceid *id) 421 struct nfs4_deviceid *id,
422 gfp_t gfp_flags)
420{ 423{
421 struct nfs4_file_layout_dsaddr *dsaddr; 424 struct nfs4_file_layout_dsaddr *dsaddr;
422 int status = -EINVAL; 425 int status = -EINVAL;
@@ -439,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
439 /* find and reference the deviceid */ 442 /* find and reference the deviceid */
440 dsaddr = nfs4_fl_find_get_deviceid(id); 443 dsaddr = nfs4_fl_find_get_deviceid(id);
441 if (dsaddr == NULL) { 444 if (dsaddr == NULL) {
442 dsaddr = get_device_info(lo->plh_inode, id); 445 dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
443 if (dsaddr == NULL) 446 if (dsaddr == NULL)
444 goto out; 447 goto out;
445 } 448 }
@@ -500,7 +503,8 @@ static int
500filelayout_decode_layout(struct pnfs_layout_hdr *flo, 503filelayout_decode_layout(struct pnfs_layout_hdr *flo,
501 struct nfs4_filelayout_segment *fl, 504 struct nfs4_filelayout_segment *fl,
502 struct nfs4_layoutget_res *lgr, 505 struct nfs4_layoutget_res *lgr,
503 struct nfs4_deviceid *id) 506 struct nfs4_deviceid *id,
507 gfp_t gfp_flags)
504{ 508{
505 struct xdr_stream stream; 509 struct xdr_stream stream;
506 struct xdr_buf buf = { 510 struct xdr_buf buf = {
@@ -516,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
516 520
517 dprintk("%s: set_layout_map Begin\n", __func__); 521 dprintk("%s: set_layout_map Begin\n", __func__);
518 522
519 scratch = alloc_page(GFP_KERNEL); 523 scratch = alloc_page(gfp_flags);
520 if (!scratch) 524 if (!scratch)
521 return -ENOMEM; 525 return -ENOMEM;
522 526
@@ -554,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
554 goto out_err; 558 goto out_err;
555 559
556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 560 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
557 GFP_KERNEL); 561 gfp_flags);
558 if (!fl->fh_array) 562 if (!fl->fh_array)
559 goto out_err; 563 goto out_err;
560 564
561 for (i = 0; i < fl->num_fh; i++) { 565 for (i = 0; i < fl->num_fh; i++) {
562 /* Do we want to use a mempool here? */ 566 /* Do we want to use a mempool here? */
563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 567 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
564 if (!fl->fh_array[i]) 568 if (!fl->fh_array[i])
565 goto out_err_free; 569 goto out_err_free;
566 570
@@ -605,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
605 609
606static struct pnfs_layout_segment * 610static struct pnfs_layout_segment *
607filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 611filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
608 struct nfs4_layoutget_res *lgr) 612 struct nfs4_layoutget_res *lgr,
613 gfp_t gfp_flags)
609{ 614{
610 struct nfs4_filelayout_segment *fl; 615 struct nfs4_filelayout_segment *fl;
611 int rc; 616 int rc;
612 struct nfs4_deviceid id; 617 struct nfs4_deviceid id;
613 618
614 dprintk("--> %s\n", __func__); 619 dprintk("--> %s\n", __func__);
615 fl = kzalloc(sizeof(*fl), GFP_KERNEL); 620 fl = kzalloc(sizeof(*fl), gfp_flags);
616 if (!fl) 621 if (!fl)
617 return NULL; 622 return NULL;
618 623
619 rc = filelayout_decode_layout(layoutid, fl, lgr, &id); 624 rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
620 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { 625 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
621 _filelayout_free_lseg(fl); 626 _filelayout_free_lseg(fl);
622 return NULL; 627 return NULL;
623 } 628 }
@@ -633,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
633 int size = (fl->stripe_type == STRIPE_SPARSE) ? 638 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count; 639 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635 640
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); 641 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
637 if (!fl->commit_buckets) { 642 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr); 643 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL; 644 return NULL;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 085a354e0f08..2b461d77b43a 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,7 +33,7 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Field testing shows we need to support upto 4096 stripe indices. 36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures. 39 * RFC 5661 multipath_list4 structures.
@@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr *
104nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); 104nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
105extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 105extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
106struct nfs4_file_layout_dsaddr * 106struct nfs4_file_layout_dsaddr *
107get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); 107get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
108 108
109#endif /* FS_NFS_NFS4FILELAYOUT_H */ 109#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index de5350f2b249..db07c7af1395 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
225} 225}
226 226
227static struct nfs4_pnfs_ds * 227static struct nfs4_pnfs_ds *
228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) 228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
229{ 229{
230 struct nfs4_pnfs_ds *tmp_ds, *ds; 230 struct nfs4_pnfs_ds *tmp_ds, *ds;
231 231
232 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); 232 ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
233 if (!ds) 233 if (!ds)
234 goto out; 234 goto out;
235 235
@@ -261,7 +261,7 @@ out:
261 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
262 */ 262 */
263static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
265{ 265{
266 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
267 char *buf; 267 char *buf;
@@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
303 rlen); 303 rlen);
304 goto out_err; 304 goto out_err;
305 } 305 }
306 buf = kmalloc(rlen + 1, GFP_KERNEL); 306 buf = kmalloc(rlen + 1, gfp_flags);
307 if (!buf) { 307 if (!buf) {
308 dprintk("%s: Not enough memory\n", __func__); 308 dprintk("%s: Not enough memory\n", __func__);
309 goto out_err; 309 goto out_err;
@@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
333 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); 333 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
334 port = htons((tmp[0] << 8) | (tmp[1])); 334 port = htons((tmp[0] << 8) | (tmp[1]));
335 335
336 ds = nfs4_pnfs_ds_add(inode, ip_addr, port); 336 ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
337 dprintk("%s: Decoded address and port %s\n", __func__, buf); 337 dprintk("%s: Decoded address and port %s\n", __func__, buf);
338out_free: 338out_free:
339 kfree(buf); 339 kfree(buf);
@@ -343,7 +343,7 @@ out_err:
343 343
344/* Decode opaque device data and return the result */ 344/* Decode opaque device data and return the result */
345static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
346decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
347{ 347{
348 int i; 348 int i;
349 u32 cnt, num; 349 u32 cnt, num;
@@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
362 struct page *scratch; 362 struct page *scratch;
363 363
364 /* set up xdr stream */ 364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL); 365 scratch = alloc_page(gfp_flags);
366 if (!scratch) 366 if (!scratch)
367 goto out_err; 367 goto out_err;
368 368
@@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
384 } 384 }
385 385
386 /* read stripe indices */ 386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); 387 stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
388 if (!stripe_indices) 388 if (!stripe_indices)
389 goto out_err_free_scratch; 389 goto out_err_free_scratch;
390 390
@@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
423 423
424 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
426 GFP_KERNEL); 426 gfp_flags);
427 if (!dsaddr) 427 if (!dsaddr)
428 goto out_err_free_stripe_indices; 428 goto out_err_free_stripe_indices;
429 429
@@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
452 for (j = 0; j < mp_count; j++) { 452 for (j = 0; j < mp_count; j++) {
453 if (j == 0) { 453 if (j == 0) {
454 dsaddr->ds_list[i] = decode_and_add_ds(&stream, 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino); 455 ino, gfp_flags);
456 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
457 goto out_err_free_deviceid; 457 goto out_err_free_deviceid;
458 } else { 458 } else {
@@ -503,12 +503,12 @@ out_err:
503 * available devices. 503 * available devices.
504 */ 504 */
505static struct nfs4_file_layout_dsaddr * 505static struct nfs4_file_layout_dsaddr *
506decode_and_add_device(struct inode *inode, struct pnfs_device *dev) 506decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
507{ 507{
508 struct nfs4_file_layout_dsaddr *d, *new; 508 struct nfs4_file_layout_dsaddr *d, *new;
509 long hash; 509 long hash;
510 510
511 new = decode_device(inode, dev); 511 new = decode_device(inode, dev, gfp_flags);
512 if (!new) { 512 if (!new) {
513 printk(KERN_WARNING "%s: Could not decode or add device\n", 513 printk(KERN_WARNING "%s: Could not decode or add device\n",
514 __func__); 514 __func__);
@@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
537 * of available devices, and return it. 537 * of available devices, and return it.
538 */ 538 */
539struct nfs4_file_layout_dsaddr * 539struct nfs4_file_layout_dsaddr *
540get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) 540get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
541{ 541{
542 struct pnfs_device *pdev = NULL; 542 struct pnfs_device *pdev = NULL;
543 u32 max_resp_sz; 543 u32 max_resp_sz;
@@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
556 dprintk("%s inode %p max_resp_sz %u max_pages %d\n", 556 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
557 __func__, inode, max_resp_sz, max_pages); 557 __func__, inode, max_resp_sz, max_pages);
558 558
559 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); 559 pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
560 if (pdev == NULL) 560 if (pdev == NULL)
561 return NULL; 561 return NULL;
562 562
563 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); 563 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
564 if (pages == NULL) { 564 if (pages == NULL) {
565 kfree(pdev); 565 kfree(pdev);
566 return NULL; 566 return NULL;
567 } 567 }
568 for (i = 0; i < max_pages; i++) { 568 for (i = 0; i < max_pages; i++) {
569 pages[i] = alloc_page(GFP_KERNEL); 569 pages[i] = alloc_page(gfp_flags);
570 if (!pages[i]) 570 if (!pages[i])
571 goto out_free; 571 goto out_free;
572 } 572 }
@@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
587 * Found new device, need to decode it and then add it to the 587 * Found new device, need to decode it and then add it to the
588 * list of known devices for this mountpoint. 588 * list of known devices for this mountpoint.
589 */ 589 */
590 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
591out_free: 591out_free:
592 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
593 __free_page(pages[i]); 593 __free_page(pages[i]);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dfd1e6d7e6c3..cf1b339c3937 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -46,6 +46,7 @@
46#include <linux/nfs4.h> 46#include <linux/nfs4.h>
47#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
48#include <linux/nfs_page.h> 48#include <linux/nfs_page.h>
49#include <linux/nfs_mount.h>
49#include <linux/namei.h> 50#include <linux/namei.h>
50#include <linux/mount.h> 51#include <linux/mount.h>
51#include <linux/module.h> 52#include <linux/module.h>
@@ -299,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
299 ret = nfs4_delay(server->client, &exception->timeout); 300 ret = nfs4_delay(server->client, &exception->timeout);
300 if (ret != 0) 301 if (ret != 0)
301 break; 302 break;
303 case -NFS4ERR_RETRY_UNCACHED_REP:
302 case -NFS4ERR_OLD_STATEID: 304 case -NFS4ERR_OLD_STATEID:
303 exception->retry = 1; 305 exception->retry = 1;
304 break; 306 break;
@@ -443,8 +445,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
443 if (res->sr_status == 1) 445 if (res->sr_status == 1)
444 res->sr_status = NFS_OK; 446 res->sr_status = NFS_OK;
445 447
446 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 448 /* don't increment the sequence number if the task wasn't sent */
447 if (!res->sr_slot) 449 if (!RPC_WAS_SENT(task))
448 goto out; 450 goto out;
449 451
450 /* Check the SEQUENCE operation status */ 452 /* Check the SEQUENCE operation status */
@@ -2185,9 +2187,14 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2185 struct nfs4_exception exception = { }; 2187 struct nfs4_exception exception = { };
2186 int err; 2188 int err;
2187 do { 2189 do {
2188 err = nfs4_handle_exception(server, 2190 err = _nfs4_lookup_root(server, fhandle, info);
2189 _nfs4_lookup_root(server, fhandle, info), 2191 switch (err) {
2190 &exception); 2192 case 0:
2193 case -NFS4ERR_WRONGSEC:
2194 break;
2195 default:
2196 err = nfs4_handle_exception(server, err, &exception);
2197 }
2191 } while (exception.retry); 2198 } while (exception.retry);
2192 return err; 2199 return err;
2193} 2200}
@@ -2204,31 +2211,51 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
2204 goto out; 2211 goto out;
2205 } 2212 }
2206 ret = nfs4_lookup_root(server, fhandle, info); 2213 ret = nfs4_lookup_root(server, fhandle, info);
2207 if (ret < 0)
2208 ret = -EAGAIN;
2209out: 2214out:
2210 return ret; 2215 return ret;
2211} 2216}
2212 2217
2213/* 2218static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2214 * get the file handle for the "/" directory on the server
2215 */
2216static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2217 struct nfs_fsinfo *info) 2219 struct nfs_fsinfo *info)
2218{ 2220{
2219 int i, len, status = 0; 2221 int i, len, status = 0;
2220 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2]; 2222 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
2221 2223
2222 flav_array[0] = RPC_AUTH_UNIX; 2224 len = gss_mech_list_pseudoflavors(&flav_array[0]);
2223 len = gss_mech_list_pseudoflavors(&flav_array[1]); 2225 flav_array[len] = RPC_AUTH_NULL;
2224 flav_array[1+len] = RPC_AUTH_NULL; 2226 len += 1;
2225 len += 2;
2226 2227
2227 for (i = 0; i < len; i++) { 2228 for (i = 0; i < len; i++) {
2228 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); 2229 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2229 if (status == 0) 2230 if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
2230 break; 2231 continue;
2232 break;
2231 } 2233 }
2234 /*
2235 * -EACCESS could mean that the user doesn't have correct permissions
2236 * to access the mount. It could also mean that we tried to mount
2237 * with a gss auth flavor, but rpc.gssd isn't running. Either way,
2238 * existing mount programs don't handle -EACCES very well so it should
2239 * be mapped to -EPERM instead.
2240 */
2241 if (status == -EACCES)
2242 status = -EPERM;
2243 return status;
2244}
2245
2246/*
2247 * get the file handle for the "/" directory on the server
2248 */
2249static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2250 struct nfs_fsinfo *info)
2251{
2252 int status = nfs4_lookup_root(server, fhandle, info);
2253 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
2254 /*
2255 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
2256 * by nfs4_map_errors() as this function exits.
2257 */
2258 status = nfs4_find_root_sec(server, fhandle, info);
2232 if (status == 0) 2259 if (status == 0)
2233 status = nfs4_server_capabilities(server, fhandle); 2260 status = nfs4_server_capabilities(server, fhandle);
2234 if (status == 0) 2261 if (status == 0)
@@ -3669,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3669 rpc_delay(task, NFS4_POLL_RETRY_MAX); 3696 rpc_delay(task, NFS4_POLL_RETRY_MAX);
3670 task->tk_status = 0; 3697 task->tk_status = 0;
3671 return -EAGAIN; 3698 return -EAGAIN;
3699 case -NFS4ERR_RETRY_UNCACHED_REP:
3672 case -NFS4ERR_OLD_STATEID: 3700 case -NFS4ERR_OLD_STATEID:
3673 task->tk_status = 0; 3701 task->tk_status = 0;
3674 return -EAGAIN; 3702 return -EAGAIN;
@@ -3725,21 +3753,20 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3725 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 3753 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
3726 clp->cl_ipaddr, port >> 8, port & 255); 3754 clp->cl_ipaddr, port >> 8, port & 255);
3727 3755
3728 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3756 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3729 if (status != -NFS4ERR_CLID_INUSE) 3757 if (status != -NFS4ERR_CLID_INUSE)
3730 break; 3758 break;
3731 if (signalled()) 3759 if (loop != 0) {
3760 ++clp->cl_id_uniquifier;
3732 break; 3761 break;
3733 if (loop++ & 1) 3762 }
3734 ssleep(clp->cl_lease_time / HZ + 1); 3763 ++loop;
3735 else 3764 ssleep(clp->cl_lease_time / HZ + 1);
3736 if (++clp->cl_id_uniquifier == 0)
3737 break;
3738 } 3765 }
3739 return status; 3766 return status;
3740} 3767}
3741 3768
3742static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, 3769int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3743 struct nfs4_setclientid_res *arg, 3770 struct nfs4_setclientid_res *arg,
3744 struct rpc_cred *cred) 3771 struct rpc_cred *cred)
3745{ 3772{
@@ -3754,7 +3781,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3754 int status; 3781 int status;
3755 3782
3756 now = jiffies; 3783 now = jiffies;
3757 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3784 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3758 if (status == 0) { 3785 if (status == 0) {
3759 spin_lock(&clp->cl_lock); 3786 spin_lock(&clp->cl_lock);
3760 clp->cl_lease_time = fsinfo.lease_time * HZ; 3787 clp->cl_lease_time = fsinfo.lease_time * HZ;
@@ -3764,26 +3791,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3764 return status; 3791 return status;
3765} 3792}
3766 3793
3767int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3768 struct nfs4_setclientid_res *arg,
3769 struct rpc_cred *cred)
3770{
3771 long timeout = 0;
3772 int err;
3773 do {
3774 err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
3775 switch (err) {
3776 case 0:
3777 return err;
3778 case -NFS4ERR_RESOURCE:
3779 /* The IBM lawyers misread another document! */
3780 case -NFS4ERR_DELAY:
3781 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3782 }
3783 } while (err == 0);
3784 return err;
3785}
3786
3787struct nfs4_delegreturndata { 3794struct nfs4_delegreturndata {
3788 struct nfs4_delegreturnargs args; 3795 struct nfs4_delegreturnargs args;
3789 struct nfs4_delegreturnres res; 3796 struct nfs4_delegreturnres res;
@@ -4788,7 +4795,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4788 init_utsname()->domainname, 4795 init_utsname()->domainname,
4789 clp->cl_rpcclient->cl_auth->au_flavor); 4796 clp->cl_rpcclient->cl_auth->au_flavor);
4790 4797
4791 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 4798 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4792 if (!status) 4799 if (!status)
4793 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4800 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4794 dprintk("<-- %s status= %d\n", __func__, status); 4801 dprintk("<-- %s status= %d\n", __func__, status);
@@ -4839,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4839 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4846 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4840 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4847 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4841 task->tk_status = 0; 4848 task->tk_status = 0;
4849 /* fall through */
4850 case -NFS4ERR_RETRY_UNCACHED_REP:
4842 nfs_restart_rpc(task, data->clp); 4851 nfs_restart_rpc(task, data->clp);
4843 return; 4852 return;
4844 } 4853 }
@@ -4871,7 +4880,8 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4871 .rpc_client = clp->cl_rpcclient, 4880 .rpc_client = clp->cl_rpcclient,
4872 .rpc_message = &msg, 4881 .rpc_message = &msg,
4873 .callback_ops = &nfs4_get_lease_time_ops, 4882 .callback_ops = &nfs4_get_lease_time_ops,
4874 .callback_data = &data 4883 .callback_data = &data,
4884 .flags = RPC_TASK_TIMEOUT,
4875 }; 4885 };
4876 int status; 4886 int status;
4877 4887
@@ -5173,7 +5183,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5173 nfs4_init_channel_attrs(&args); 5183 nfs4_init_channel_attrs(&args);
5174 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); 5184 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
5175 5185
5176 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); 5186 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5177 5187
5178 if (!status) 5188 if (!status)
5179 /* Verify the session's negotiated channel_attrs values */ 5189 /* Verify the session's negotiated channel_attrs values */
@@ -5196,20 +5206,10 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5196 int status; 5206 int status;
5197 unsigned *ptr; 5207 unsigned *ptr;
5198 struct nfs4_session *session = clp->cl_session; 5208 struct nfs4_session *session = clp->cl_session;
5199 long timeout = 0;
5200 int err;
5201 5209
5202 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5210 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5203 5211
5204 do { 5212 status = _nfs4_proc_create_session(clp);
5205 status = _nfs4_proc_create_session(clp);
5206 if (status == -NFS4ERR_DELAY) {
5207 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5208 if (err)
5209 status = err;
5210 }
5211 } while (status == -NFS4ERR_DELAY);
5212
5213 if (status) 5213 if (status)
5214 goto out; 5214 goto out;
5215 5215
@@ -5250,7 +5250,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5250 msg.rpc_argp = session; 5250 msg.rpc_argp = session;
5251 msg.rpc_resp = NULL; 5251 msg.rpc_resp = NULL;
5252 msg.rpc_cred = NULL; 5252 msg.rpc_cred = NULL;
5253 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); 5253 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5254 5254
5255 if (status) 5255 if (status)
5256 printk(KERN_WARNING 5256 printk(KERN_WARNING
@@ -5483,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5483 break; 5483 break;
5484 case -NFS4ERR_DELAY: 5484 case -NFS4ERR_DELAY:
5485 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5485 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5486 /* fall through */
5487 case -NFS4ERR_RETRY_UNCACHED_REP:
5486 return -EAGAIN; 5488 return -EAGAIN;
5487 default: 5489 default:
5488 nfs4_schedule_lease_recovery(clp); 5490 nfs4_schedule_lease_recovery(clp);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a6804f704d9d..036f5adc9e1f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list);
64 64
65int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 65int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
66{ 66{
67 struct nfs4_setclientid_res clid; 67 struct nfs4_setclientid_res clid = {
68 .clientid = clp->cl_clientid,
69 .confirm = clp->cl_confirm,
70 };
68 unsigned short port; 71 unsigned short port;
69 int status; 72 int status;
70 73
74 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
75 goto do_confirm;
71 port = nfs_callback_tcpport; 76 port = nfs_callback_tcpport;
72 if (clp->cl_addr.ss_family == AF_INET6) 77 if (clp->cl_addr.ss_family == AF_INET6)
73 port = nfs_callback_tcpport6; 78 port = nfs_callback_tcpport6;
@@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
75 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); 80 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
76 if (status != 0) 81 if (status != 0)
77 goto out; 82 goto out;
83 clp->cl_clientid = clid.clientid;
84 clp->cl_confirm = clid.confirm;
85 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
86do_confirm:
78 status = nfs4_proc_setclientid_confirm(clp, &clid, cred); 87 status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
79 if (status != 0) 88 if (status != 0)
80 goto out; 89 goto out;
81 clp->cl_clientid = clid.clientid; 90 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
82 nfs4_schedule_state_renewal(clp); 91 nfs4_schedule_state_renewal(clp);
83out: 92out:
84 return status; 93 return status;
@@ -230,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
230{ 239{
231 int status; 240 int status;
232 241
242 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
243 goto do_confirm;
233 nfs4_begin_drain_session(clp); 244 nfs4_begin_drain_session(clp);
234 status = nfs4_proc_exchange_id(clp, cred); 245 status = nfs4_proc_exchange_id(clp, cred);
235 if (status != 0) 246 if (status != 0)
236 goto out; 247 goto out;
248 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
249do_confirm:
237 status = nfs4_proc_create_session(clp); 250 status = nfs4_proc_create_session(clp);
238 if (status != 0) 251 if (status != 0)
239 goto out; 252 goto out;
253 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
240 nfs41_setup_state_renewal(clp); 254 nfs41_setup_state_renewal(clp);
241 nfs_mark_client_ready(clp, NFS_CS_READY); 255 nfs_mark_client_ready(clp, NFS_CS_READY);
242out: 256out:
@@ -1584,20 +1598,23 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1584 */ 1598 */
1585static void nfs4_set_lease_expired(struct nfs_client *clp, int status) 1599static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1586{ 1600{
1587 if (nfs4_has_session(clp)) { 1601 switch (status) {
1588 switch (status) { 1602 case -NFS4ERR_CLID_INUSE:
1589 case -NFS4ERR_DELAY: 1603 case -NFS4ERR_STALE_CLIENTID:
1590 case -NFS4ERR_CLID_INUSE: 1604 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1591 case -EAGAIN: 1605 break;
1592 break; 1606 case -NFS4ERR_DELAY:
1607 case -ETIMEDOUT:
1608 case -EAGAIN:
1609 ssleep(1);
1610 break;
1593 1611
1594 case -EKEYEXPIRED: 1612 case -EKEYEXPIRED:
1595 nfs4_warn_keyexpired(clp->cl_hostname); 1613 nfs4_warn_keyexpired(clp->cl_hostname);
1596 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1614 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1597 * in nfs4_exchange_id */ 1615 * in nfs4_exchange_id */
1598 default: 1616 default:
1599 return; 1617 return;
1600 }
1601 } 1618 }
1602 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1619 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1603} 1620}
@@ -1607,7 +1624,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1607 int status = 0; 1624 int status = 0;
1608 1625
1609 /* Ensure exclusive access to NFSv4 state */ 1626 /* Ensure exclusive access to NFSv4 state */
1610 for(;;) { 1627 do {
1611 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1628 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
1612 /* We're going to have to re-establish a clientid */ 1629 /* We're going to have to re-establish a clientid */
1613 status = nfs4_reclaim_lease(clp); 1630 status = nfs4_reclaim_lease(clp);
@@ -1691,7 +1708,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1691 break; 1708 break;
1692 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 1709 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1693 break; 1710 break;
1694 } 1711 } while (atomic_read(&clp->cl_count) > 1);
1695 return; 1712 return;
1696out_error: 1713out_error:
1697 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1714 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dddfb5795d7b..c3ccd2c46834 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1452,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1452 1452
1453static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1453static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1454{ 1454{
1455 uint32_t attrs[2] = {0, 0}; 1455 uint32_t attrs[2] = {
1456 FATTR4_WORD0_RDATTR_ERROR,
1457 FATTR4_WORD1_MOUNTED_ON_FILEID,
1458 };
1456 uint32_t dircount = readdir->count >> 1; 1459 uint32_t dircount = readdir->count >> 1;
1457 __be32 *p; 1460 __be32 *p;
1458 1461
1459 if (readdir->plus) { 1462 if (readdir->plus) {
1460 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| 1463 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1461 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; 1464 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID;
1462 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| 1465 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1463 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| 1466 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1464 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| 1467 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1465 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 1468 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1466 dircount >>= 1; 1469 dircount >>= 1;
1467 } 1470 }
1468 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; 1471 /* Use mounted_on_fileid only if the server supports it */
1469 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; 1472 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
1470 /* Switch to mounted_on_fileid if the server supports it */ 1473 attrs[0] |= FATTR4_WORD0_FILEID;
1471 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1472 attrs[0] &= ~FATTR4_WORD0_FILEID;
1473 else
1474 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1475 1474
1476 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1475 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1477 *p++ = cpu_to_be32(OP_READDIR); 1476 *p++ = cpu_to_be32(OP_READDIR);
@@ -3140,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
3140 goto out_overflow; 3139 goto out_overflow;
3141 xdr_decode_hyper(p, fileid); 3140 xdr_decode_hyper(p, fileid);
3142 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 3141 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
3143 ret = NFS_ATTR_FATTR_FILEID; 3142 ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
3144 } 3143 }
3145 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 3144 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
3146 return ret; 3145 return ret;
@@ -4002,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4002{ 4001{
4003 int status; 4002 int status;
4004 umode_t fmode = 0; 4003 umode_t fmode = 0;
4005 uint64_t fileid;
4006 uint32_t type; 4004 uint32_t type;
4007 4005
4008 status = decode_attr_type(xdr, bitmap, &type); 4006 status = decode_attr_type(xdr, bitmap, &type);
@@ -4101,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4101 goto xdr_error; 4099 goto xdr_error;
4102 fattr->valid |= status; 4100 fattr->valid |= status;
4103 4101
4104 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); 4102 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid);
4105 if (status < 0) 4103 if (status < 0)
4106 goto xdr_error; 4104 goto xdr_error;
4107 if (status != 0 && !(fattr->valid & status)) { 4105 fattr->valid |= status;
4108 fattr->fileid = fileid;
4109 fattr->valid |= status;
4110 }
4111 4106
4112xdr_error: 4107xdr_error:
4113 dprintk("%s: xdr returned %d\n", __func__, -status); 4108 dprintk("%s: xdr returned %d\n", __func__, -status);
@@ -4838,17 +4833,21 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4838 struct nfs4_secinfo_flavor *sec_flavor; 4833 struct nfs4_secinfo_flavor *sec_flavor;
4839 int status; 4834 int status;
4840 __be32 *p; 4835 __be32 *p;
4841 int i; 4836 int i, num_flavors;
4842 4837
4843 status = decode_op_hdr(xdr, OP_SECINFO); 4838 status = decode_op_hdr(xdr, OP_SECINFO);
4839 if (status)
4840 goto out;
4844 p = xdr_inline_decode(xdr, 4); 4841 p = xdr_inline_decode(xdr, 4);
4845 if (unlikely(!p)) 4842 if (unlikely(!p))
4846 goto out_overflow; 4843 goto out_overflow;
4847 res->flavors->num_flavors = be32_to_cpup(p);
4848 4844
4849 for (i = 0; i < res->flavors->num_flavors; i++) { 4845 res->flavors->num_flavors = 0;
4846 num_flavors = be32_to_cpup(p);
4847
4848 for (i = 0; i < num_flavors; i++) {
4850 sec_flavor = &res->flavors->flavors[i]; 4849 sec_flavor = &res->flavors->flavors[i];
4851 if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE) 4850 if ((char *)&sec_flavor[1] - (char *)res->flavors > PAGE_SIZE)
4852 break; 4851 break;
4853 4852
4854 p = xdr_inline_decode(xdr, 4); 4853 p = xdr_inline_decode(xdr, 4);
@@ -4857,13 +4856,15 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4857 sec_flavor->flavor = be32_to_cpup(p); 4856 sec_flavor->flavor = be32_to_cpup(p);
4858 4857
4859 if (sec_flavor->flavor == RPC_AUTH_GSS) { 4858 if (sec_flavor->flavor == RPC_AUTH_GSS) {
4860 if (decode_secinfo_gss(xdr, sec_flavor)) 4859 status = decode_secinfo_gss(xdr, sec_flavor);
4861 break; 4860 if (status)
4861 goto out;
4862 } 4862 }
4863 res->flavors->num_flavors++;
4863 } 4864 }
4864 4865
4865 return 0; 4866out:
4866 4867 return status;
4867out_overflow: 4868out_overflow:
4868 print_overflow_msg(__func__, xdr); 4869 print_overflow_msg(__func__, xdr);
4869 return -EIO; 4870 return -EIO;
@@ -6408,7 +6409,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6408 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, 6409 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6409 entry->server, 1) < 0) 6410 entry->server, 1) < 0)
6410 goto out_overflow; 6411 goto out_overflow;
6411 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6412 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
6413 entry->ino = entry->fattr->mounted_on_fileid;
6414 else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6412 entry->ino = entry->fattr->fileid; 6415 entry->ino = entry->fattr->fileid;
6413 6416
6414 entry->d_type = DT_UNKNOWN; 6417 entry->d_type = DT_UNKNOWN;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9ab97269ce6..f57f5281a520 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
383 plh_layouts); 383 plh_layouts);
384 dprintk("%s freeing layout for inode %lu\n", __func__, 384 dprintk("%s freeing layout for inode %lu\n", __func__,
385 lo->plh_inode->i_ino); 385 lo->plh_inode->i_ino);
386 list_del_init(&lo->plh_layouts);
386 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 387 pnfs_destroy_layout(NFS_I(lo->plh_inode));
387 } 388 }
388} 389}
@@ -466,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
466static struct pnfs_layout_segment * 467static struct pnfs_layout_segment *
467send_layoutget(struct pnfs_layout_hdr *lo, 468send_layoutget(struct pnfs_layout_hdr *lo,
468 struct nfs_open_context *ctx, 469 struct nfs_open_context *ctx,
469 u32 iomode) 470 u32 iomode,
471 gfp_t gfp_flags)
470{ 472{
471 struct inode *ino = lo->plh_inode; 473 struct inode *ino = lo->plh_inode;
472 struct nfs_server *server = NFS_SERVER(ino); 474 struct nfs_server *server = NFS_SERVER(ino);
@@ -479,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
479 dprintk("--> %s\n", __func__); 481 dprintk("--> %s\n", __func__);
480 482
481 BUG_ON(ctx == NULL); 483 BUG_ON(ctx == NULL);
482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 484 lgp = kzalloc(sizeof(*lgp), gfp_flags);
483 if (lgp == NULL) 485 if (lgp == NULL)
484 return NULL; 486 return NULL;
485 487
@@ -487,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo,
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 489 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT; 490 max_pages = max_resp_sz >> PAGE_SHIFT;
489 491
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); 492 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
491 if (!pages) 493 if (!pages)
492 goto out_err_free; 494 goto out_err_free;
493 495
494 for (i = 0; i < max_pages; i++) { 496 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL); 497 pages[i] = alloc_page(gfp_flags);
496 if (!pages[i]) 498 if (!pages[i])
497 goto out_err_free; 499 goto out_err_free;
498 } 500 }
@@ -508,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
508 lgp->args.layout.pages = pages; 510 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE; 511 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
510 lgp->lsegpp = &lseg; 512 lgp->lsegpp = &lseg;
513 lgp->gfp_flags = gfp_flags;
511 514
512 /* Synchronously retrieve layout information from server and 515 /* Synchronously retrieve layout information from server and
513 * store in lseg. 516 * store in lseg.
@@ -665,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
665} 668}
666 669
667static struct pnfs_layout_hdr * 670static struct pnfs_layout_hdr *
668alloc_init_layout_hdr(struct inode *ino) 671alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
669{ 672{
670 struct pnfs_layout_hdr *lo; 673 struct pnfs_layout_hdr *lo;
671 674
672 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); 675 lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
673 if (!lo) 676 if (!lo)
674 return NULL; 677 return NULL;
675 atomic_set(&lo->plh_refcount, 1); 678 atomic_set(&lo->plh_refcount, 1);
@@ -681,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino)
681} 684}
682 685
683static struct pnfs_layout_hdr * 686static struct pnfs_layout_hdr *
684pnfs_find_alloc_layout(struct inode *ino) 687pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
685{ 688{
686 struct nfs_inode *nfsi = NFS_I(ino); 689 struct nfs_inode *nfsi = NFS_I(ino);
687 struct pnfs_layout_hdr *new = NULL; 690 struct pnfs_layout_hdr *new = NULL;
@@ -696,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino)
696 return nfsi->layout; 699 return nfsi->layout;
697 } 700 }
698 spin_unlock(&ino->i_lock); 701 spin_unlock(&ino->i_lock);
699 new = alloc_init_layout_hdr(ino); 702 new = alloc_init_layout_hdr(ino, gfp_flags);
700 spin_lock(&ino->i_lock); 703 spin_lock(&ino->i_lock);
701 704
702 if (likely(nfsi->layout == NULL)) /* Won the race? */ 705 if (likely(nfsi->layout == NULL)) /* Won the race? */
@@ -756,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
756struct pnfs_layout_segment * 759struct pnfs_layout_segment *
757pnfs_update_layout(struct inode *ino, 760pnfs_update_layout(struct inode *ino,
758 struct nfs_open_context *ctx, 761 struct nfs_open_context *ctx,
759 enum pnfs_iomode iomode) 762 enum pnfs_iomode iomode,
763 gfp_t gfp_flags)
760{ 764{
761 struct nfs_inode *nfsi = NFS_I(ino); 765 struct nfs_inode *nfsi = NFS_I(ino);
762 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 766 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
@@ -767,7 +771,7 @@ pnfs_update_layout(struct inode *ino,
767 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 771 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
768 return NULL; 772 return NULL;
769 spin_lock(&ino->i_lock); 773 spin_lock(&ino->i_lock);
770 lo = pnfs_find_alloc_layout(ino); 774 lo = pnfs_find_alloc_layout(ino, gfp_flags);
771 if (lo == NULL) { 775 if (lo == NULL) {
772 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); 776 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
773 goto out_unlock; 777 goto out_unlock;
@@ -807,7 +811,7 @@ pnfs_update_layout(struct inode *ino,
807 spin_unlock(&clp->cl_lock); 811 spin_unlock(&clp->cl_lock);
808 } 812 }
809 813
810 lseg = send_layoutget(lo, ctx, iomode); 814 lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
811 if (!lseg && first) { 815 if (!lseg && first) {
812 spin_lock(&clp->cl_lock); 816 spin_lock(&clp->cl_lock);
813 list_del_init(&lo->plh_layouts); 817 list_del_init(&lo->plh_layouts);
@@ -846,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
846 goto out; 850 goto out;
847 } 851 }
848 /* Inject layout blob into I/O device driver */ 852 /* Inject layout blob into I/O device driver */
849 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); 853 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
850 if (!lseg || IS_ERR(lseg)) { 854 if (!lseg || IS_ERR(lseg)) {
851 if (!lseg) 855 if (!lseg)
852 status = -ENOMEM; 856 status = -ENOMEM;
@@ -899,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
899 /* This is first coelesce call for a series of nfs_pages */ 903 /* This is first coelesce call for a series of nfs_pages */
900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 904 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
901 prev->wb_context, 905 prev->wb_context,
902 IOMODE_READ); 906 IOMODE_READ,
907 GFP_KERNEL);
903 } 908 }
904 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 909 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
905} 910}
@@ -921,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
921 /* This is first coelesce call for a series of nfs_pages */ 926 /* This is first coelesce call for a series of nfs_pages */
922 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 927 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
923 prev->wb_context, 928 prev->wb_context,
924 IOMODE_RW); 929 IOMODE_RW,
930 GFP_NOFS);
925 } 931 }
926 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 932 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
927} 933}
@@ -1004,6 +1010,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1004{ 1010{
1005 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1011 struct nfs_inode *nfsi = NFS_I(wdata->inode);
1006 loff_t end_pos = wdata->args.offset + wdata->res.count; 1012 loff_t end_pos = wdata->args.offset + wdata->res.count;
1013 bool mark_as_dirty = false;
1007 1014
1008 spin_lock(&nfsi->vfs_inode.i_lock); 1015 spin_lock(&nfsi->vfs_inode.i_lock);
1009 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1016 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
@@ -1011,13 +1018,18 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1011 get_lseg(wdata->lseg); 1018 get_lseg(wdata->lseg);
1012 wdata->lseg->pls_lc_cred = 1019 wdata->lseg->pls_lc_cred =
1013 get_rpccred(wdata->args.context->state->owner->so_cred); 1020 get_rpccred(wdata->args.context->state->owner->so_cred);
1014 mark_inode_dirty_sync(wdata->inode); 1021 mark_as_dirty = true;
1015 dprintk("%s: Set layoutcommit for inode %lu ", 1022 dprintk("%s: Set layoutcommit for inode %lu ",
1016 __func__, wdata->inode->i_ino); 1023 __func__, wdata->inode->i_ino);
1017 } 1024 }
1018 if (end_pos > wdata->lseg->pls_end_pos) 1025 if (end_pos > wdata->lseg->pls_end_pos)
1019 wdata->lseg->pls_end_pos = end_pos; 1026 wdata->lseg->pls_end_pos = end_pos;
1020 spin_unlock(&nfsi->vfs_inode.i_lock); 1027 spin_unlock(&nfsi->vfs_inode.i_lock);
1028
1029 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1030 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1031 if (mark_as_dirty)
1032 mark_inode_dirty_sync(wdata->inode);
1021} 1033}
1022EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1034EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1023 1035
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bc4827202e7a..0c015bad9e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type {
70 const u32 id; 70 const u32 id;
71 const char *name; 71 const char *name;
72 struct module *owner; 72 struct module *owner;
73 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); 73 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
74 void (*free_lseg) (struct pnfs_layout_segment *lseg); 74 void (*free_lseg) (struct pnfs_layout_segment *lseg);
75 75
76 /* test for nfs page cache coalescing */ 76 /* test for nfs page cache coalescing */
@@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo);
126void put_lseg(struct pnfs_layout_segment *lseg); 126void put_lseg(struct pnfs_layout_segment *lseg);
127struct pnfs_layout_segment * 127struct pnfs_layout_segment *
128pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 128pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
129 enum pnfs_iomode access_type); 129 enum pnfs_iomode access_type, gfp_t gfp_flags);
130void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 130void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
131void unset_pnfs_layoutdriver(struct nfs_server *); 131void unset_pnfs_layoutdriver(struct nfs_server *);
132enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 132enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
245 245
246static inline struct pnfs_layout_segment * 246static inline struct pnfs_layout_segment *
247pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 247pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
248 enum pnfs_iomode access_type) 248 enum pnfs_iomode access_type, gfp_t gfp_flags)
249{ 249{
250 return NULL; 250 return NULL;
251} 251}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7cded2b12a05..2bcf0dc306a1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
288 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
289 289
290 BUG_ON(desc->pg_lseg != NULL); 290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); 291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
292 ClearPageError(page); 292 ClearPageError(page);
293 offset = 0; 293 offset = 0;
294 nbytes = desc->pg_count; 294 nbytes = desc->pg_count;
@@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
351 } 351 }
352 req = nfs_list_entry(data->pages.next); 352 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages)) 353 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); 354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
355 355
356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
357 0, lseg); 357 0, lseg);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2b8e9a5e366a..e288f06d3fa7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1004,6 +1004,7 @@ static int nfs_parse_security_flavors(char *value,
1004 return 0; 1004 return 0;
1005 } 1005 }
1006 1006
1007 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
1007 mnt->auth_flavor_len = 1; 1008 mnt->auth_flavor_len = 1;
1008 return 1; 1009 return 1;
1009} 1010}
@@ -1976,6 +1977,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
1976 if (error < 0) 1977 if (error < 0)
1977 goto out; 1978 goto out;
1978 1979
1980 /*
1981 * noac is a special case. It implies -o sync, but that's not
1982 * necessarily reflected in the mtab options. do_remount_sb
1983 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
1984 * remount options, so we have to explicitly reset it.
1985 */
1986 if (data->flags & NFS_MOUNT_NOAC)
1987 *flags |= MS_SYNCHRONOUS;
1988
1979 /* compare new mount options with old ones */ 1989 /* compare new mount options with old ones */
1980 error = nfs_compare_remount_data(nfss, data); 1990 error = nfs_compare_remount_data(nfss, data);
1981out: 1991out:
@@ -2235,8 +2245,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2235 if (!s->s_root) { 2245 if (!s->s_root) {
2236 /* initial superblock/root creation */ 2246 /* initial superblock/root creation */
2237 nfs_fill_super(s, data); 2247 nfs_fill_super(s, data);
2238 nfs_fscache_get_super_cookie( 2248 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2239 s, data ? data->fscache_uniq : NULL, NULL);
2240 } 2249 }
2241 2250
2242 mntroot = nfs_get_root(s, mntfh, dev_name); 2251 mntroot = nfs_get_root(s, mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af0c6279a4a7..49c715b4ac92 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -542,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
542 if (!nfs_need_commit(nfsi)) 542 if (!nfs_need_commit(nfsi))
543 return 0; 543 return 0;
544 544
545 spin_lock(&inode->i_lock);
545 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
546 if (ret > 0) 547 if (ret > 0)
547 nfsi->ncommit -= ret; 548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
548 if (nfs_need_commit(NFS_I(inode))) 551 if (nfs_need_commit(NFS_I(inode)))
549 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553
550 return ret; 554 return ret;
551} 555}
552#else 556#else
@@ -676,7 +680,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
676 req = nfs_setup_write_request(ctx, page, offset, count); 680 req = nfs_setup_write_request(ctx, page, offset, count);
677 if (IS_ERR(req)) 681 if (IS_ERR(req))
678 return PTR_ERR(req); 682 return PTR_ERR(req);
679 nfs_mark_request_dirty(req);
680 /* Update file length */ 683 /* Update file length */
681 nfs_grow_file(page, offset, count); 684 nfs_grow_file(page, offset, count);
682 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 685 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
@@ -936,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
936 atomic_set(&req->wb_complete, requests); 939 atomic_set(&req->wb_complete, requests);
937 940
938 BUG_ON(desc->pg_lseg); 941 BUG_ON(desc->pg_lseg);
939 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
940 ClearPageError(page); 943 ClearPageError(page);
941 offset = 0; 944 offset = 0;
942 nbytes = desc->pg_count; 945 nbytes = desc->pg_count;
@@ -1010,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1010 } 1013 }
1011 req = nfs_list_entry(data->pages.next); 1014 req = nfs_list_entry(data->pages.next);
1012 if ((!lseg) && list_is_singular(&data->pages)) 1015 if ((!lseg) && list_is_singular(&data->pages))
1013 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 1016 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
1014 1017
1015 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1016 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
@@ -1414,8 +1417,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1414 task->tk_pid, task->tk_status); 1417 task->tk_pid, task->tk_status);
1415 1418
1416 /* Call the NFS version-specific code */ 1419 /* Call the NFS version-specific code */
1417 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) 1420 NFS_PROTO(data->inode)->commit_done(task, data);
1418 return;
1419} 1421}
1420 1422
1421void nfs_commit_release_pages(struct nfs_write_data *data) 1423void nfs_commit_release_pages(struct nfs_write_data *data)
@@ -1483,9 +1485,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1483 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1485 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1484 if (res <= 0) 1486 if (res <= 0)
1485 goto out_mark_dirty; 1487 goto out_mark_dirty;
1486 spin_lock(&inode->i_lock);
1487 res = nfs_scan_commit(inode, &head, 0, 0); 1488 res = nfs_scan_commit(inode, &head, 0, 0);
1488 spin_unlock(&inode->i_lock);
1489 if (res) { 1489 if (res) {
1490 int error; 1490 int error;
1491 1491
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index ec0f277be7f5..6940439bd609 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -173,7 +173,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
173 return -EINVAL; 173 return -EINVAL;
174 break; 174 break;
175 case ACL_MASK: 175 case ACL_MASK:
176 /* Solaris sometimes sets additonal bits in the mask */ 176 /* Solaris sometimes sets additional bits in the mask */
177 entry->e_perm &= S_IRWXO; 177 entry->e_perm &= S_IRWXO;
178 break; 178 break;
179 default: 179 default:
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 0c6d81670137..7c831a2731fa 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
38 exp_readlock(); 38 exp_readlock();
39 nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); 39 nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
40 fh_put(&fh); 40 fh_put(&fh);
41 rqstp->rq_client = NULL;
42 exp_readunlock(); 41 exp_readunlock();
43 /* We return nlm error codes as nlm doesn't know 42 /* We return nlm error codes as nlm doesn't know
44 * about nfsd, but nfsd does know about nlm.. 43 * about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7e84a852cdae..ad48faca20fc 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -702,7 +702,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
702 *p++ = htonl(resp->eof); 702 *p++ = htonl(resp->eof);
703 *p++ = htonl(resp->count); /* xdr opaque count */ 703 *p++ = htonl(resp->count); /* xdr opaque count */
704 xdr_ressize_check(rqstp, p); 704 xdr_ressize_check(rqstp, p);
705 /* now update rqstp->rq_res to reflect data aswell */ 705 /* now update rqstp->rq_res to reflect data as well */
706 rqstp->rq_res.page_len = resp->count; 706 rqstp->rq_res.page_len = resp->count;
707 if (resp->count & 3) { 707 if (resp->count & 3) {
708 /* need to pad the tail */ 708 /* need to pad the tail */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fbde6f79922e..4cf04e11c66c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -258,6 +258,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
258 if (atomic_dec_and_test(&fp->fi_delegees)) { 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
260 fp->fi_lease = NULL; 260 fp->fi_lease = NULL;
261 fput(fp->fi_deleg_file);
261 fp->fi_deleg_file = NULL; 262 fp->fi_deleg_file = NULL;
262 } 263 }
263} 264}
@@ -397,9 +398,12 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp)
397 398
398static void free_generic_stateid(struct nfs4_stateid *stp) 399static void free_generic_stateid(struct nfs4_stateid *stp)
399{ 400{
400 int oflag = nfs4_access_bmap_to_omode(stp); 401 int oflag;
401 402
402 nfs4_file_put_access(stp->st_file, oflag); 403 if (stp->st_access_bmap) {
404 oflag = nfs4_access_bmap_to_omode(stp);
405 nfs4_file_put_access(stp->st_file, oflag);
406 }
403 put_nfs4_file(stp->st_file); 407 put_nfs4_file(stp->st_file);
404 kmem_cache_free(stateid_slab, stp); 408 kmem_cache_free(stateid_slab, stp);
405} 409}
@@ -3055,7 +3059,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
3055 if (ONE_STATEID(stateid) && (flags & RD_STATE)) 3059 if (ONE_STATEID(stateid) && (flags & RD_STATE))
3056 return nfs_ok; 3060 return nfs_ok;
3057 else if (locks_in_grace()) { 3061 else if (locks_in_grace()) {
3058 /* Answer in remaining cases depends on existance of 3062 /* Answer in remaining cases depends on existence of
3059 * conflicting state; so we must wait out the grace period. */ 3063 * conflicting state; so we must wait out the grace period. */
3060 return nfserr_grace; 3064 return nfserr_grace;
3061 } else if (flags & WR_STATE) 3065 } else if (flags & WR_STATE)
@@ -3675,7 +3679,7 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
3675/* 3679/*
3676 * Alloc a lock owner structure. 3680 * Alloc a lock owner structure.
3677 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 3681 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
3678 * occured. 3682 * occurred.
3679 * 3683 *
3680 * strhashval = lock_ownerstr_hashval 3684 * strhashval = lock_ownerstr_hashval
3681 */ 3685 */
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 4ce005dbf3e6..65ec595e2226 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -451,7 +451,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
451 *p++ = htonl(resp->count); 451 *p++ = htonl(resp->count);
452 xdr_ressize_check(rqstp, p); 452 xdr_ressize_check(rqstp, p);
453 453
454 /* now update rqstp->rq_res to reflect data aswell */ 454 /* now update rqstp->rq_res to reflect data as well */
455 rqstp->rq_res.page_len = resp->count; 455 rqstp->rq_res.page_len = resp->count;
456 if (resp->count & 3) { 456 if (resp->count & 3) {
457 /* need to pad the tail */ 457 /* need to pad the tail */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2e1cebde90df..129f3c9f62d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1363,7 +1363,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1363 goto out; 1363 goto out;
1364 if (!(iap->ia_valid & ATTR_MODE)) 1364 if (!(iap->ia_valid & ATTR_MODE))
1365 iap->ia_mode = 0; 1365 iap->ia_mode = 0;
1366 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1366 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
1367 if (err) 1367 if (err)
1368 goto out; 1368 goto out;
1369 1369
@@ -1385,6 +1385,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1385 if (IS_ERR(dchild)) 1385 if (IS_ERR(dchild))
1386 goto out_nfserr; 1386 goto out_nfserr;
1387 1387
1388 /* If file doesn't exist, check for permissions to create one */
1389 if (!dchild->d_inode) {
1390 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1391 if (err)
1392 goto out;
1393 }
1394
1388 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1395 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1389 if (err) 1396 if (err)
1390 goto out; 1397 goto out;
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 0a0a66d98cce..f7684483785e 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
646 unsigned long group, group_offset; 646 unsigned long group, group_offset;
647 int i, j, n, ret; 647 int i, j, n, ret;
648 648
649 for (i = 0; i < nitems; i += n) { 649 for (i = 0; i < nitems; i = j) {
650 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); 650 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
651 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); 651 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
652 if (ret < 0) 652 if (ret < 0)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 93589fccdd97..397e73258631 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -72,10 +72,9 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
72 /* 72 /*
73 * check to see if the page is mapped already (no holes) 73 * check to see if the page is mapped already (no holes)
74 */ 74 */
75 if (PageMappedToDisk(page)) { 75 if (PageMappedToDisk(page))
76 unlock_page(page);
77 goto mapped; 76 goto mapped;
78 } 77
79 if (page_has_buffers(page)) { 78 if (page_has_buffers(page)) {
80 struct buffer_head *bh, *head; 79 struct buffer_head *bh, *head;
81 int fully_mapped = 1; 80 int fully_mapped = 1;
@@ -90,7 +89,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
90 89
91 if (fully_mapped) { 90 if (fully_mapped) {
92 SetPageMappedToDisk(page); 91 SetPageMappedToDisk(page);
93 unlock_page(page);
94 goto mapped; 92 goto mapped;
95 } 93 }
96 } 94 }
@@ -105,16 +103,17 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
105 return VM_FAULT_SIGBUS; 103 return VM_FAULT_SIGBUS;
106 104
107 ret = block_page_mkwrite(vma, vmf, nilfs_get_block); 105 ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
108 if (unlikely(ret)) { 106 if (ret != VM_FAULT_LOCKED) {
109 nilfs_transaction_abort(inode->i_sb); 107 nilfs_transaction_abort(inode->i_sb);
110 return ret; 108 return ret;
111 } 109 }
110 nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
112 nilfs_transaction_commit(inode->i_sb); 111 nilfs_transaction_commit(inode->i_sb);
113 112
114 mapped: 113 mapped:
115 SetPageChecked(page); 114 SetPageChecked(page);
116 wait_on_page_writeback(page); 115 wait_on_page_writeback(page);
117 return 0; 116 return VM_FAULT_LOCKED;
118} 117}
119 118
120static const struct vm_operations_struct nilfs_file_vm_ops = { 119static const struct vm_operations_struct nilfs_file_vm_ops = {
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 856e8e4e0b74..a8dd344303cb 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -114,19 +114,19 @@ enum {
114 * Macros to check inode numbers 114 * Macros to check inode numbers
115 */ 115 */
116#define NILFS_MDT_INO_BITS \ 116#define NILFS_MDT_INO_BITS \
117 ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ 117 ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \
118 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ 118 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \
119 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) 119 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO))
120 120
121#define NILFS_SYS_INO_BITS \ 121#define NILFS_SYS_INO_BITS \
122 ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) 122 ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
123 123
124#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) 124#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
125 125
126#define NILFS_MDT_INODE(sb, ino) \ 126#define NILFS_MDT_INODE(sb, ino) \
127 ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) 127 ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino))))
128#define NILFS_VALID_INODE(sb, ino) \ 128#define NILFS_VALID_INODE(sb, ino) \
129 ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) 129 ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino))))
130 130
131/** 131/**
132 * struct nilfs_transaction_info: context information for synchronization 132 * struct nilfs_transaction_info: context information for synchronization
@@ -285,7 +285,7 @@ extern void nilfs_destroy_inode(struct inode *);
285extern void nilfs_error(struct super_block *, const char *, const char *, ...) 285extern void nilfs_error(struct super_block *, const char *, const char *, ...)
286 __attribute__ ((format (printf, 3, 4))); 286 __attribute__ ((format (printf, 3, 4)));
287extern void nilfs_warning(struct super_block *, const char *, const char *, ...) 287extern void nilfs_warning(struct super_block *, const char *, const char *, ...)
288 __attribute__ ((format (printf, 3, 4))); 288 __attribute__ ((format (printf, 3, 4)));
289extern struct nilfs_super_block * 289extern struct nilfs_super_block *
290nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); 290nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
291extern int nilfs_store_magic_and_option(struct super_block *, 291extern int nilfs_store_magic_and_option(struct super_block *,
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 4d2a1ee0eb47..1168059c7efd 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -500,7 +500,7 @@ void nilfs_mapping_init(struct address_space *mapping,
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 500 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 501 mapping->assoc_mapping = NULL;
502 mapping->backing_dev_info = bdi; 502 mapping->backing_dev_info = bdi;
503 mapping->a_ops = NULL; 503 mapping->a_ops = &empty_aops;
504} 504}
505 505
506/* 506/*
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6b1305dc26c0..9fde1c00a296 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -164,7 +164,7 @@ static int process_access_response(struct fsnotify_group *group,
164 fd, response); 164 fd, response);
165 /* 165 /*
166 * make sure the response is valid, if invalid we do nothing and either 166 * make sure the response is valid, if invalid we do nothing and either
167 * userspace can send a valid responce or we will clean it up after the 167 * userspace can send a valid response or we will clean it up after the
168 * timeout 168 * timeout
169 */ 169 */
170 switch (response) { 170 switch (response) {
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a91b69a6a291..e3cbd746f64a 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -194,10 +194,11 @@ static int idr_callback(int id, void *p, void *data)
194 194
195static void inotify_free_group_priv(struct fsnotify_group *group) 195static void inotify_free_group_priv(struct fsnotify_group *group)
196{ 196{
197 /* ideally the idr is empty and we won't hit the BUG in teh callback */ 197 /* ideally the idr is empty and we won't hit the BUG in the callback */
198 idr_for_each(&group->inotify_data.idr, idr_callback, group); 198 idr_for_each(&group->inotify_data.idr, idr_callback, group);
199 idr_remove_all(&group->inotify_data.idr); 199 idr_remove_all(&group->inotify_data.idr);
200 idr_destroy(&group->inotify_data.idr); 200 idr_destroy(&group->inotify_data.idr);
201 atomic_dec(&group->inotify_data.user->inotify_devs);
201 free_uid(group->inotify_data.user); 202 free_uid(group->inotify_data.user);
202} 203}
203 204
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bd46e7c8a0ef..8445fbc8985c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -290,7 +290,6 @@ static int inotify_fasync(int fd, struct file *file, int on)
290static int inotify_release(struct inode *ignored, struct file *file) 290static int inotify_release(struct inode *ignored, struct file *file)
291{ 291{
292 struct fsnotify_group *group = file->private_data; 292 struct fsnotify_group *group = file->private_data;
293 struct user_struct *user = group->inotify_data.user;
294 293
295 pr_debug("%s: group=%p\n", __func__, group); 294 pr_debug("%s: group=%p\n", __func__, group);
296 295
@@ -299,8 +298,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
299 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 298 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
300 fsnotify_put_group(group); 299 fsnotify_put_group(group);
301 300
302 atomic_dec(&user->inotify_devs);
303
304 return 0; 301 return 0;
305} 302}
306 303
@@ -697,7 +694,7 @@ retry:
697 return ret; 694 return ret;
698} 695}
699 696
700static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) 697static struct fsnotify_group *inotify_new_group(unsigned int max_events)
701{ 698{
702 struct fsnotify_group *group; 699 struct fsnotify_group *group;
703 700
@@ -710,8 +707,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
710 spin_lock_init(&group->inotify_data.idr_lock); 707 spin_lock_init(&group->inotify_data.idr_lock);
711 idr_init(&group->inotify_data.idr); 708 idr_init(&group->inotify_data.idr);
712 group->inotify_data.last_wd = 0; 709 group->inotify_data.last_wd = 0;
713 group->inotify_data.user = user;
714 group->inotify_data.fa = NULL; 710 group->inotify_data.fa = NULL;
711 group->inotify_data.user = get_current_user();
712
713 if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
714 inotify_max_user_instances) {
715 fsnotify_put_group(group);
716 return ERR_PTR(-EMFILE);
717 }
715 718
716 return group; 719 return group;
717} 720}
@@ -721,7 +724,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
721SYSCALL_DEFINE1(inotify_init1, int, flags) 724SYSCALL_DEFINE1(inotify_init1, int, flags)
722{ 725{
723 struct fsnotify_group *group; 726 struct fsnotify_group *group;
724 struct user_struct *user;
725 int ret; 727 int ret;
726 728
727 /* Check the IN_* constants for consistency. */ 729 /* Check the IN_* constants for consistency. */
@@ -731,31 +733,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
731 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) 733 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
732 return -EINVAL; 734 return -EINVAL;
733 735
734 user = get_current_user();
735 if (unlikely(atomic_read(&user->inotify_devs) >=
736 inotify_max_user_instances)) {
737 ret = -EMFILE;
738 goto out_free_uid;
739 }
740
741 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ 736 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
742 group = inotify_new_group(user, inotify_max_queued_events); 737 group = inotify_new_group(inotify_max_queued_events);
743 if (IS_ERR(group)) { 738 if (IS_ERR(group))
744 ret = PTR_ERR(group); 739 return PTR_ERR(group);
745 goto out_free_uid;
746 }
747
748 atomic_inc(&user->inotify_devs);
749 740
750 ret = anon_inode_getfd("inotify", &inotify_fops, group, 741 ret = anon_inode_getfd("inotify", &inotify_fops, group,
751 O_RDONLY | flags); 742 O_RDONLY | flags);
752 if (ret >= 0) 743 if (ret < 0)
753 return ret; 744 fsnotify_put_group(group);
754 745
755 fsnotify_put_group(group);
756 atomic_dec(&user->inotify_devs);
757out_free_uid:
758 free_uid(user);
759 return ret; 746 return ret;
760} 747}
761 748
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 50c00856f730..252ab1f6452b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -24,7 +24,7 @@
24 * referencing this object. The object typically will live inside the kernel 24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task 25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriete locks, can take a reference 26 * which can find this object holding the appropriete locks, can take a reference
27 * and the object itself is guarenteed to survive until the reference is dropped. 27 * and the object itself is guaranteed to survive until the reference is dropped.
28 * 28 *
29 * LOCKING: 29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST 30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f5094ee224c1..f14fde2b03d6 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -197,7 +197,7 @@ err_out:
197 } else if (ctx_needs_reset) { 197 } else if (ctx_needs_reset) {
198 /* 198 /*
199 * If there is no attribute list, restoring the search context 199 * If there is no attribute list, restoring the search context
200 * is acomplished simply by copying the saved context back over 200 * is accomplished simply by copying the saved context back over
201 * the caller supplied context. If there is an attribute list, 201 * the caller supplied context. If there is an attribute list,
202 * things are more complicated as we need to deal with mapping 202 * things are more complicated as we need to deal with mapping
203 * of mft records and resulting potential changes in pointers. 203 * of mft records and resulting potential changes in pointers.
@@ -1181,7 +1181,7 @@ not_found:
1181 * for, i.e. if one wants to add the attribute to the mft record this is the 1181 * for, i.e. if one wants to add the attribute to the mft record this is the
1182 * correct place to insert its attribute list entry into. 1182 * correct place to insert its attribute list entry into.
1183 * 1183 *
1184 * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is 1184 * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is
1185 * then undefined and in particular you should not rely on it not changing. 1185 * then undefined and in particular you should not rely on it not changing.
1186 */ 1186 */
1187int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, 1187int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index ef9ed854255c..ee4144ce5d7c 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -501,7 +501,7 @@ int ntfs_read_compressed_block(struct page *page)
501 VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> 501 VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
502 vol->cluster_size_bits; 502 vol->cluster_size_bits;
503 /* 503 /*
504 * The first vcn after the last wanted vcn (minumum alignment is again 504 * The first vcn after the last wanted vcn (minimum alignment is again
505 * PAGE_CACHE_SIZE. 505 * PAGE_CACHE_SIZE.
506 */ 506 */
507 VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) 507 VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 0b56c6b7ec01..c05d6dcf77a4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -622,7 +622,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
622 */ 622 */
623 /* Everyone gets all permissions. */ 623 /* Everyone gets all permissions. */
624 vi->i_mode |= S_IRWXUGO; 624 vi->i_mode |= S_IRWXUGO;
625 /* If read-only, noone gets write permissions. */ 625 /* If read-only, no one gets write permissions. */
626 if (IS_RDONLY(vi)) 626 if (IS_RDONLY(vi))
627 vi->i_mode &= ~S_IWUGO; 627 vi->i_mode &= ~S_IWUGO;
628 if (m->flags & MFT_RECORD_IS_DIRECTORY) { 628 if (m->flags & MFT_RECORD_IS_DIRECTORY) {
@@ -2529,7 +2529,7 @@ retry_truncate:
2529 * specifies that the behaviour is unspecified thus we do not 2529 * specifies that the behaviour is unspecified thus we do not
2530 * have to do anything. This means that in our implementation 2530 * have to do anything. This means that in our implementation
2531 * in the rare case that the file is mmap()ped and a write 2531 * in the rare case that the file is mmap()ped and a write
2532 * occured into the mmap()ped region just beyond the file size 2532 * occurred into the mmap()ped region just beyond the file size
2533 * and writepage has not yet been called to write out the page 2533 * and writepage has not yet been called to write out the page
2534 * (which would clear the area beyond the file size) and we now 2534 * (which would clear the area beyond the file size) and we now
2535 * extend the file size to incorporate this dirty region 2535 * extend the file size to incorporate this dirty region
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 8b2549f672bf..faece7190866 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -286,7 +286,7 @@ typedef le16 MFT_RECORD_FLAGS;
286 * fragmented. Volume free space includes the empty part of the mft zone and 286 * fragmented. Volume free space includes the empty part of the mft zone and
287 * when the volume's free 88% are used up, the mft zone is shrunk by a factor 287 * when the volume's free 88% are used up, the mft zone is shrunk by a factor
288 * of 2, thus making more space available for more files/data. This process is 288 * of 2, thus making more space available for more files/data. This process is
289 * repeated everytime there is no more free space except for the mft zone until 289 * repeated every time there is no more free space except for the mft zone until
290 * there really is no more free space. 290 * there really is no more free space.
291 */ 291 */
292 292
@@ -1657,13 +1657,13 @@ typedef enum {
1657 * pointed to by the Owner field was provided by a defaulting mechanism 1657 * pointed to by the Owner field was provided by a defaulting mechanism
1658 * rather than explicitly provided by the original provider of the 1658 * rather than explicitly provided by the original provider of the
1659 * security descriptor. This may affect the treatment of the SID with 1659 * security descriptor. This may affect the treatment of the SID with
1660 * respect to inheritence of an owner. 1660 * respect to inheritance of an owner.
1661 * 1661 *
1662 * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in 1662 * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in
1663 * the Group field was provided by a defaulting mechanism rather than 1663 * the Group field was provided by a defaulting mechanism rather than
1664 * explicitly provided by the original provider of the security 1664 * explicitly provided by the original provider of the security
1665 * descriptor. This may affect the treatment of the SID with respect to 1665 * descriptor. This may affect the treatment of the SID with respect to
1666 * inheritence of a primary group. 1666 * inheritance of a primary group.
1667 * 1667 *
1668 * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security 1668 * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security
1669 * descriptor contains a discretionary ACL. If this flag is set and the 1669 * descriptor contains a discretionary ACL. If this flag is set and the
@@ -1674,7 +1674,7 @@ typedef enum {
1674 * pointed to by the Dacl field was provided by a defaulting mechanism 1674 * pointed to by the Dacl field was provided by a defaulting mechanism
1675 * rather than explicitly provided by the original provider of the 1675 * rather than explicitly provided by the original provider of the
1676 * security descriptor. This may affect the treatment of the ACL with 1676 * security descriptor. This may affect the treatment of the ACL with
1677 * respect to inheritence of an ACL. This flag is ignored if the 1677 * respect to inheritance of an ACL. This flag is ignored if the
1678 * DaclPresent flag is not set. 1678 * DaclPresent flag is not set.
1679 * 1679 *
1680 * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security 1680 * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security
@@ -1686,7 +1686,7 @@ typedef enum {
1686 * pointed to by the Sacl field was provided by a defaulting mechanism 1686 * pointed to by the Sacl field was provided by a defaulting mechanism
1687 * rather than explicitly provided by the original provider of the 1687 * rather than explicitly provided by the original provider of the
1688 * security descriptor. This may affect the treatment of the ACL with 1688 * security descriptor. This may affect the treatment of the ACL with
1689 * respect to inheritence of an ACL. This flag is ignored if the 1689 * respect to inheritance of an ACL. This flag is ignored if the
1690 * SaclPresent flag is not set. 1690 * SaclPresent flag is not set.
1691 * 1691 *
1692 * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security 1692 * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security
@@ -2283,7 +2283,7 @@ typedef struct {
2283 // the key_length is zero, then the vcn immediately 2283 // the key_length is zero, then the vcn immediately
2284 // follows the INDEX_ENTRY_HEADER. Regardless of 2284 // follows the INDEX_ENTRY_HEADER. Regardless of
2285 // key_length, the address of the 8-byte boundary 2285 // key_length, the address of the 8-byte boundary
2286 // alligned vcn of INDEX_ENTRY{_HEADER} *ie is given by 2286 // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
2287 // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN), 2287 // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN),
2288 // where sizeof(VCN) can be hardcoded as 8 if wanted. */ 2288 // where sizeof(VCN) can be hardcoded as 8 if wanted. */
2289} __attribute__ ((__packed__)) INDEX_ENTRY; 2289} __attribute__ ((__packed__)) INDEX_ENTRY;
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 4dadcdf3d451..c71de292c5ad 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -669,7 +669,7 @@ err_out:
669 * of cases where we think that a volume is dirty when in fact it is clean. 669 * of cases where we think that a volume is dirty when in fact it is clean.
670 * This should only affect volumes that have not been shutdown cleanly but did 670 * This should only affect volumes that have not been shutdown cleanly but did
671 * not have any pending, non-check-pointed i/o, i.e. they were completely idle 671 * not have any pending, non-check-pointed i/o, i.e. they were completely idle
672 * at least for the five seconds preceeding the unclean shutdown. 672 * at least for the five seconds preceding the unclean shutdown.
673 * 673 *
674 * This function assumes that the $LogFile journal has already been consistency 674 * This function assumes that the $LogFile journal has already been consistency
675 * checked by a call to ntfs_check_logfile() and in particular if the $LogFile 675 * checked by a call to ntfs_check_logfile() and in particular if the $LogFile
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index b5a6f08bd35c..aa2b6ac3f0a4 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -222,7 +222,7 @@ typedef struct {
222/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the 222/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the
223 restart_area_offset + the offset of the 223 restart_area_offset + the offset of the
224 file_size are > 510 then corruption has 224 file_size are > 510 then corruption has
225 occured. This is the very first check when 225 occurred. This is the very first check when
226 starting with the restart_area as if it 226 starting with the restart_area as if it
227 fails it means that some of the above values 227 fails it means that some of the above values
228 will be corrupted by the multi sector 228 will be corrupted by the multi sector
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 326e7475a22a..382857f9c7db 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -73,7 +73,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
73 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + 73 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
74 vol->mft_record_size) { 74 vol->mft_record_size) {
75 page = ERR_PTR(-ENOENT); 75 page = ERR_PTR(-ENOENT);
76 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " 76 ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
77 "which is beyond the end of the mft. " 77 "which is beyond the end of the mft. "
78 "This is probably a bug in the ntfs " 78 "This is probably a bug in the ntfs "
79 "driver.", ni->mft_no); 79 "driver.", ni->mft_no);
@@ -1442,7 +1442,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1442 // Note: It will need to be a special mft record and if none of 1442 // Note: It will need to be a special mft record and if none of
1443 // those are available it gets rather complicated... 1443 // those are available it gets rather complicated...
1444 ntfs_error(vol->sb, "Not enough space in this mft record to " 1444 ntfs_error(vol->sb, "Not enough space in this mft record to "
1445 "accomodate extended mft bitmap attribute " 1445 "accommodate extended mft bitmap attribute "
1446 "extent. Cannot handle this yet."); 1446 "extent. Cannot handle this yet.");
1447 ret = -EOPNOTSUPP; 1447 ret = -EOPNOTSUPP;
1448 goto undo_alloc; 1448 goto undo_alloc;
@@ -1879,7 +1879,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1879 // and we would then need to update all references to this mft 1879 // and we would then need to update all references to this mft
1880 // record appropriately. This is rather complicated... 1880 // record appropriately. This is rather complicated...
1881 ntfs_error(vol->sb, "Not enough space in this mft record to " 1881 ntfs_error(vol->sb, "Not enough space in this mft record to "
1882 "accomodate extended mft data attribute " 1882 "accommodate extended mft data attribute "
1883 "extent. Cannot handle this yet."); 1883 "extent. Cannot handle this yet.");
1884 ret = -EOPNOTSUPP; 1884 ret = -EOPNOTSUPP;
1885 goto undo_alloc; 1885 goto undo_alloc;
@@ -2357,7 +2357,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2357 } 2357 }
2358#ifdef DEBUG 2358#ifdef DEBUG
2359 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2359 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2360 ntfs_debug("Status of mftbmp after initialized extention: " 2360 ntfs_debug("Status of mftbmp after initialized extension: "
2361 "allocated_size 0x%llx, data_size 0x%llx, " 2361 "allocated_size 0x%llx, data_size 0x%llx, "
2362 "initialized_size 0x%llx.", 2362 "initialized_size 0x%llx.",
2363 (long long)mftbmp_ni->allocated_size, 2363 (long long)mftbmp_ni->allocated_size,
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 56a9a6d25a2a..eac7d6788a10 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1243,7 +1243,7 @@ err_out:
1243 * write. 1243 * write.
1244 * 1244 *
1245 * This is used when building the mapping pairs array of a runlist to compress 1245 * This is used when building the mapping pairs array of a runlist to compress
1246 * a given logical cluster number (lcn) or a specific run length to the minumum 1246 * a given logical cluster number (lcn) or a specific run length to the minimum
1247 * size possible. 1247 * size possible.
1248 * 1248 *
1249 * Return the number of bytes written on success. On error, i.e. the 1249 * Return the number of bytes written on success. On error, i.e. the
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 29099a07b9fe..b52706da4645 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -458,7 +458,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
458 * the volume on boot and updates them. 458 * the volume on boot and updates them.
459 * 459 *
460 * When remounting read-only, mark the volume clean if no volume errors 460 * When remounting read-only, mark the volume clean if no volume errors
461 * have occured. 461 * have occurred.
462 */ 462 */
463 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 463 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
464 static const char *es = ". Cannot remount read-write."; 464 static const char *es = ". Cannot remount read-write.";
@@ -1269,7 +1269,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1269 "hibernated on the volume."); 1269 "hibernated on the volume.");
1270 return 0; 1270 return 0;
1271 } 1271 }
1272 /* A real error occured. */ 1272 /* A real error occurred. */
1273 ntfs_error(vol->sb, "Failed to find inode number for " 1273 ntfs_error(vol->sb, "Failed to find inode number for "
1274 "hiberfil.sys."); 1274 "hiberfil.sys.");
1275 return ret; 1275 return ret;
@@ -1370,7 +1370,7 @@ static bool load_and_init_quota(ntfs_volume *vol)
1370 NVolSetQuotaOutOfDate(vol); 1370 NVolSetQuotaOutOfDate(vol);
1371 return true; 1371 return true;
1372 } 1372 }
1373 /* A real error occured. */ 1373 /* A real error occurred. */
1374 ntfs_error(vol->sb, "Failed to find inode number for $Quota."); 1374 ntfs_error(vol->sb, "Failed to find inode number for $Quota.");
1375 return false; 1375 return false;
1376 } 1376 }
@@ -1454,7 +1454,7 @@ not_enabled:
1454 NVolSetUsnJrnlStamped(vol); 1454 NVolSetUsnJrnlStamped(vol);
1455 return true; 1455 return true;
1456 } 1456 }
1457 /* A real error occured. */ 1457 /* A real error occurred. */
1458 ntfs_error(vol->sb, "Failed to find inode number for " 1458 ntfs_error(vol->sb, "Failed to find inode number for "
1459 "$UsnJrnl."); 1459 "$UsnJrnl.");
1460 return false; 1460 return false;
@@ -2292,7 +2292,7 @@ static void ntfs_put_super(struct super_block *sb)
2292 ntfs_commit_inode(vol->mft_ino); 2292 ntfs_commit_inode(vol->mft_ino);
2293 2293
2294 /* 2294 /*
2295 * If a read-write mount and no volume errors have occured, mark the 2295 * If a read-write mount and no volume errors have occurred, mark the
2296 * volume clean. Also, re-commit all affected inodes. 2296 * volume clean. Also, re-commit all affected inodes.
2297 */ 2297 */
2298 if (!(sb->s_flags & MS_RDONLY)) { 2298 if (!(sb->s_flags & MS_RDONLY)) {
@@ -2496,7 +2496,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2496 if (vol->nr_clusters & 63) 2496 if (vol->nr_clusters & 63)
2497 nr_free += 64 - (vol->nr_clusters & 63); 2497 nr_free += 64 - (vol->nr_clusters & 63);
2498 up_read(&vol->lcnbmp_lock); 2498 up_read(&vol->lcnbmp_lock);
2499 /* If errors occured we may well have gone below zero, fix this. */ 2499 /* If errors occurred we may well have gone below zero, fix this. */
2500 if (nr_free < 0) 2500 if (nr_free < 0)
2501 nr_free = 0; 2501 nr_free = 0;
2502 ntfs_debug("Exiting."); 2502 ntfs_debug("Exiting.");
@@ -2561,7 +2561,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2561 } 2561 }
2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", 2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
2563 index - 1); 2563 index - 1);
2564 /* If errors occured we may well have gone below zero, fix this. */ 2564 /* If errors occurred we may well have gone below zero, fix this. */
2565 if (nr_free < 0) 2565 if (nr_free < 0)
2566 nr_free = 0; 2566 nr_free = 0;
2567 ntfs_debug("Exiting."); 2567 ntfs_debug("Exiting.");
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b27a0d86f8c5..48aa9c7401c7 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4519,7 +4519,7 @@ set_tail_append:
4519} 4519}
4520 4520
4521/* 4521/*
4522 * Helper function called at the begining of an insert. 4522 * Helper function called at the beginning of an insert.
4523 * 4523 *
4524 * This computes a few things that are commonly used in the process of 4524 * This computes a few things that are commonly used in the process of
4525 * inserting into the btree: 4525 * inserting into the btree:
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index eceb456037c1..75cf3ad987a6 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -71,7 +71,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
71 71
72/* 72/*
73 * Using a named enum representing lock types in terms of #N bit stored in 73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication bewteen 74 * iocb->private, which is going to be used for communication between
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). 75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */ 76 */
77enum ocfs2_iocb_lock_bits { 77enum ocfs2_iocb_lock_bits {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 2461eb3272ed..9a3e6bbff27b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
539 539
540/* We want to make sure that nobody is heartbeating on top of us -- 540/* We want to make sure that nobody is heartbeating on top of us --
541 * this will help detect an invalid configuration. */ 541 * this will help detect an invalid configuration. */
542static int o2hb_check_last_timestamp(struct o2hb_region *reg) 542static void o2hb_check_last_timestamp(struct o2hb_region *reg)
543{ 543{
544 int node_num, ret;
545 struct o2hb_disk_slot *slot; 544 struct o2hb_disk_slot *slot;
546 struct o2hb_disk_heartbeat_block *hb_block; 545 struct o2hb_disk_heartbeat_block *hb_block;
546 char *errstr;
547 547
548 node_num = o2nm_this_node(); 548 slot = &reg->hr_slots[o2nm_this_node()];
549
550 ret = 1;
551 slot = &reg->hr_slots[node_num];
552 /* Don't check on our 1st timestamp */ 549 /* Don't check on our 1st timestamp */
553 if (slot->ds_last_time) { 550 if (!slot->ds_last_time)
554 hb_block = slot->ds_raw_block; 551 return;
555 552
556 if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) 553 hb_block = slot->ds_raw_block;
557 ret = 0; 554 if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
558 } 555 le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
556 hb_block->hb_node == slot->ds_node_num)
557 return;
559 558
560 return ret; 559#define ERRSTR1 "Another node is heartbeating on device"
560#define ERRSTR2 "Heartbeat generation mismatch on device"
561#define ERRSTR3 "Heartbeat sequence mismatch on device"
562
563 if (hb_block->hb_node != slot->ds_node_num)
564 errstr = ERRSTR1;
565 else if (le64_to_cpu(hb_block->hb_generation) !=
566 slot->ds_last_generation)
567 errstr = ERRSTR2;
568 else
569 errstr = ERRSTR3;
570
571 mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
572 "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
573 slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
574 (unsigned long long)slot->ds_last_time, hb_block->hb_node,
575 (unsigned long long)le64_to_cpu(hb_block->hb_generation),
576 (unsigned long long)le64_to_cpu(hb_block->hb_seq));
561} 577}
562 578
563static inline void o2hb_prepare_block(struct o2hb_region *reg, 579static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
983 /* With an up to date view of the slots, we can check that no 999 /* With an up to date view of the slots, we can check that no
984 * other node has been improperly configured to heartbeat in 1000 * other node has been improperly configured to heartbeat in
985 * our slot. */ 1001 * our slot. */
986 if (!o2hb_check_last_timestamp(reg)) 1002 o2hb_check_last_timestamp(reg);
987 mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
988 "in our slot!\n", reg->hr_dev_name);
989 1003
990 /* fill in the proper info for our next heartbeat */ 1004 /* fill in the proper info for our next heartbeat */
991 o2hb_prepare_block(reg, reg->hr_generation); 1005 o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
999 } 1013 }
1000 1014
1001 i = -1; 1015 i = -1;
1002 while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { 1016 while((i = find_next_bit(configured_nodes,
1003 1017 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
1004 change |= o2hb_check_slot(reg, &reg->hr_slots[i]); 1018 change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
1005 } 1019 }
1006 1020
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1690 struct file *filp = NULL; 1704 struct file *filp = NULL;
1691 struct inode *inode = NULL; 1705 struct inode *inode = NULL;
1692 ssize_t ret = -EINVAL; 1706 ssize_t ret = -EINVAL;
1707 int live_threshold;
1693 1708
1694 if (reg->hr_bdev) 1709 if (reg->hr_bdev)
1695 goto out; 1710 goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1766 * A node is considered live after it has beat LIVE_THRESHOLD 1781 * A node is considered live after it has beat LIVE_THRESHOLD
1767 * times. We're not steady until we've given them a chance 1782 * times. We're not steady until we've given them a chance
1768 * _after_ our first read. 1783 * _after_ our first read.
1784 * The default threshold is bare minimum so as to limit the delay
1785 * during mounts. For global heartbeat, the threshold doubled for the
1786 * first region.
1769 */ 1787 */
1770 atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); 1788 live_threshold = O2HB_LIVE_THRESHOLD;
1789 if (o2hb_global_heartbeat_active()) {
1790 spin_lock(&o2hb_live_lock);
1791 if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
1792 live_threshold <<= 1;
1793 spin_unlock(&o2hb_live_lock);
1794 }
1795 atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
1771 1796
1772 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", 1797 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
1773 reg->hr_item.ci_name); 1798 reg->hr_item.ci_name);
@@ -2275,7 +2300,7 @@ void o2hb_free_hb_set(struct config_group *group)
2275 kfree(hs); 2300 kfree(hs);
2276} 2301}
2277 2302
2278/* hb callback registration and issueing */ 2303/* hb callback registration and issuing */
2279 2304
2280static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type) 2305static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type)
2281{ 2306{
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index a87366750f23..8f9cea1597af 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -89,7 +89,7 @@ static void o2quo_fence_self(void)
89 }; 89 };
90} 90}
91 91
92/* Indicate that a timeout occured on a hearbeat region write. The 92/* Indicate that a timeout occurred on a hearbeat region write. The
93 * other nodes in the cluster may consider us dead at that time so we 93 * other nodes in the cluster may consider us dead at that time so we
94 * want to "fence" ourselves so that we don't scribble on the disk 94 * want to "fence" ourselves so that we don't scribble on the disk
95 * after they think they've recovered us. This can't solve all 95 * after they think they've recovered us. This can't solve all
@@ -261,7 +261,7 @@ void o2quo_hb_still_up(u8 node)
261 spin_unlock(&qs->qs_lock); 261 spin_unlock(&qs->qs_lock);
262} 262}
263 263
264/* This is analagous to hb_up. as a node's connection comes up we delay the 264/* This is analogous to hb_up. as a node's connection comes up we delay the
265 * quorum decision until we see it heartbeating. the hold will be droped in 265 * quorum decision until we see it heartbeating. the hold will be droped in
266 * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if 266 * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if
267 * it's already heartbeating we we might be dropping a hold that conn_up got. 267 * it's already heartbeating we we might be dropping a hold that conn_up got.
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ee04ff5ee603..db5ee4b4f47a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -565,7 +565,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
565 * the work queue actually being up. */ 565 * the work queue actually being up. */
566 if (!valid && o2net_wq) { 566 if (!valid && o2net_wq) {
567 unsigned long delay; 567 unsigned long delay;
568 /* delay if we're withing a RECONNECT_DELAY of the 568 /* delay if we're within a RECONNECT_DELAY of the
569 * last attempt */ 569 * last attempt */
570 delay = (nn->nn_last_connect_attempt + 570 delay = (nn->nn_last_connect_attempt +
571 msecs_to_jiffies(o2net_reconnect_delay())) 571 msecs_to_jiffies(o2net_reconnect_delay()))
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9fe5b8fd658f..8582e3f4f120 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2868 bytes = blocks_wanted << sb->s_blocksize_bits; 2868 bytes = blocks_wanted << sb->s_blocksize_bits;
2869 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 2869 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2870 struct ocfs2_inode_info *oi = OCFS2_I(dir); 2870 struct ocfs2_inode_info *oi = OCFS2_I(dir);
2871 struct ocfs2_alloc_context *data_ac; 2871 struct ocfs2_alloc_context *data_ac = NULL;
2872 struct ocfs2_alloc_context *meta_ac = NULL; 2872 struct ocfs2_alloc_context *meta_ac = NULL;
2873 struct buffer_head *dirdata_bh = NULL; 2873 struct buffer_head *dirdata_bh = NULL;
2874 struct buffer_head *dx_root_bh = NULL; 2874 struct buffer_head *dx_root_bh = NULL;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7540a492eaba..3b179d6cbde0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
1614 spin_unlock(&dlm->spinlock); 1614 spin_unlock(&dlm->spinlock);
1615 1615
1616 /* Support for global heartbeat and node info was added in 1.1 */ 1616 /* Support for global heartbeat and node info was added in 1.1 */
1617 if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { 1617 if (dlm->dlm_locking_proto.pv_major > 1 ||
1618 dlm->dlm_locking_proto.pv_minor > 0) {
1618 status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); 1619 status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
1619 if (status) { 1620 if (status) {
1620 mlog_errno(status); 1621 mlog_errno(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9d67610dfc74..84d166328cf7 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -808,7 +808,7 @@ lookup:
808 dlm_mle_detach_hb_events(dlm, mle); 808 dlm_mle_detach_hb_events(dlm, mle);
809 dlm_put_mle(mle); 809 dlm_put_mle(mle);
810 mle = NULL; 810 mle = NULL;
811 /* this is lame, but we cant wait on either 811 /* this is lame, but we can't wait on either
812 * the mle or lockres waitqueue here */ 812 * the mle or lockres waitqueue here */
813 if (mig) 813 if (mig)
814 msleep(100); 814 msleep(100);
@@ -843,7 +843,7 @@ lookup:
843 843
844 /* finally add the lockres to its hash bucket */ 844 /* finally add the lockres to its hash bucket */
845 __dlm_insert_lockres(dlm, res); 845 __dlm_insert_lockres(dlm, res);
846 /* since this lockres is new it doesnt not require the spinlock */ 846 /* since this lockres is new it doesn't not require the spinlock */
847 dlm_lockres_grab_inflight_ref_new(dlm, res); 847 dlm_lockres_grab_inflight_ref_new(dlm, res);
848 848
849 /* if this node does not become the master make sure to drop 849 /* if this node does not become the master make sure to drop
@@ -2574,6 +2574,9 @@ fail:
2574 res->state &= ~DLM_LOCK_RES_MIGRATING; 2574 res->state &= ~DLM_LOCK_RES_MIGRATING;
2575 wake = 1; 2575 wake = 1;
2576 spin_unlock(&res->spinlock); 2576 spin_unlock(&res->spinlock);
2577 if (dlm_is_host_down(ret))
2578 dlm_wait_for_node_death(dlm, target,
2579 DLM_NODE_DEATH_WAIT_MAX);
2577 goto leave; 2580 goto leave;
2578 } 2581 }
2579 2582
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41565ae52856..89659d6dc206 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
1607 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); 1607 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1608 1608
1609 if (le32_to_cpu(rec->e_cpos) >= trunc_start) { 1609 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1610 /*
1611 * remove an entire extent record.
1612 */
1610 *trunc_cpos = le32_to_cpu(rec->e_cpos); 1613 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1611 /* 1614 /*
1612 * Skip holes if any. 1615 * Skip holes if any.
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
1617 *blkno = le64_to_cpu(rec->e_blkno); 1620 *blkno = le64_to_cpu(rec->e_blkno);
1618 *trunc_end = le32_to_cpu(rec->e_cpos); 1621 *trunc_end = le32_to_cpu(rec->e_cpos);
1619 } else if (range > trunc_start) { 1622 } else if (range > trunc_start) {
1623 /*
1624 * remove a partial extent record, which means we're
1625 * removing the last extent record.
1626 */
1620 *trunc_cpos = trunc_start; 1627 *trunc_cpos = trunc_start;
1628 /*
1629 * skip hole if any.
1630 */
1631 if (range < *trunc_end)
1632 *trunc_end = range;
1621 *trunc_len = *trunc_end - trunc_start; 1633 *trunc_len = *trunc_end - trunc_start;
1622 coff = trunc_start - le32_to_cpu(rec->e_cpos); 1634 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1623 *blkno = le64_to_cpu(rec->e_blkno) + 1635 *blkno = le64_to_cpu(rec->e_blkno) +
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 177d3a6c2a5f..b4c8bb6b8d28 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -835,7 +835,7 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
835 835
836 /* If we have allowd wipe of this inode for another node, it 836 /* If we have allowd wipe of this inode for another node, it
837 * will be marked here so we can safely skip it. Recovery will 837 * will be marked here so we can safely skip it. Recovery will
838 * cleanup any inodes we might inadvertantly skip here. */ 838 * cleanup any inodes we might inadvertently skip here. */
839 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) 839 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE)
840 goto bail_unlock; 840 goto bail_unlock;
841 841
@@ -917,7 +917,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
917 * the inode open lock in ocfs2_read_locked_inode(). When we 917 * the inode open lock in ocfs2_read_locked_inode(). When we
918 * get to ->delete_inode(), each node tries to convert it's 918 * get to ->delete_inode(), each node tries to convert it's
919 * lock to an exclusive. Trylocks are serialized by the inode 919 * lock to an exclusive. Trylocks are serialized by the inode
920 * meta data lock. If the upconvert suceeds, we know the inode 920 * meta data lock. If the upconvert succeeds, we know the inode
921 * is no longer live and can be deleted. 921 * is no longer live and can be deleted.
922 * 922 *
923 * Though we call this with the meta data lock held, the 923 * Though we call this with the meta data lock held, the
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dcc2d9327150..295d56454e8b 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1260{ 1260{
1261 struct ocfs2_journal *journal = osb->journal; 1261 struct ocfs2_journal *journal = osb->journal;
1262 1262
1263 if (ocfs2_is_hard_readonly(osb))
1264 return;
1265
1263 /* No need to queue up our truncate_log as regular cleanup will catch 1266 /* No need to queue up our truncate_log as regular cleanup will catch
1264 * that */ 1267 * that */
1265 ocfs2_queue_recovery_completion(journal, osb->slot_num, 1268 ocfs2_queue_recovery_completion(journal, osb->slot_num,
@@ -1368,7 +1371,7 @@ skip_recovery:
1368 mlog_errno(status); 1371 mlog_errno(status);
1369 1372
1370 /* Now it is right time to recover quotas... We have to do this under 1373 /* Now it is right time to recover quotas... We have to do this under
1371 * superblock lock so that noone can start using the slot (and crash) 1374 * superblock lock so that no one can start using the slot (and crash)
1372 * before we recover it */ 1375 * before we recover it */
1373 for (i = 0; i < rm_quota_used; i++) { 1376 for (i = 0; i < rm_quota_used; i++) {
1374 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); 1377 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 6180da1e37e6..68cf2f6d3c6a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -215,7 +215,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
215 /* WARNING: This only kicks off a single 215 /* WARNING: This only kicks off a single
216 * checkpoint. If someone races you and adds more 216 * checkpoint. If someone races you and adds more
217 * metadata to the journal, you won't know, and will 217 * metadata to the journal, you won't know, and will
218 * wind up waiting *alot* longer than necessary. Right 218 * wind up waiting *a lot* longer than necessary. Right
219 * now we only use this in clear_inode so that's 219 * now we only use this in clear_inode so that's
220 * OK. */ 220 * OK. */
221 ocfs2_start_checkpoint(osb); 221 ocfs2_start_checkpoint(osb);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 28f2cc1080d8..e5d738cd9cc0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2128,7 +2128,7 @@ leave:
2128} 2128}
2129 2129
2130/** 2130/**
2131 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly 2131 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly
2132 * allocated file. This is different from the typical 'add to orphan dir' 2132 * allocated file. This is different from the typical 'add to orphan dir'
2133 * operation in that the inode does not yet exist. This is a problem because 2133 * operation in that the inode does not yet exist. This is a problem because
2134 * the orphan dir stringifies the inode block number to come up with it's 2134 * the orphan dir stringifies the inode block number to come up with it's
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bf2e7764920e..938387a10d5d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -441,7 +441,7 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
441struct ocfs2_block_check { 441struct ocfs2_block_check {
442/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */ 442/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */
443 __le16 bc_ecc; /* Single-error-correction parity vector. 443 __le16 bc_ecc; /* Single-error-correction parity vector.
444 This is a simple Hamming code dependant 444 This is a simple Hamming code dependent
445 on the blocksize. OCFS2's maximum 445 on the blocksize. OCFS2's maximum
446 blocksize, 4K, requires 16 parity bits, 446 blocksize, 4K, requires 16 parity bits,
447 so we fit in __le16. */ 447 so we fit in __le16. */
@@ -750,7 +750,7 @@ struct ocfs2_dinode {
750 after an unclean 750 after an unclean
751 shutdown */ 751 shutdown */
752 } journal1; 752 } journal1;
753 } id1; /* Inode type dependant 1 */ 753 } id1; /* Inode type dependent 1 */
754/*C0*/ union { 754/*C0*/ union {
755 struct ocfs2_super_block i_super; 755 struct ocfs2_super_block i_super;
756 struct ocfs2_local_alloc i_lab; 756 struct ocfs2_local_alloc i_lab;
@@ -1019,7 +1019,7 @@ struct ocfs2_xattr_entry {
1019 __le16 xe_name_offset; /* byte offset from the 1st entry in the 1019 __le16 xe_name_offset; /* byte offset from the 1st entry in the
1020 local xattr storage(inode, xattr block or 1020 local xattr storage(inode, xattr block or
1021 xattr bucket). */ 1021 xattr bucket). */
1022 __u8 xe_name_len; /* xattr name len, does't include prefix. */ 1022 __u8 xe_name_len; /* xattr name len, doesn't include prefix. */
1023 __u8 xe_type; /* the low 7 bits indicate the name prefix 1023 __u8 xe_type; /* the low 7 bits indicate the name prefix
1024 * type and the highest bit indicates whether 1024 * type and the highest bit indicates whether
1025 * the EA is stored in the local storage. */ 1025 * the EA is stored in the local storage. */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 279aef68025b..92fcd575775a 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -556,7 +556,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
556 spin_unlock(&dq_data_lock); 556 spin_unlock(&dq_data_lock);
557 err = ocfs2_qinfo_lock(info, freeing); 557 err = ocfs2_qinfo_lock(info, freeing);
558 if (err < 0) { 558 if (err < 0) {
559 mlog(ML_ERROR, "Failed to lock quota info, loosing quota write" 559 mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
560 " (type=%d, id=%u)\n", dquot->dq_type, 560 " (type=%d, id=%u)\n", dquot->dq_type,
561 (unsigned)dquot->dq_id); 561 (unsigned)dquot->dq_id);
562 goto out; 562 goto out;
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
index 1e49cc29d06c..42c2b804f3fd 100644
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -29,7 +29,7 @@
29struct ocfs2_alloc_reservation { 29struct ocfs2_alloc_reservation {
30 struct rb_node r_node; 30 struct rb_node r_node;
31 31
32 unsigned int r_start; /* Begining of current window */ 32 unsigned int r_start; /* Beginning of current window */
33 unsigned int r_len; /* Length of the window */ 33 unsigned int r_len; /* Length of the window */
34 34
35 unsigned int r_last_len; /* Length of most recent alloc */ 35 unsigned int r_last_len; /* Length of most recent alloc */
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 8ce7398ae1d2..1ec56fdb8d0d 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -126,7 +126,7 @@ struct ocfs2_stack_operations {
126 * 126 *
127 * ->connect() must not return until it is guaranteed that 127 * ->connect() must not return until it is guaranteed that
128 * 128 *
129 * - Node down notifications for the filesystem will be recieved 129 * - Node down notifications for the filesystem will be received
130 * and passed to conn->cc_recovery_handler(). 130 * and passed to conn->cc_recovery_handler().
131 * - Locking requests for the filesystem will be processed. 131 * - Locking requests for the filesystem will be processed.
132 */ 132 */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ab6e2061074f..ba5d97e4a73e 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1511,7 +1511,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1511 max_bits = le16_to_cpu(gd->bg_bits); 1511 max_bits = le16_to_cpu(gd->bg_bits);
1512 1512
1513 /* Tail groups in cluster bitmaps which aren't cpg 1513 /* Tail groups in cluster bitmaps which aren't cpg
1514 * aligned are prone to partial extention by a failed 1514 * aligned are prone to partial extension by a failed
1515 * fs resize. If the file system resize never got to 1515 * fs resize. If the file system resize never got to
1516 * update the dinode cluster count, then we don't want 1516 * update the dinode cluster count, then we don't want
1517 * to trust any clusters past it, regardless of what 1517 * to trust any clusters past it, regardless of what
@@ -2459,7 +2459,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2459 /* The alloc_bh comes from ocfs2_free_dinode() or 2459 /* The alloc_bh comes from ocfs2_free_dinode() or
2460 * ocfs2_free_clusters(). The callers have all locked the 2460 * ocfs2_free_clusters(). The callers have all locked the
2461 * allocator and gotten alloc_bh from the lock call. This 2461 * allocator and gotten alloc_bh from the lock call. This
2462 * validates the dinode buffer. Any corruption that has happended 2462 * validates the dinode buffer. Any corruption that has happened
2463 * is a code bug. */ 2463 * is a code bug. */
2464 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 2464 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
2465 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 2465 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 69fa11b35aa4..5a521c748859 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -78,7 +78,7 @@ static struct kmem_cache *ocfs2_inode_cachep = NULL;
78struct kmem_cache *ocfs2_dquot_cachep; 78struct kmem_cache *ocfs2_dquot_cachep;
79struct kmem_cache *ocfs2_qf_chunk_cachep; 79struct kmem_cache *ocfs2_qf_chunk_cachep;
80 80
81/* OCFS2 needs to schedule several differnt types of work which 81/* OCFS2 needs to schedule several different types of work which
82 * require cluster locking, disk I/O, recovery waits, etc. Since these 82 * require cluster locking, disk I/O, recovery waits, etc. Since these
83 * types of work tend to be heavy we avoid using the kernel events 83 * types of work tend to be heavy we avoid using the kernel events
84 * workqueue and schedule on our own. */ 84 * workqueue and schedule on our own. */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 57a215dc2d9b..81ecf9c0bf0a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3554,7 +3554,7 @@ int ocfs2_xattr_set(struct inode *inode,
3554 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3554 down_write(&OCFS2_I(inode)->ip_xattr_sem);
3555 /* 3555 /*
3556 * Scan inode and external block to find the same name 3556 * Scan inode and external block to find the same name
3557 * extended attribute and collect search infomation. 3557 * extended attribute and collect search information.
3558 */ 3558 */
3559 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3559 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3560 if (ret) 3560 if (ret)
@@ -3578,7 +3578,7 @@ int ocfs2_xattr_set(struct inode *inode,
3578 goto cleanup; 3578 goto cleanup;
3579 } 3579 }
3580 3580
3581 /* Check whether the value is refcounted and do some prepartion. */ 3581 /* Check whether the value is refcounted and do some preparation. */
3582 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3582 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3583 (!xis.not_found || !xbs.not_found)) { 3583 (!xis.not_found || !xbs.not_found)) {
3584 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3584 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ac546975031f..d545e97d99c3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -500,7 +500,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
500 /* everything is up and running, commence */ 500 /* everything is up and running, commence */
501 rcu_assign_pointer(ptbl->part[partno], p); 501 rcu_assign_pointer(ptbl->part[partno], p);
502 502
503 /* suppress uevent if the disk supresses it */ 503 /* suppress uevent if the disk suppresses it */
504 if (!dev_get_uevent_suppress(ddev)) 504 if (!dev_get_uevent_suppress(ddev))
505 kobject_uevent(&pdev->kobj, KOBJ_ADD); 505 kobject_uevent(&pdev->kobj, KOBJ_ADD);
506 506
@@ -585,7 +585,7 @@ rescan:
585 /* 585 /*
586 * If any partition code tried to read beyond EOD, try 586 * If any partition code tried to read beyond EOD, try
587 * unlocking native capacity even if partition table is 587 * unlocking native capacity even if partition table is
588 * sucessfully read as we could be missing some partitions. 588 * successfully read as we could be missing some partitions.
589 */ 589 */
590 if (state->access_beyond_eod) { 590 if (state->access_beyond_eod) {
591 printk(KERN_WARNING 591 printk(KERN_WARNING
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index ac0ccb5026a2..19d6750d1d6c 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -348,6 +348,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
348 goto fail; 348 goto fail;
349 } 349 }
350 350
351 /* Check that sizeof_partition_entry has the correct value */
352 if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
353 pr_debug("GUID Partitition Entry Size check failed.\n");
354 goto fail;
355 }
356
351 if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) 357 if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
352 goto fail; 358 goto fail;
353 359
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index b10e3540d5b7..ce4f62440425 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1299 1299
1300 BUG_ON (!data || !frags); 1300 BUG_ON (!data || !frags);
1301 1301
1302 if (size < 2 * VBLK_SIZE_HEAD) {
1303 ldm_error("Value of size is to small.");
1304 return false;
1305 }
1306
1302 group = get_unaligned_be32(data + 0x08); 1307 group = get_unaligned_be32(data + 0x08);
1303 rec = get_unaligned_be16(data + 0x0C); 1308 rec = get_unaligned_be16(data + 0x0C);
1304 num = get_unaligned_be16(data + 0x0E); 1309 num = get_unaligned_be16(data + 0x0E);
@@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1306 ldm_error ("A VBLK claims to have %d parts.", num); 1311 ldm_error ("A VBLK claims to have %d parts.", num);
1307 return false; 1312 return false;
1308 } 1313 }
1314 if (rec >= num) {
1315 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
1316 return false;
1317 }
1309 1318
1310 list_for_each (item, frags) { 1319 list_for_each (item, frags) {
1311 f = list_entry (item, struct frag, list); 1320 f = list_entry (item, struct frag, list);
@@ -1334,10 +1343,9 @@ found:
1334 1343
1335 f->map |= (1 << rec); 1344 f->map |= (1 << rec);
1336 1345
1337 if (num > 0) { 1346 data += VBLK_SIZE_HEAD;
1338 data += VBLK_SIZE_HEAD; 1347 size -= VBLK_SIZE_HEAD;
1339 size -= VBLK_SIZE_HEAD; 1348
1340 }
1341 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); 1349 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
1342 1350
1343 return true; 1351 return true;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5a670c11aeac..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -220,7 +220,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task)
220 } 220 }
221 221
222 /* 222 /*
223 * Noone else is allowed. 223 * No one else is allowed.
224 */ 224 */
225 mmput(mm); 225 mmput(mm);
226 return ERR_PTR(-EPERM); 226 return ERR_PTR(-EPERM);
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
3124/* for the /proc/ directory itself, after non-process stuff has been done */ 3124/* for the /proc/ directory itself, after non-process stuff has been done */
3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3126{ 3126{
3127 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3127 unsigned int nr;
3128 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 3128 struct task_struct *reaper;
3129 struct tgid_iter iter; 3129 struct tgid_iter iter;
3130 struct pid_namespace *ns; 3130 struct pid_namespace *ns;
3131 3131
3132 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3133 goto out_no_task;
3134 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3135
3136 reaper = get_proc_task(filp->f_path.dentry->d_inode);
3132 if (!reaper) 3137 if (!reaper)
3133 goto out_no_task; 3138 goto out_no_task;
3134 3139
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2e7addfd9803..318d8654989b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -214,7 +214,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
214 int flags = vma->vm_flags; 214 int flags = vma->vm_flags;
215 unsigned long ino = 0; 215 unsigned long ino = 0;
216 unsigned long long pgoff = 0; 216 unsigned long long pgoff = 0;
217 unsigned long start; 217 unsigned long start, end;
218 dev_t dev = 0; 218 dev_t dev = 0;
219 int len; 219 int len;
220 220
@@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
227 227
228 /* We don't show the stack guard page in /proc/maps */ 228 /* We don't show the stack guard page in /proc/maps */
229 start = vma->vm_start; 229 start = vma->vm_start;
230 if (vma->vm_flags & VM_GROWSDOWN) 230 if (stack_guard_page_start(vma, start))
231 if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) 231 start += PAGE_SIZE;
232 start += PAGE_SIZE; 232 end = vma->vm_end;
233 if (stack_guard_page_end(vma, end))
234 end -= PAGE_SIZE;
233 235
234 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
235 start, 237 start,
236 vma->vm_end, 238 end,
237 flags & VM_READ ? 'r' : '-', 239 flags & VM_READ ? 'r' : '-',
238 flags & VM_WRITE ? 'w' : '-', 240 flags & VM_WRITE ? 'w' : '-',
239 flags & VM_EXEC ? 'x' : '-', 241 flags & VM_EXEC ? 'x' : '-',
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 867d0ac026ce..8007ae7c0d8c 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -1,5 +1,5 @@
1config PSTORE 1config PSTORE
2 bool "Persistant store support" 2 bool "Persistent store support"
3 default n 3 default n
4 help 4 help
5 This option enables generic access to platform level 5 This option enables generic access to platform level
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
152void pstore_get_records(void) 152void pstore_get_records(void)
153{ 153{
154 struct pstore_info *psi = psinfo; 154 struct pstore_info *psi = psinfo;
155 size_t size; 155 ssize_t size;
156 u64 id; 156 u64 id;
157 enum pstore_type_id type; 157 enum pstore_type_id type;
158 struct timespec time; 158 struct timespec time;
159 int failed = 0; 159 int failed = 0, rc;
160 160
161 if (!psi) 161 if (!psi)
162 return; 162 return;
163 163
164 mutex_lock(&psinfo->buf_mutex); 164 mutex_lock(&psinfo->buf_mutex);
165 rc = psi->open(psi);
166 if (rc)
167 goto out;
168
165 while ((size = psi->read(&id, &type, &time)) > 0) { 169 while ((size = psi->read(&id, &type, &time)) > 0) {
166 if (pstore_mkfile(type, psi->name, id, psi->buf, size, 170 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
167 time, psi->erase)) 171 time, psi->erase))
168 failed++; 172 failed++;
169 } 173 }
174 psi->close(psi);
175out:
170 mutex_unlock(&psinfo->buf_mutex); 176 mutex_unlock(&psinfo->buf_mutex);
171 177
172 if (failed) 178 if (failed)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fcc8ae75d874..d3c032f5fa0a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -442,7 +442,7 @@ EXPORT_SYMBOL(dquot_acquire);
442 */ 442 */
443int dquot_commit(struct dquot *dquot) 443int dquot_commit(struct dquot *dquot)
444{ 444{
445 int ret = 0, ret2 = 0; 445 int ret = 0;
446 struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); 446 struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
447 447
448 mutex_lock(&dqopt->dqio_mutex); 448 mutex_lock(&dqopt->dqio_mutex);
@@ -454,15 +454,10 @@ int dquot_commit(struct dquot *dquot)
454 spin_unlock(&dq_list_lock); 454 spin_unlock(&dq_list_lock);
455 /* Inactive dquot can be only if there was error during read/init 455 /* Inactive dquot can be only if there was error during read/init
456 * => we have better not writing it */ 456 * => we have better not writing it */
457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { 457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); 458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
459 if (info_dirty(&dqopt->info[dquot->dq_type])) { 459 else
460 ret2 = dqopt->ops[dquot->dq_type]->write_file_info( 460 ret = -EIO;
461 dquot->dq_sb, dquot->dq_type);
462 }
463 if (ret >= 0)
464 ret = ret2;
465 }
466out_sem: 461out_sem:
467 mutex_unlock(&dqopt->dqio_mutex); 462 mutex_unlock(&dqopt->dqio_mutex);
468 return ret; 463 return ret;
@@ -956,7 +951,7 @@ static inline int dqput_blocks(struct dquot *dquot)
956 951
957/* 952/*
958 * Remove references to dquots from inode and add dquot to list for freeing 953 * Remove references to dquots from inode and add dquot to list for freeing
959 * if we have the last referece to dquot 954 * if we have the last reference to dquot
960 * We can't race with anybody because we hold dqptr_sem for writing... 955 * We can't race with anybody because we hold dqptr_sem for writing...
961 */ 956 */
962static int remove_inode_dquot_ref(struct inode *inode, int type, 957static int remove_inode_dquot_ref(struct inode *inode, int type,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9eead2c796b7..fbb0b478a346 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 SetPageDirty(page); 112 SetPageDirty(page);
113 113
114 unlock_page(page); 114 unlock_page(page);
115 put_page(page);
115 } 116 }
116 117
117 return 0; 118 return 0;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c77514bd5776..c5e82ece7c6c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,7 +1,7 @@
1/* 1/*
2** Write ahead logging implementation copyright Chris Mason 2000 2** Write ahead logging implementation copyright Chris Mason 2000
3** 3**
4** The background commits make this code very interelated, and 4** The background commits make this code very interrelated, and
5** overly complex. I need to rethink things a bit....The major players: 5** overly complex. I need to rethink things a bit....The major players:
6** 6**
7** journal_begin -- call with the number of blocks you expect to log. 7** journal_begin -- call with the number of blocks you expect to log.
@@ -2725,7 +2725,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2725 REISERFS_DISK_OFFSET_IN_BYTES / 2725 REISERFS_DISK_OFFSET_IN_BYTES /
2726 sb->s_blocksize + 2); 2726 sb->s_blocksize + 2);
2727 2727
2728 /* Sanity check to see is the standard journal fitting withing first bitmap 2728 /* Sanity check to see is the standard journal fitting within first bitmap
2729 (actual for small blocksizes) */ 2729 (actual for small blocksizes) */
2730 if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2730 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2731 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2731 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index b87aa2c1afc1..7df1ce48203a 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -15,7 +15,7 @@
15 * for this mutex, no need for a system wide mutex facility. 15 * for this mutex, no need for a system wide mutex facility.
16 * 16 *
17 * Also this lock is often released before a call that could block because 17 * Also this lock is often released before a call that could block because
18 * reiserfs performances were partialy based on the release while schedule() 18 * reiserfs performances were partially based on the release while schedule()
19 * property of the Bkl. 19 * property of the Bkl.
20 */ 20 */
21void reiserfs_write_lock(struct super_block *s) 21void reiserfs_write_lock(struct super_block *s)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0aab04f46827..b216ff6be1c9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -393,7 +393,7 @@ void add_save_link(struct reiserfs_transaction_handle *th,
393 /* body of "save" link */ 393 /* body of "save" link */
394 link = INODE_PKEY(inode)->k_dir_id; 394 link = INODE_PKEY(inode)->k_dir_id;
395 395
396 /* put "save" link inot tree, don't charge quota to anyone */ 396 /* put "save" link into tree, don't charge quota to anyone */
397 retval = 397 retval =
398 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); 398 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
399 if (retval) { 399 if (retval) {
@@ -2104,7 +2104,7 @@ out:
2104 2104
2105/* Read data from quotafile - avoid pagecache and such because we cannot afford 2105/* Read data from quotafile - avoid pagecache and such because we cannot afford
2106 * acquiring the locks... As quota files are never truncated and quota code 2106 * acquiring the locks... As quota files are never truncated and quota code
2107 * itself serializes the operations (and noone else should touch the files) 2107 * itself serializes the operations (and no one else should touch the files)
2108 * we don't have to be afraid of races */ 2108 * we don't have to be afraid of races */
2109static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2109static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2110 size_t len, loff_t off) 2110 size_t len, loff_t off)
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5c11ca82b782..47d2a4498b03 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -396,7 +396,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
396 struct address_space *mapping = dir->i_mapping; 396 struct address_space *mapping = dir->i_mapping;
397 struct page *page; 397 struct page *page;
398 /* We can deadlock if we try to free dentries, 398 /* We can deadlock if we try to free dentries,
399 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 399 and an unlink/rmdir has just occurred - GFP_NOFS avoids this */
400 mapping_set_gfp_mask(mapping, GFP_NOFS); 400 mapping_set_gfp_mask(mapping, GFP_NOFS);
401 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); 401 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
402 if (!IS_ERR(page)) { 402 if (!IS_ERR(page)) {
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 26b15ae34d6f..c37b520132ff 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -104,7 +104,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
104 entry = &cache->entry[i]; 104 entry = &cache->entry[i];
105 105
106 /* 106 /*
107 * Initialise choosen cache entry, and fill it in from 107 * Initialise chosen cache entry, and fill it in from
108 * disk. 108 * disk.
109 */ 109 */
110 cache->unused--; 110 cache->unused--;
@@ -286,7 +286,7 @@ cleanup:
286 286
287 287
288/* 288/*
289 * Copy upto length bytes from cache entry to buffer starting at offset bytes 289 * Copy up to length bytes from cache entry to buffer starting at offset bytes
290 * into the cache entry. If there's not length bytes then copy the number of 290 * into the cache entry. If there's not length bytes then copy the number of
291 * bytes available. In all cases return the number of bytes copied. 291 * bytes available. In all cases return the number of bytes copied.
292 */ 292 */
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
949 * but s_maxbytes was an unsigned long long for many releases. Throw 949 * but s_maxbytes was an unsigned long long for many releases. Throw
950 * this warning for a little while to try and catch filesystems that 950 * this warning for a little while to try and catch filesystems that
951 * violate this rule. This warning should be either removed or 951 * violate this rule.
952 * converted to a BUG() in 2.6.34.
953 */ 952 */
954 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 953 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
955 "negative value (%lld)\n", type->name, sb->s_maxbytes); 954 "negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
24 24
25#include "sysfs.h" 25#include "sysfs.h"
26 26
27/* used in crash dumps to help with debugging */
28static char last_sysfs_file[PATH_MAX];
29void sysfs_printk_last_file(void)
30{
31 printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
32}
33
34/* 27/*
35 * There's one sysfs_buffer for each open file and one 28 * There's one sysfs_buffer for each open file and one
36 * sysfs_open_dirent for each sysfs_dirent with one or more open 29 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
337 struct sysfs_buffer *buffer; 330 struct sysfs_buffer *buffer;
338 const struct sysfs_ops *ops; 331 const struct sysfs_ops *ops;
339 int error = -EACCES; 332 int error = -EACCES;
340 char *p;
341
342 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
343 if (!IS_ERR(p))
344 memmove(last_sysfs_file, p, strlen(p) + 1);
345 333
346 /* need attr_sd for attr and ops, its parent for kobj */ 334 /* need attr_sd for attr and ops, its parent for kobj */
347 if (!sysfs_get_active(attr_sd)) 335 if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
101} 101}
102 102
103/** 103/**
104 * sysfs_update_group - given a directory kobject, create an attribute group 104 * sysfs_update_group - given a directory kobject, update an attribute group
105 * @kobj: The kobject to create the group on 105 * @kobj: The kobject to update the group on
106 * @grp: The attribute group to create 106 * @grp: The attribute group to update
107 * 107 *
108 * This function updates an attribute group. Unlike 108 * This function updates an attribute group. Unlike
109 * sysfs_create_group(), it will explicitly not warn or error if any 109 * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index d7440904be17..f8b0160da2da 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -47,7 +47,7 @@ config UBIFS_FS_DEBUG
47 bool "Enable debugging support" 47 bool "Enable debugging support"
48 depends on UBIFS_FS 48 depends on UBIFS_FS
49 select DEBUG_FS 49 select DEBUG_FS
50 select KALLSYMS_ALL 50 select KALLSYMS
51 help 51 help
52 This option enables UBIFS debugging support. It makes sure various 52 This option enables UBIFS debugging support. It makes sure various
53 assertions, self-checks, debugging messages and test modes are compiled 53 assertions, self-checks, debugging messages and test modes are compiled
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index c8ff0d1ae5d3..8b3a7da531eb 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -147,7 +147,7 @@ static int make_free_space(struct ubifs_info *c)
147 if (liab2 < liab1) 147 if (liab2 < liab1)
148 return -EAGAIN; 148 return -EAGAIN;
149 149
150 dbg_budg("new liability %lld (not shrinked)", liab2); 150 dbg_budg("new liability %lld (not shrunk)", liab2);
151 151
152 /* Liability did not shrink again, try GC */ 152 /* Liability did not shrink again, try GC */
153 dbg_budg("Run GC"); 153 dbg_budg("Run GC");
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b148fbc80f8d..1bd01ded7123 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -577,7 +577,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
577 size_t sz; 577 size_t sz;
578 578
579 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) 579 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
580 goto out; 580 return 0;
581 581
582 INIT_LIST_HEAD(&list); 582 INIT_LIST_HEAD(&list);
583 583
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index f25a7339f800..004d3745dc45 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -972,11 +972,39 @@ void dbg_dump_index(struct ubifs_info *c)
972void dbg_save_space_info(struct ubifs_info *c) 972void dbg_save_space_info(struct ubifs_info *c)
973{ 973{
974 struct ubifs_debug_info *d = c->dbg; 974 struct ubifs_debug_info *d = c->dbg;
975 975 int freeable_cnt;
976 ubifs_get_lp_stats(c, &d->saved_lst);
977 976
978 spin_lock(&c->space_lock); 977 spin_lock(&c->space_lock);
978 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
979
980 /*
981 * We use a dirty hack here and zero out @c->freeable_cnt, because it
982 * affects the free space calculations, and UBIFS might not know about
983 * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
984 * only when we read their lprops, and we do this only lazily, upon the
985 * need. So at any given point of time @c->freeable_cnt might be not
986 * exactly accurate.
987 *
988 * Just one example about the issue we hit when we did not zero
989 * @c->freeable_cnt.
990 * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the
991 * amount of free space in @d->saved_free
992 * 2. We re-mount R/W, which makes UBIFS to read the "lsave"
993 * information from flash, where we cache LEBs from various
994 * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
995 * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
996 * -> 'ubifs_get_pnode()' -> 'update_cats()'
997 * -> 'ubifs_add_to_cat()').
998 * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
999 * becomes %1.
1000 * 4. We calculate the amount of free space when the re-mount is
1001 * finished in 'dbg_check_space_info()' and it does not match
1002 * @d->saved_free.
1003 */
1004 freeable_cnt = c->freeable_cnt;
1005 c->freeable_cnt = 0;
979 d->saved_free = ubifs_get_free_space_nolock(c); 1006 d->saved_free = ubifs_get_free_space_nolock(c);
1007 c->freeable_cnt = freeable_cnt;
980 spin_unlock(&c->space_lock); 1008 spin_unlock(&c->space_lock);
981} 1009}
982 1010
@@ -993,12 +1021,15 @@ int dbg_check_space_info(struct ubifs_info *c)
993{ 1021{
994 struct ubifs_debug_info *d = c->dbg; 1022 struct ubifs_debug_info *d = c->dbg;
995 struct ubifs_lp_stats lst; 1023 struct ubifs_lp_stats lst;
996 long long avail, free; 1024 long long free;
1025 int freeable_cnt;
997 1026
998 spin_lock(&c->space_lock); 1027 spin_lock(&c->space_lock);
999 avail = ubifs_calc_available(c, c->min_idx_lebs); 1028 freeable_cnt = c->freeable_cnt;
1029 c->freeable_cnt = 0;
1030 free = ubifs_get_free_space_nolock(c);
1031 c->freeable_cnt = freeable_cnt;
1000 spin_unlock(&c->space_lock); 1032 spin_unlock(&c->space_lock);
1001 free = ubifs_get_free_space(c);
1002 1033
1003 if (free != d->saved_free) { 1034 if (free != d->saved_free) {
1004 ubifs_err("free space changed from %lld to %lld", 1035 ubifs_err("free space changed from %lld to %lld",
@@ -2806,40 +2837,38 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2806 struct ubifs_debug_info *d = c->dbg; 2837 struct ubifs_debug_info *d = c->dbg;
2807 2838
2808 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); 2839 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2809 d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); 2840 fname = d->dfs_dir_name;
2810 if (IS_ERR(d->dfs_dir)) { 2841 dent = debugfs_create_dir(fname, dfs_rootdir);
2811 err = PTR_ERR(d->dfs_dir); 2842 if (IS_ERR_OR_NULL(dent))
2812 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2813 d->dfs_dir_name, err);
2814 goto out; 2843 goto out;
2815 } 2844 d->dfs_dir = dent;
2816 2845
2817 fname = "dump_lprops"; 2846 fname = "dump_lprops";
2818 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); 2847 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2819 if (IS_ERR(dent)) 2848 if (IS_ERR_OR_NULL(dent))
2820 goto out_remove; 2849 goto out_remove;
2821 d->dfs_dump_lprops = dent; 2850 d->dfs_dump_lprops = dent;
2822 2851
2823 fname = "dump_budg"; 2852 fname = "dump_budg";
2824 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); 2853 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2825 if (IS_ERR(dent)) 2854 if (IS_ERR_OR_NULL(dent))
2826 goto out_remove; 2855 goto out_remove;
2827 d->dfs_dump_budg = dent; 2856 d->dfs_dump_budg = dent;
2828 2857
2829 fname = "dump_tnc"; 2858 fname = "dump_tnc";
2830 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); 2859 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2831 if (IS_ERR(dent)) 2860 if (IS_ERR_OR_NULL(dent))
2832 goto out_remove; 2861 goto out_remove;
2833 d->dfs_dump_tnc = dent; 2862 d->dfs_dump_tnc = dent;
2834 2863
2835 return 0; 2864 return 0;
2836 2865
2837out_remove: 2866out_remove:
2838 err = PTR_ERR(dent);
2839 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2840 fname, err);
2841 debugfs_remove_recursive(d->dfs_dir); 2867 debugfs_remove_recursive(d->dfs_dir);
2842out: 2868out:
2869 err = dent ? PTR_ERR(dent) : -ENODEV;
2870 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
2871 fname, err);
2843 return err; 2872 return err;
2844} 2873}
2845 2874
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 919f0de29d8f..e6493cac193d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,6 +23,12 @@
23#ifndef __UBIFS_DEBUG_H__ 23#ifndef __UBIFS_DEBUG_H__
24#define __UBIFS_DEBUG_H__ 24#define __UBIFS_DEBUG_H__
25 25
26/* Checking helper functions */
27typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
28 struct ubifs_zbranch *zbr, void *priv);
29typedef int (*dbg_znode_callback)(struct ubifs_info *c,
30 struct ubifs_znode *znode, void *priv);
31
26#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
27 33
28/** 34/**
@@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
270void dbg_dump_index(struct ubifs_info *c); 276void dbg_dump_index(struct ubifs_info *c);
271void dbg_dump_lpt_lebs(const struct ubifs_info *c); 277void dbg_dump_lpt_lebs(const struct ubifs_info *c);
272 278
273/* Checking helper functions */
274typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
275 struct ubifs_zbranch *zbr, void *priv);
276typedef int (*dbg_znode_callback)(struct ubifs_info *c,
277 struct ubifs_znode *znode, void *priv);
278int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, 279int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
279 dbg_znode_callback znode_cb, void *priv); 280 dbg_znode_callback znode_cb, void *priv);
280 281
@@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
295int dbg_check_filesystem(struct ubifs_info *c); 296int dbg_check_filesystem(struct ubifs_info *c);
296void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, 297void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
297 int add_pos); 298 int add_pos);
298int dbg_check_lprops(struct ubifs_info *c);
299int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, 299int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
300 int row, int col); 300 int row, int col);
301int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, 301int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
@@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
401#define DBGKEY(key) ((char *)(key)) 401#define DBGKEY(key) ((char *)(key))
402#define DBGKEY1(key) ((char *)(key)) 402#define DBGKEY1(key) ((char *)(key))
403 403
404#define ubifs_debugging_init(c) 0 404static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
405#define ubifs_debugging_exit(c) ({}) 405static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
406 406static inline const char *dbg_ntype(int type) { return ""; }
407#define dbg_ntype(type) "" 407static inline const char *dbg_cstate(int cmt_state) { return ""; }
408#define dbg_cstate(cmt_state) "" 408static inline const char *dbg_jhead(int jhead) { return ""; }
409#define dbg_jhead(jhead) "" 409static inline const char *
410#define dbg_get_key_dump(c, key) ({}) 410dbg_get_key_dump(const struct ubifs_info *c,
411#define dbg_dump_inode(c, inode) ({}) 411 const union ubifs_key *key) { return ""; }
412#define dbg_dump_node(c, node) ({}) 412static inline void dbg_dump_inode(const struct ubifs_info *c,
413#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) 413 const struct inode *inode) { return; }
414#define dbg_dump_budget_req(req) ({}) 414static inline void dbg_dump_node(const struct ubifs_info *c,
415#define dbg_dump_lstats(lst) ({}) 415 const void *node) { return; }
416#define dbg_dump_budg(c) ({}) 416static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
417#define dbg_dump_lprop(c, lp) ({}) 417 void *node, int lnum,
418#define dbg_dump_lprops(c) ({}) 418 int offs) { return; }
419#define dbg_dump_lpt_info(c) ({}) 419static inline void
420#define dbg_dump_leb(c, lnum) ({}) 420dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
421#define dbg_dump_znode(c, znode) ({}) 421static inline void
422#define dbg_dump_heap(c, heap, cat) ({}) 422dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
423#define dbg_dump_pnode(c, pnode, parent, iip) ({}) 423static inline void dbg_dump_budg(struct ubifs_info *c) { return; }
424#define dbg_dump_tnc(c) ({}) 424static inline void dbg_dump_lprop(const struct ubifs_info *c,
425#define dbg_dump_index(c) ({}) 425 const struct ubifs_lprops *lp) { return; }
426#define dbg_dump_lpt_lebs(c) ({}) 426static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
427 427static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; }
428#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 428static inline void dbg_dump_leb(const struct ubifs_info *c,
429#define dbg_old_index_check_init(c, zroot) 0 429 int lnum) { return; }
430#define dbg_save_space_info(c) ({}) 430static inline void
431#define dbg_check_space_info(c) 0 431dbg_dump_znode(const struct ubifs_info *c,
432#define dbg_check_old_index(c, zroot) 0 432 const struct ubifs_znode *znode) { return; }
433#define dbg_check_cats(c) 0 433static inline void dbg_dump_heap(struct ubifs_info *c,
434#define dbg_check_ltab(c) 0 434 struct ubifs_lpt_heap *heap,
435#define dbg_chk_lpt_free_spc(c) 0 435 int cat) { return; }
436#define dbg_chk_lpt_sz(c, action, len) 0 436static inline void dbg_dump_pnode(struct ubifs_info *c,
437#define dbg_check_synced_i_size(inode) 0 437 struct ubifs_pnode *pnode,
438#define dbg_check_dir_size(c, dir) 0 438 struct ubifs_nnode *parent,
439#define dbg_check_tnc(c, x) 0 439 int iip) { return; }
440#define dbg_check_idx_size(c, idx_size) 0 440static inline void dbg_dump_tnc(struct ubifs_info *c) { return; }
441#define dbg_check_filesystem(c) 0 441static inline void dbg_dump_index(struct ubifs_info *c) { return; }
442#define dbg_check_heap(c, heap, cat, add_pos) ({}) 442static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; }
443#define dbg_check_lprops(c) 0 443
444#define dbg_check_lpt_nodes(c, cnode, row, col) 0 444static inline int dbg_walk_index(struct ubifs_info *c,
445#define dbg_check_inode_size(c, inode, size) 0 445 dbg_leaf_callback leaf_cb,
446#define dbg_check_data_nodes_order(c, head) 0 446 dbg_znode_callback znode_cb,
447#define dbg_check_nondata_nodes_order(c, head) 0 447 void *priv) { return 0; }
448#define dbg_force_in_the_gaps_enabled 0 448static inline void dbg_save_space_info(struct ubifs_info *c) { return; }
449#define dbg_force_in_the_gaps() 0 449static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; }
450#define dbg_failure_mode 0 450static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; }
451 451static inline int
452#define dbg_debugfs_init() 0 452dbg_old_index_check_init(struct ubifs_info *c,
453#define dbg_debugfs_exit() 453 struct ubifs_zbranch *zroot) { return 0; }
454#define dbg_debugfs_init_fs(c) 0 454static inline int
455#define dbg_debugfs_exit_fs(c) 0 455dbg_check_old_index(struct ubifs_info *c,
456 struct ubifs_zbranch *zroot) { return 0; }
457static inline int dbg_check_cats(struct ubifs_info *c) { return 0; }
458static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
459static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
460static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
461 int action, int len) { return 0; }
462static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
463static inline int dbg_check_dir_size(struct ubifs_info *c,
464 const struct inode *dir) { return 0; }
465static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
466static inline int dbg_check_idx_size(struct ubifs_info *c,
467 long long idx_size) { return 0; }
468static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; }
469static inline void dbg_check_heap(struct ubifs_info *c,
470 struct ubifs_lpt_heap *heap,
471 int cat, int add_pos) { return; }
472static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
473 struct ubifs_cnode *cnode, int row, int col) { return 0; }
474static inline int dbg_check_inode_size(struct ubifs_info *c,
475 const struct inode *inode,
476 loff_t size) { return 0; }
477static inline int
478dbg_check_data_nodes_order(struct ubifs_info *c,
479 struct list_head *head) { return 0; }
480static inline int
481dbg_check_nondata_nodes_order(struct ubifs_info *c,
482 struct list_head *head) { return 0; }
483
484static inline int dbg_force_in_the_gaps(void) { return 0; }
485#define dbg_force_in_the_gaps_enabled 0
486#define dbg_failure_mode 0
487
488static inline int dbg_debugfs_init(void) { return 0; }
489static inline void dbg_debugfs_exit(void) { return; }
490static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; }
491static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; }
456 492
457#endif /* !CONFIG_UBIFS_FS_DEBUG */ 493#endif /* !CONFIG_UBIFS_FS_DEBUG */
458#endif /* !__UBIFS_DEBUG_H__ */ 494#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 28be1e6a65e8..b286db79c686 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1312,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync)
1312 1312
1313 dbg_gen("syncing inode %lu", inode->i_ino); 1313 dbg_gen("syncing inode %lu", inode->i_ino);
1314 1314
1315 if (inode->i_sb->s_flags & MS_RDONLY)
1316 return 0;
1317
1315 /* 1318 /*
1316 * VFS has already synchronized dirty pages for this inode. Synchronize 1319 * VFS has already synchronized dirty pages for this inode. Synchronize
1317 * the inode unless this is a 'datasync()' call. 1320 * the inode unless this is a 'datasync()' call.
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 4d0cb1241460..40fa780ebea7 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -175,26 +175,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
175} 175}
176 176
177/** 177/**
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
180 */
181void ubifs_create_buds_lists(struct ubifs_info *c)
182{
183 struct rb_node *p;
184
185 spin_lock(&c->buds_lock);
186 p = rb_first(&c->buds);
187 while (p) {
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
190
191 list_add_tail(&bud->list, &jhead->buds_list);
192 p = rb_next(p);
193 }
194 spin_unlock(&c->buds_lock);
195}
196
197/**
198 * ubifs_add_bud_to_log - add a new bud to the log. 178 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object 179 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to 180 * @jhead: journal head the bud belongs to
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 72775d35b99e..ef5155e109a2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1270,10 +1270,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1270 lnum = branch->lnum; 1270 lnum = branch->lnum;
1271 offs = branch->offs; 1271 offs = branch->offs;
1272 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); 1272 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1273 if (!pnode) { 1273 if (!pnode)
1274 err = -ENOMEM; 1274 return -ENOMEM;
1275 goto out; 1275
1276 }
1277 if (lnum == 0) { 1276 if (lnum == 0) {
1278 /* 1277 /*
1279 * This pnode was not written which just means that the LEB 1278 * This pnode was not written which just means that the LEB
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 936f2cbfe6b6..3dbad6fbd1eb 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -317,6 +317,32 @@ int ubifs_recover_master_node(struct ubifs_info *c)
317 goto out_free; 317 goto out_free;
318 } 318 }
319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); 319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
320
321 /*
322 * We had to recover the master node, which means there was an
323 * unclean reboot. However, it is possible that the master node
324 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
325 * E.g., consider the following chain of events:
326 *
327 * 1. UBIFS was cleanly unmounted, so the master node is clean
328 * 2. UBIFS is being mounted R/W and starts changing the master
329 * node in the first (%UBIFS_MST_LNUM). A power cut happens,
330 * so this LEB ends up with some amount of garbage at the
331 * end.
332 * 3. UBIFS is being mounted R/O. We reach this place and
333 * recover the master node from the second LEB
334 * (%UBIFS_MST_LNUM + 1). But we cannot update the media
335 * because we are being mounted R/O. We have to defer the
336 * operation.
337 * 4. However, this master node (@c->mst_node) is marked as
338 * clean (since the step 1). And if we just return, the
339 * mount code will be confused and won't recover the master
340 * node when it is re-mounter R/W later.
341 *
342 * Thus, to force the recovery by marking the master node as
343 * dirty.
344 */
345 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
320 } else { 346 } else {
321 /* Write the recovered master node */ 347 /* Write the recovered master node */
322 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; 348 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eed0fcff8d73..d3d6d365bfc1 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -59,6 +59,7 @@ enum {
59 * @new_size: truncation new size 59 * @new_size: truncation new size
60 * @free: amount of free space in a bud 60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes 61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 * @jhead: journal head number of the bud
62 * 63 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must 64 * UBIFS journal replay must compare node sequence numbers, which means it must
64 * build a tree of node information to insert into the TNC. 65 * build a tree of node information to insert into the TNC.
@@ -80,6 +81,7 @@ struct replay_entry {
80 struct { 81 struct {
81 int free; 82 int free;
82 int dirty; 83 int dirty;
84 int jhead;
83 }; 85 };
84 }; 86 };
85}; 87};
@@ -159,6 +161,11 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
159 err = PTR_ERR(lp); 161 err = PTR_ERR(lp);
160 goto out; 162 goto out;
161 } 163 }
164
165 /* Make sure the journal head points to the latest bud */
166 err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum,
167 c->leb_size - r->free, UBI_SHORTTERM);
168
162out: 169out:
163 ubifs_release_lprops(c); 170 ubifs_release_lprops(c);
164 return err; 171 return err;
@@ -627,10 +634,6 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
627 ubifs_assert(sleb->endpt - offs >= used); 634 ubifs_assert(sleb->endpt - offs >= used);
628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 635 ubifs_assert(sleb->endpt % c->min_io_size == 0);
629 636
630 if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount)
631 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
632 sleb->endpt, UBI_SHORTTERM);
633
634 *dirty = sleb->endpt - offs - used; 637 *dirty = sleb->endpt - offs - used;
635 *free = c->leb_size - sleb->endpt; 638 *free = c->leb_size - sleb->endpt;
636 639
@@ -653,12 +656,14 @@ out_dump:
653 * @sqnum: sequence number 656 * @sqnum: sequence number
654 * @free: amount of free space in bud 657 * @free: amount of free space in bud
655 * @dirty: amount of dirty space from padding and deletion nodes 658 * @dirty: amount of dirty space from padding and deletion nodes
659 * @jhead: journal head number for the bud
656 * 660 *
657 * This function inserts a reference node to the replay tree and returns zero 661 * This function inserts a reference node to the replay tree and returns zero
658 * in case of success or a negative error code in case of failure. 662 * in case of success or a negative error code in case of failure.
659 */ 663 */
660static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 664static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
661 unsigned long long sqnum, int free, int dirty) 665 unsigned long long sqnum, int free, int dirty,
666 int jhead)
662{ 667{
663 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 668 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
664 struct replay_entry *r; 669 struct replay_entry *r;
@@ -688,6 +693,7 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
688 r->flags = REPLAY_REF; 693 r->flags = REPLAY_REF;
689 r->free = free; 694 r->free = free;
690 r->dirty = dirty; 695 r->dirty = dirty;
696 r->jhead = jhead;
691 697
692 rb_link_node(&r->rb, parent, p); 698 rb_link_node(&r->rb, parent, p);
693 rb_insert_color(&r->rb, &c->replay_tree); 699 rb_insert_color(&r->rb, &c->replay_tree);
@@ -712,7 +718,7 @@ static int replay_buds(struct ubifs_info *c)
712 if (err) 718 if (err)
713 return err; 719 return err;
714 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 720 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
715 free, dirty); 721 free, dirty, b->bud->jhead);
716 if (err) 722 if (err)
717 return err; 723 return err;
718 } 724 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6ddd9973e681..04ad07f4fcc3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1257,12 +1257,12 @@ static int mount_ubifs(struct ubifs_info *c)
1257 goto out_free; 1257 goto out_free;
1258 } 1258 }
1259 1259
1260 err = alloc_wbufs(c);
1261 if (err)
1262 goto out_cbuf;
1263
1260 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); 1264 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1261 if (!c->ro_mount) { 1265 if (!c->ro_mount) {
1262 err = alloc_wbufs(c);
1263 if (err)
1264 goto out_cbuf;
1265
1266 /* Create background thread */ 1266 /* Create background thread */
1267 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1267 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1268 if (IS_ERR(c->bgt)) { 1268 if (IS_ERR(c->bgt)) {
@@ -1568,6 +1568,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1568 mutex_lock(&c->umount_mutex); 1568 mutex_lock(&c->umount_mutex);
1569 dbg_save_space_info(c); 1569 dbg_save_space_info(c);
1570 c->remounting_rw = 1; 1570 c->remounting_rw = 1;
1571 c->ro_mount = 0;
1571 1572
1572 err = check_free_space(c); 1573 err = check_free_space(c);
1573 if (err) 1574 if (err)
@@ -1630,12 +1631,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1630 if (err) 1631 if (err)
1631 goto out; 1632 goto out;
1632 1633
1633 err = alloc_wbufs(c);
1634 if (err)
1635 goto out;
1636
1637 ubifs_create_buds_lists(c);
1638
1639 /* Create background thread */ 1634 /* Create background thread */
1640 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1635 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1641 if (IS_ERR(c->bgt)) { 1636 if (IS_ERR(c->bgt)) {
@@ -1670,19 +1665,30 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1670 if (err) 1665 if (err)
1671 goto out; 1666 goto out;
1672 1667
1668 dbg_gen("re-mounted read-write");
1669 c->remounting_rw = 0;
1670
1673 if (c->need_recovery) { 1671 if (c->need_recovery) {
1674 c->need_recovery = 0; 1672 c->need_recovery = 0;
1675 ubifs_msg("deferred recovery completed"); 1673 ubifs_msg("deferred recovery completed");
1674 } else {
1675 /*
1676 * Do not run the debugging space check if the were doing
1677 * recovery, because when we saved the information we had the
1678 * file-system in a state where the TNC and lprops has been
1679 * modified in memory, but all the I/O operations (including a
1680 * commit) were deferred. So the file-system was in
1681 * "non-committed" state. Now the file-system is in committed
1682 * state, and of course the amount of free space will change
1683 * because, for example, the old index size was imprecise.
1684 */
1685 err = dbg_check_space_info(c);
1676 } 1686 }
1677
1678 dbg_gen("re-mounted read-write");
1679 c->ro_mount = 0;
1680 c->remounting_rw = 0;
1681 err = dbg_check_space_info(c);
1682 mutex_unlock(&c->umount_mutex); 1687 mutex_unlock(&c->umount_mutex);
1683 return err; 1688 return err;
1684 1689
1685out: 1690out:
1691 c->ro_mount = 1;
1686 vfree(c->orph_buf); 1692 vfree(c->orph_buf);
1687 c->orph_buf = NULL; 1693 c->orph_buf = NULL;
1688 if (c->bgt) { 1694 if (c->bgt) {
@@ -1732,7 +1738,6 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1732 if (err) 1738 if (err)
1733 ubifs_ro_mode(c, err); 1739 ubifs_ro_mode(c, err);
1734 1740
1735 free_wbufs(c);
1736 vfree(c->orph_buf); 1741 vfree(c->orph_buf);
1737 c->orph_buf = NULL; 1742 c->orph_buf = NULL;
1738 kfree(c->write_reserve_buf); 1743 kfree(c->write_reserve_buf);
@@ -1760,10 +1765,12 @@ static void ubifs_put_super(struct super_block *sb)
1760 * of the media. For example, there will be dirty inodes if we failed 1765 * of the media. For example, there will be dirty inodes if we failed
1761 * to write them back because of I/O errors. 1766 * to write them back because of I/O errors.
1762 */ 1767 */
1763 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1768 if (!c->ro_error) {
1764 ubifs_assert(c->budg_idx_growth == 0); 1769 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1765 ubifs_assert(c->budg_dd_growth == 0); 1770 ubifs_assert(c->budg_idx_growth == 0);
1766 ubifs_assert(c->budg_data_growth == 0); 1771 ubifs_assert(c->budg_dd_growth == 0);
1772 ubifs_assert(c->budg_data_growth == 0);
1773 }
1767 1774
1768 /* 1775 /*
1769 * The 'c->umount_lock' prevents races between UBIFS memory shrinker 1776 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f88fe0..3299f469e712 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
56 */ 56 */
57 57
58#include "ubifs.h" 58#include "ubifs.h"
59#include <linux/fs.h>
59#include <linux/slab.h> 60#include <linux/slab.h>
60#include <linux/xattr.h> 61#include <linux/xattr.h>
61#include <linux/posix_acl_xattr.h> 62#include <linux/posix_acl_xattr.h>
@@ -80,7 +81,6 @@ enum {
80}; 81};
81 82
82static const struct inode_operations none_inode_operations; 83static const struct inode_operations none_inode_operations;
83static const struct address_space_operations none_address_operations;
84static const struct file_operations none_file_operations; 84static const struct file_operations none_file_operations;
85 85
86/** 86/**
@@ -130,7 +130,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
130 } 130 }
131 131
132 /* Re-define all operations to be "nothing" */ 132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations; 133 inode->i_mapping->a_ops = &empty_aops;
134 inode->i_op = &none_inode_operations; 134 inode->i_op = &none_inode_operations;
135 inode->i_fop = &none_file_operations; 135 inode->i_fop = &none_file_operations;
136 136
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 27a4babe7df0..e765743cf9f3 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -78,7 +78,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
78 78
79/* 79/*
80 * Returns the location of the fragment from 80 * Returns the location of the fragment from
81 * the begining of the filesystem. 81 * the beginning of the filesystem.
82 */ 82 */
83 83
84static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) 84static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 7693d6293404..3915ade6f9a8 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -483,9 +483,9 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
483} 483}
484 484
485/* 485/*
486 * Diffrent types of UFS hold fs_cstotal in different 486 * Different types of UFS hold fs_cstotal in different
487 * places, and use diffrent data structure for it. 487 * places, and use different data structure for it.
488 * To make things simplier we just copy fs_cstotal to ufs_sb_private_info 488 * To make things simpler we just copy fs_cstotal to ufs_sb_private_info
489 */ 489 */
490static void ufs_setup_cstotal(struct super_block *sb) 490static void ufs_setup_cstotal(struct super_block *sb)
491{ 491{
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 11014302c9ca..5f821dbc0579 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
479 break; 479 break;
480 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) 480 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
481 ufs_sync_inode (inode); 481 ufs_sync_inode (inode);
482 blk_flush_plug(current);
483 yield(); 482 yield();
484 } 483 }
485 484
diff --git a/fs/xattr.c b/fs/xattr.c
index a19acdb81cd1..f1ef94974dea 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -666,7 +666,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(dentry, name, value, size, 0, handler->flags); 669 return handler->set(dentry, name, value, size, flags, handler->flags);
670} 670}
671 671
672/* 672/*
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 52dbd14260ba..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1295,7 +1295,7 @@ xfs_get_blocks_direct(
1295 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1295 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1296 * need to issue a transaction to convert the range from unwritten to written 1296 * need to issue a transaction to convert the range from unwritten to written
1297 * extents. In case this is regular synchronous I/O we just call xfs_end_io 1297 * extents. In case this is regular synchronous I/O we just call xfs_end_io
1298 * to do this and we are done. But in case this was a successfull AIO 1298 * to do this and we are done. But in case this was a successful AIO
1299 * request this handler is called from interrupt context, from which we 1299 * request this handler is called from interrupt context, from which we
1300 * can't start transactions. In that case offload the I/O completion to 1300 * can't start transactions. In that case offload the I/O completion to
1301 * the workqueues we also use for buffered I/O completion. 1301 * the workqueues we also use for buffered I/O completion.
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 596bb2c9de42..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -120,7 +120,7 @@ xfs_buf_lru_add(
120 * The unlocked check is safe here because it only occurs when there are not 120 * The unlocked check is safe here because it only occurs when there are not
121 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there 121 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
122 * to optimise the shrinker removing the buffer from the LRU and calling 122 * to optimise the shrinker removing the buffer from the LRU and calling
123 * xfs_buf_free(). i.e. it removes an unneccessary round trip on the 123 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
124 * bt_lru_lock. 124 * bt_lru_lock.
125 */ 125 */
126STATIC void 126STATIC void
@@ -293,7 +293,6 @@ xfs_buf_allocate_memory(
293 size_t nbytes, offset; 293 size_t nbytes, offset;
294 gfp_t gfp_mask = xb_to_gfp(flags); 294 gfp_t gfp_mask = xb_to_gfp(flags);
295 unsigned short page_count, i; 295 unsigned short page_count, i;
296 pgoff_t first;
297 xfs_off_t end; 296 xfs_off_t end;
298 int error; 297 int error;
299 298
@@ -333,7 +332,6 @@ use_alloc_page:
333 return error; 332 return error;
334 333
335 offset = bp->b_offset; 334 offset = bp->b_offset;
336 first = bp->b_file_offset >> PAGE_SHIFT;
337 bp->b_flags |= _XBF_PAGES; 335 bp->b_flags |= _XBF_PAGES;
338 336
339 for (i = 0; i < bp->b_page_count; i++) { 337 for (i = 0; i < bp->b_page_count; i++) {
@@ -380,7 +378,7 @@ out_free_pages:
380} 378}
381 379
382/* 380/*
383 * Map buffer into kernel address-space if nessecary. 381 * Map buffer into kernel address-space if necessary.
384 */ 382 */
385STATIC int 383STATIC int
386_xfs_buf_map_pages( 384_xfs_buf_map_pages(
@@ -657,8 +655,6 @@ xfs_buf_readahead(
657 xfs_off_t ioff, 655 xfs_off_t ioff,
658 size_t isize) 656 size_t isize)
659{ 657{
660 struct backing_dev_info *bdi;
661
662 if (bdi_read_congested(target->bt_bdi)) 658 if (bdi_read_congested(target->bt_bdi))
663 return; 659 return;
664 660
@@ -919,8 +915,6 @@ xfs_buf_lock(
919 915
920 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 916 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
921 xfs_log_force(bp->b_target->bt_mount, 0); 917 xfs_log_force(bp->b_target->bt_mount, 0);
922 if (atomic_read(&bp->b_io_remaining))
923 blk_flush_plug(current);
924 down(&bp->b_sema); 918 down(&bp->b_sema);
925 XB_SET_OWNER(bp); 919 XB_SET_OWNER(bp);
926 920
@@ -1309,8 +1303,6 @@ xfs_buf_iowait(
1309{ 1303{
1310 trace_xfs_buf_iowait(bp, _RET_IP_); 1304 trace_xfs_buf_iowait(bp, _RET_IP_);
1311 1305
1312 if (atomic_read(&bp->b_io_remaining))
1313 blk_flush_plug(current);
1314 wait_for_completion(&bp->b_iowait); 1306 wait_for_completion(&bp->b_iowait);
1315 1307
1316 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1308 trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1747,8 +1739,8 @@ xfsbufd(
1747 do { 1739 do {
1748 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1740 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1749 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); 1741 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1750 int count = 0;
1751 struct list_head tmp; 1742 struct list_head tmp;
1743 struct blk_plug plug;
1752 1744
1753 if (unlikely(freezing(current))) { 1745 if (unlikely(freezing(current))) {
1754 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1746 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1764,16 +1756,15 @@ xfsbufd(
1764 1756
1765 xfs_buf_delwri_split(target, &tmp, age); 1757 xfs_buf_delwri_split(target, &tmp, age);
1766 list_sort(NULL, &tmp, xfs_buf_cmp); 1758 list_sort(NULL, &tmp, xfs_buf_cmp);
1759
1760 blk_start_plug(&plug);
1767 while (!list_empty(&tmp)) { 1761 while (!list_empty(&tmp)) {
1768 struct xfs_buf *bp; 1762 struct xfs_buf *bp;
1769 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1763 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1770 list_del_init(&bp->b_list); 1764 list_del_init(&bp->b_list);
1771 xfs_bdstrat_cb(bp); 1765 xfs_bdstrat_cb(bp);
1772 count++;
1773 } 1766 }
1774 if (count) 1767 blk_finish_plug(&plug);
1775 blk_flush_plug(current);
1776
1777 } while (!kthread_should_stop()); 1768 } while (!kthread_should_stop());
1778 1769
1779 return 0; 1770 return 0;
@@ -1793,6 +1784,7 @@ xfs_flush_buftarg(
1793 int pincount = 0; 1784 int pincount = 0;
1794 LIST_HEAD(tmp_list); 1785 LIST_HEAD(tmp_list);
1795 LIST_HEAD(wait_list); 1786 LIST_HEAD(wait_list);
1787 struct blk_plug plug;
1796 1788
1797 xfs_buf_runall_queues(xfsconvertd_workqueue); 1789 xfs_buf_runall_queues(xfsconvertd_workqueue);
1798 xfs_buf_runall_queues(xfsdatad_workqueue); 1790 xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1807,6 +1799,8 @@ xfs_flush_buftarg(
1807 * we do that after issuing all the IO. 1799 * we do that after issuing all the IO.
1808 */ 1800 */
1809 list_sort(NULL, &tmp_list, xfs_buf_cmp); 1801 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1802
1803 blk_start_plug(&plug);
1810 while (!list_empty(&tmp_list)) { 1804 while (!list_empty(&tmp_list)) {
1811 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); 1805 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1812 ASSERT(target == bp->b_target); 1806 ASSERT(target == bp->b_target);
@@ -1817,10 +1811,10 @@ xfs_flush_buftarg(
1817 } 1811 }
1818 xfs_bdstrat_cb(bp); 1812 xfs_bdstrat_cb(bp);
1819 } 1813 }
1814 blk_finish_plug(&plug);
1820 1815
1821 if (wait) { 1816 if (wait) {
1822 /* Expedite and wait for IO to complete. */ 1817 /* Wait for IO to complete. */
1823 blk_flush_plug(current);
1824 while (!list_empty(&wait_list)) { 1818 while (!list_empty(&wait_list)) {
1825 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1819 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1826 1820
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 52aadfbed132..f4213ba1ff85 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -381,7 +381,7 @@ xfs_aio_write_isize_update(
381 381
382/* 382/*
383 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then 383 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
384 * part of the I/O may have been written to disk before the error occured. In 384 * part of the I/O may have been written to disk before the error occurred. In
385 * this case the on-disk file size may have been adjusted beyond the in-memory 385 * this case the on-disk file size may have been adjusted beyond the in-memory
386 * file size and now needs to be truncated back. 386 * file size and now needs to be truncated back.
387 */ 387 */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9ff7fc603d2f..dd21784525a8 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -70,7 +70,7 @@ xfs_synchronize_times(
70 70
71/* 71/*
72 * If the linux inode is valid, mark it dirty. 72 * If the linux inode is valid, mark it dirty.
73 * Used when commiting a dirty inode into a transaction so that 73 * Used when committing a dirty inode into a transaction so that
74 * the inode will get written back by the linux code 74 * the inode will get written back by the linux code
75 */ 75 */
76void 76void
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 508e06fd7d1e..9f76cceb678d 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -28,53 +28,49 @@
28/* 28/*
29 * XFS logging functions 29 * XFS logging functions
30 */ 30 */
31static int 31static void
32__xfs_printk( 32__xfs_printk(
33 const char *level, 33 const char *level,
34 const struct xfs_mount *mp, 34 const struct xfs_mount *mp,
35 struct va_format *vaf) 35 struct va_format *vaf)
36{ 36{
37 if (mp && mp->m_fsname) 37 if (mp && mp->m_fsname) {
38 return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); 38 printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
39 return printk("%sXFS: %pV\n", level, vaf); 39 return;
40 }
41 printk("%sXFS: %pV\n", level, vaf);
40} 42}
41 43
42int xfs_printk( 44void xfs_printk(
43 const char *level, 45 const char *level,
44 const struct xfs_mount *mp, 46 const struct xfs_mount *mp,
45 const char *fmt, ...) 47 const char *fmt, ...)
46{ 48{
47 struct va_format vaf; 49 struct va_format vaf;
48 va_list args; 50 va_list args;
49 int r;
50 51
51 va_start(args, fmt); 52 va_start(args, fmt);
52 53
53 vaf.fmt = fmt; 54 vaf.fmt = fmt;
54 vaf.va = &args; 55 vaf.va = &args;
55 56
56 r = __xfs_printk(level, mp, &vaf); 57 __xfs_printk(level, mp, &vaf);
57 va_end(args); 58 va_end(args);
58
59 return r;
60} 59}
61 60
62#define define_xfs_printk_level(func, kern_level) \ 61#define define_xfs_printk_level(func, kern_level) \
63int func(const struct xfs_mount *mp, const char *fmt, ...) \ 62void func(const struct xfs_mount *mp, const char *fmt, ...) \
64{ \ 63{ \
65 struct va_format vaf; \ 64 struct va_format vaf; \
66 va_list args; \ 65 va_list args; \
67 int r; \
68 \ 66 \
69 va_start(args, fmt); \ 67 va_start(args, fmt); \
70 \ 68 \
71 vaf.fmt = fmt; \ 69 vaf.fmt = fmt; \
72 vaf.va = &args; \ 70 vaf.va = &args; \
73 \ 71 \
74 r = __xfs_printk(kern_level, mp, &vaf); \ 72 __xfs_printk(kern_level, mp, &vaf); \
75 va_end(args); \ 73 va_end(args); \
76 \
77 return r; \
78} \ 74} \
79 75
80define_xfs_printk_level(xfs_emerg, KERN_EMERG); 76define_xfs_printk_level(xfs_emerg, KERN_EMERG);
@@ -88,7 +84,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO);
88define_xfs_printk_level(xfs_debug, KERN_DEBUG); 84define_xfs_printk_level(xfs_debug, KERN_DEBUG);
89#endif 85#endif
90 86
91int 87void
92xfs_alert_tag( 88xfs_alert_tag(
93 const struct xfs_mount *mp, 89 const struct xfs_mount *mp,
94 int panic_tag, 90 int panic_tag,
@@ -97,7 +93,6 @@ xfs_alert_tag(
97 struct va_format vaf; 93 struct va_format vaf;
98 va_list args; 94 va_list args;
99 int do_panic = 0; 95 int do_panic = 0;
100 int r;
101 96
102 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { 97 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
103 xfs_printk(KERN_ALERT, mp, 98 xfs_printk(KERN_ALERT, mp,
@@ -110,12 +105,10 @@ xfs_alert_tag(
110 vaf.fmt = fmt; 105 vaf.fmt = fmt;
111 vaf.va = &args; 106 vaf.va = &args;
112 107
113 r = __xfs_printk(KERN_ALERT, mp, &vaf); 108 __xfs_printk(KERN_ALERT, mp, &vaf);
114 va_end(args); 109 va_end(args);
115 110
116 BUG_ON(do_panic); 111 BUG_ON(do_panic);
117
118 return r;
119} 112}
120 113
121void 114void
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index e77ffa16745b..f1b3fc1b6c4e 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,32 +3,34 @@
3 3
4struct xfs_mount; 4struct xfs_mount;
5 5
6extern int xfs_printk(const char *level, const struct xfs_mount *mp, 6extern void xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...) 7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4))); 8 __attribute__ ((format (printf, 3, 4)));
9extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 9extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3))); 10 __attribute__ ((format (printf, 2, 3)));
11extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 11extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
12 __attribute__ ((format (printf, 2, 3))); 12 __attribute__ ((format (printf, 2, 3)));
13extern int xfs_alert_tag(const struct xfs_mount *mp, int tag, 13extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
14 const char *fmt, ...) 14 const char *fmt, ...)
15 __attribute__ ((format (printf, 3, 4))); 15 __attribute__ ((format (printf, 3, 4)));
16extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) 16extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
17 __attribute__ ((format (printf, 2, 3))); 17 __attribute__ ((format (printf, 2, 3)));
18extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...) 18extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3))); 19 __attribute__ ((format (printf, 2, 3)));
20extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) 20extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
21 __attribute__ ((format (printf, 2, 3))); 21 __attribute__ ((format (printf, 2, 3)));
22extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) 22extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
23 __attribute__ ((format (printf, 2, 3))); 23 __attribute__ ((format (printf, 2, 3)));
24extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...) 24extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
25 __attribute__ ((format (printf, 2, 3))); 25 __attribute__ ((format (printf, 2, 3)));
26 26
27#ifdef DEBUG 27#ifdef DEBUG
28extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 28extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3))); 29 __attribute__ ((format (printf, 2, 3)));
30#else 30#else
31#define xfs_debug(mp, fmt, ...) (0) 31static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
32{
33}
32#endif 34#endif
33 35
34extern void assfail(char *expr, char *f, int l); 36extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1ba5c451da36..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -816,75 +816,6 @@ xfs_setup_devices(
816 return 0; 816 return 0;
817} 817}
818 818
819/*
820 * XFS AIL push thread support
821 */
822void
823xfsaild_wakeup(
824 struct xfs_ail *ailp,
825 xfs_lsn_t threshold_lsn)
826{
827 /* only ever move the target forwards */
828 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
829 ailp->xa_target = threshold_lsn;
830 wake_up_process(ailp->xa_task);
831 }
832}
833
834STATIC int
835xfsaild(
836 void *data)
837{
838 struct xfs_ail *ailp = data;
839 xfs_lsn_t last_pushed_lsn = 0;
840 long tout = 0; /* milliseconds */
841
842 while (!kthread_should_stop()) {
843 /*
844 * for short sleeps indicating congestion, don't allow us to
845 * get woken early. Otherwise all we do is bang on the AIL lock
846 * without making progress.
847 */
848 if (tout && tout <= 20)
849 __set_current_state(TASK_KILLABLE);
850 else
851 __set_current_state(TASK_INTERRUPTIBLE);
852 schedule_timeout(tout ?
853 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
854
855 /* swsusp */
856 try_to_freeze();
857
858 ASSERT(ailp->xa_mount->m_log);
859 if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
860 continue;
861
862 tout = xfsaild_push(ailp, &last_pushed_lsn);
863 }
864
865 return 0;
866} /* xfsaild */
867
868int
869xfsaild_start(
870 struct xfs_ail *ailp)
871{
872 ailp->xa_target = 0;
873 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
874 ailp->xa_mount->m_fsname);
875 if (IS_ERR(ailp->xa_task))
876 return -PTR_ERR(ailp->xa_task);
877 return 0;
878}
879
880void
881xfsaild_stop(
882 struct xfs_ail *ailp)
883{
884 kthread_stop(ailp->xa_task);
885}
886
887
888/* Catch misguided souls that try to use this interface on XFS */ 819/* Catch misguided souls that try to use this interface on XFS */
889STATIC struct inode * 820STATIC struct inode *
890xfs_fs_alloc_inode( 821xfs_fs_alloc_inode(
@@ -1191,22 +1122,12 @@ xfs_fs_sync_fs(
1191 return -error; 1122 return -error;
1192 1123
1193 if (laptop_mode) { 1124 if (laptop_mode) {
1194 int prev_sync_seq = mp->m_sync_seq;
1195
1196 /* 1125 /*
1197 * The disk must be active because we're syncing. 1126 * The disk must be active because we're syncing.
1198 * We schedule xfssyncd now (now that the disk is 1127 * We schedule xfssyncd now (now that the disk is
1199 * active) instead of later (when it might not be). 1128 * active) instead of later (when it might not be).
1200 */ 1129 */
1201 wake_up_process(mp->m_sync_task); 1130 flush_delayed_work_sync(&mp->m_sync_work);
1202 /*
1203 * We have to wait for the sync iteration to complete.
1204 * If we don't, the disk activity caused by the sync
1205 * will come after the sync is completed, and that
1206 * triggers another sync from laptop mode.
1207 */
1208 wait_event(mp->m_wait_single_sync_task,
1209 mp->m_sync_seq != prev_sync_seq);
1210 } 1131 }
1211 1132
1212 return 0; 1133 return 0;
@@ -1490,9 +1411,6 @@ xfs_fs_fill_super(
1490 spin_lock_init(&mp->m_sb_lock); 1411 spin_lock_init(&mp->m_sb_lock);
1491 mutex_init(&mp->m_growlock); 1412 mutex_init(&mp->m_growlock);
1492 atomic_set(&mp->m_active_trans, 0); 1413 atomic_set(&mp->m_active_trans, 0);
1493 INIT_LIST_HEAD(&mp->m_sync_list);
1494 spin_lock_init(&mp->m_sync_lock);
1495 init_waitqueue_head(&mp->m_wait_single_sync_task);
1496 1414
1497 mp->m_super = sb; 1415 mp->m_super = sb;
1498 sb->s_fs_info = mp; 1416 sb->s_fs_info = mp;
@@ -1799,6 +1717,38 @@ xfs_destroy_zones(void)
1799} 1717}
1800 1718
1801STATIC int __init 1719STATIC int __init
1720xfs_init_workqueues(void)
1721{
1722 /*
1723 * max_active is set to 8 to give enough concurency to allow
1724 * multiple work operations on each CPU to run. This allows multiple
1725 * filesystems to be running sync work concurrently, and scales with
1726 * the number of CPUs in the system.
1727 */
1728 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
1729 if (!xfs_syncd_wq)
1730 goto out;
1731
1732 xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
1733 if (!xfs_ail_wq)
1734 goto out_destroy_syncd;
1735
1736 return 0;
1737
1738out_destroy_syncd:
1739 destroy_workqueue(xfs_syncd_wq);
1740out:
1741 return -ENOMEM;
1742}
1743
1744STATIC void
1745xfs_destroy_workqueues(void)
1746{
1747 destroy_workqueue(xfs_ail_wq);
1748 destroy_workqueue(xfs_syncd_wq);
1749}
1750
1751STATIC int __init
1802init_xfs_fs(void) 1752init_xfs_fs(void)
1803{ 1753{
1804 int error; 1754 int error;
@@ -1813,10 +1763,14 @@ init_xfs_fs(void)
1813 if (error) 1763 if (error)
1814 goto out; 1764 goto out;
1815 1765
1816 error = xfs_mru_cache_init(); 1766 error = xfs_init_workqueues();
1817 if (error) 1767 if (error)
1818 goto out_destroy_zones; 1768 goto out_destroy_zones;
1819 1769
1770 error = xfs_mru_cache_init();
1771 if (error)
1772 goto out_destroy_wq;
1773
1820 error = xfs_filestream_init(); 1774 error = xfs_filestream_init();
1821 if (error) 1775 if (error)
1822 goto out_mru_cache_uninit; 1776 goto out_mru_cache_uninit;
@@ -1833,6 +1787,10 @@ init_xfs_fs(void)
1833 if (error) 1787 if (error)
1834 goto out_cleanup_procfs; 1788 goto out_cleanup_procfs;
1835 1789
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1836 vfs_initquota(); 1794 vfs_initquota();
1837 1795
1838 error = register_filesystem(&xfs_fs_type); 1796 error = register_filesystem(&xfs_fs_type);
@@ -1850,6 +1808,8 @@ init_xfs_fs(void)
1850 xfs_filestream_uninit(); 1808 xfs_filestream_uninit();
1851 out_mru_cache_uninit: 1809 out_mru_cache_uninit:
1852 xfs_mru_cache_uninit(); 1810 xfs_mru_cache_uninit();
1811 out_destroy_wq:
1812 xfs_destroy_workqueues();
1853 out_destroy_zones: 1813 out_destroy_zones:
1854 xfs_destroy_zones(); 1814 xfs_destroy_zones();
1855 out: 1815 out:
@@ -1866,6 +1826,7 @@ exit_xfs_fs(void)
1866 xfs_buf_terminate(); 1826 xfs_buf_terminate();
1867 xfs_filestream_uninit(); 1827 xfs_filestream_uninit();
1868 xfs_mru_cache_uninit(); 1828 xfs_mru_cache_uninit();
1829 xfs_destroy_workqueues();
1869 xfs_destroy_zones(); 1830 xfs_destroy_zones();
1870} 1831}
1871 1832
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 594cd822d84d..3e898a48122d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_trans_priv.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
@@ -39,6 +40,8 @@
39#include <linux/kthread.h> 40#include <linux/kthread.h>
40#include <linux/freezer.h> 41#include <linux/freezer.h>
41 42
43struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
44
42/* 45/*
43 * The inode lookup is done in batches to keep the amount of lock traffic and 46 * The inode lookup is done in batches to keep the amount of lock traffic and
44 * radix tree lookups to a minimum. The batch size is a trade off between 47 * radix tree lookups to a minimum. The batch size is a trade off between
@@ -401,7 +404,7 @@ xfs_quiesce_fs(
401/* 404/*
402 * Second stage of a quiesce. The data is already synced, now we have to take 405 * Second stage of a quiesce. The data is already synced, now we have to take
403 * care of the metadata. New transactions are already blocked, so we need to 406 * care of the metadata. New transactions are already blocked, so we need to
404 * wait for any remaining transactions to drain out before proceding. 407 * wait for any remaining transactions to drain out before proceeding.
405 */ 408 */
406void 409void
407xfs_quiesce_attr( 410xfs_quiesce_attr(
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
431 xfs_unmountfs_writesb(mp); 434 xfs_unmountfs_writesb(mp);
432} 435}
433 436
434/* 437static void
435 * Enqueue a work item to be picked up by the vfs xfssyncd thread. 438xfs_syncd_queue_sync(
436 * Doing this has two advantages: 439 struct xfs_mount *mp)
437 * - It saves on stack space, which is tight in certain situations
438 * - It can be used (with care) as a mechanism to avoid deadlocks.
439 * Flushing while allocating in a full filesystem requires both.
440 */
441STATIC void
442xfs_syncd_queue_work(
443 struct xfs_mount *mp,
444 void *data,
445 void (*syncer)(struct xfs_mount *, void *),
446 struct completion *completion)
447{ 440{
448 struct xfs_sync_work *work; 441 queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
449 442 msecs_to_jiffies(xfs_syncd_centisecs * 10));
450 work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
451 INIT_LIST_HEAD(&work->w_list);
452 work->w_syncer = syncer;
453 work->w_data = data;
454 work->w_mount = mp;
455 work->w_completion = completion;
456 spin_lock(&mp->m_sync_lock);
457 list_add_tail(&work->w_list, &mp->m_sync_list);
458 spin_unlock(&mp->m_sync_lock);
459 wake_up_process(mp->m_sync_task);
460}
461
462/*
463 * Flush delayed allocate data, attempting to free up reserved space
464 * from existing allocations. At this point a new allocation attempt
465 * has failed with ENOSPC and we are in the process of scratching our
466 * heads, looking about for more room...
467 */
468STATIC void
469xfs_flush_inodes_work(
470 struct xfs_mount *mp,
471 void *arg)
472{
473 struct inode *inode = arg;
474 xfs_sync_data(mp, SYNC_TRYLOCK);
475 xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
476 iput(inode);
477}
478
479void
480xfs_flush_inodes(
481 xfs_inode_t *ip)
482{
483 struct inode *inode = VFS_I(ip);
484 DECLARE_COMPLETION_ONSTACK(completion);
485
486 igrab(inode);
487 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
488 wait_for_completion(&completion);
489 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
490} 443}
491 444
492/* 445/*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
496 */ 449 */
497STATIC void 450STATIC void
498xfs_sync_worker( 451xfs_sync_worker(
499 struct xfs_mount *mp, 452 struct work_struct *work)
500 void *unused)
501{ 453{
454 struct xfs_mount *mp = container_of(to_delayed_work(work),
455 struct xfs_mount, m_sync_work);
502 int error; 456 int error;
503 457
504 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 458 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
508 error = xfs_fs_log_dummy(mp); 462 error = xfs_fs_log_dummy(mp);
509 else 463 else
510 xfs_log_force(mp, 0); 464 xfs_log_force(mp, 0);
511 xfs_reclaim_inodes(mp, 0);
512 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 465 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
466
467 /* start pushing all the metadata that is currently dirty */
468 xfs_ail_push_all(mp->m_ail);
513 } 469 }
514 mp->m_sync_seq++; 470
515 wake_up(&mp->m_wait_single_sync_task); 471 /* queue us up again */
472 xfs_syncd_queue_sync(mp);
516} 473}
517 474
518STATIC int 475/*
519xfssyncd( 476 * Queue a new inode reclaim pass if there are reclaimable inodes and there
520 void *arg) 477 * isn't a reclaim pass already in progress. By default it runs every 5s based
478 * on the xfs syncd work default of 30s. Perhaps this should have it's own
479 * tunable, but that can be done if this method proves to be ineffective or too
480 * aggressive.
481 */
482static void
483xfs_syncd_queue_reclaim(
484 struct xfs_mount *mp)
521{ 485{
522 struct xfs_mount *mp = arg;
523 long timeleft;
524 xfs_sync_work_t *work, *n;
525 LIST_HEAD (tmp);
526
527 set_freezable();
528 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
529 for (;;) {
530 if (list_empty(&mp->m_sync_list))
531 timeleft = schedule_timeout_interruptible(timeleft);
532 /* swsusp */
533 try_to_freeze();
534 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
535 break;
536 486
537 spin_lock(&mp->m_sync_lock); 487 /*
538 /* 488 * We can have inodes enter reclaim after we've shut down the syncd
539 * We can get woken by laptop mode, to do a sync - 489 * workqueue during unmount, so don't allow reclaim work to be queued
540 * that's the (only!) case where the list would be 490 * during unmount.
541 * empty with time remaining. 491 */
542 */ 492 if (!(mp->m_super->s_flags & MS_ACTIVE))
543 if (!timeleft || list_empty(&mp->m_sync_list)) { 493 return;
544 if (!timeleft)
545 timeleft = xfs_syncd_centisecs *
546 msecs_to_jiffies(10);
547 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
548 list_add_tail(&mp->m_sync_work.w_list,
549 &mp->m_sync_list);
550 }
551 list_splice_init(&mp->m_sync_list, &tmp);
552 spin_unlock(&mp->m_sync_lock);
553 494
554 list_for_each_entry_safe(work, n, &tmp, w_list) { 495 rcu_read_lock();
555 (*work->w_syncer)(mp, work->w_data); 496 if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
556 list_del(&work->w_list); 497 queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
557 if (work == &mp->m_sync_work) 498 msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
558 continue;
559 if (work->w_completion)
560 complete(work->w_completion);
561 kmem_free(work);
562 }
563 } 499 }
500 rcu_read_unlock();
501}
564 502
565 return 0; 503/*
504 * This is a fast pass over the inode cache to try to get reclaim moving on as
505 * many inodes as possible in a short period of time. It kicks itself every few
506 * seconds, as well as being kicked by the inode cache shrinker when memory
507 * goes low. It scans as quickly as possible avoiding locked inodes or those
508 * already being flushed, and once done schedules a future pass.
509 */
510STATIC void
511xfs_reclaim_worker(
512 struct work_struct *work)
513{
514 struct xfs_mount *mp = container_of(to_delayed_work(work),
515 struct xfs_mount, m_reclaim_work);
516
517 xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
518 xfs_syncd_queue_reclaim(mp);
519}
520
521/*
522 * Flush delayed allocate data, attempting to free up reserved space
523 * from existing allocations. At this point a new allocation attempt
524 * has failed with ENOSPC and we are in the process of scratching our
525 * heads, looking about for more room.
526 *
527 * Queue a new data flush if there isn't one already in progress and
528 * wait for completion of the flush. This means that we only ever have one
529 * inode flush in progress no matter how many ENOSPC events are occurring and
530 * so will prevent the system from bogging down due to every concurrent
531 * ENOSPC event scanning all the active inodes in the system for writeback.
532 */
533void
534xfs_flush_inodes(
535 struct xfs_inode *ip)
536{
537 struct xfs_mount *mp = ip->i_mount;
538
539 queue_work(xfs_syncd_wq, &mp->m_flush_work);
540 flush_work_sync(&mp->m_flush_work);
541}
542
543STATIC void
544xfs_flush_worker(
545 struct work_struct *work)
546{
547 struct xfs_mount *mp = container_of(work,
548 struct xfs_mount, m_flush_work);
549
550 xfs_sync_data(mp, SYNC_TRYLOCK);
551 xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
566} 552}
567 553
568int 554int
569xfs_syncd_init( 555xfs_syncd_init(
570 struct xfs_mount *mp) 556 struct xfs_mount *mp)
571{ 557{
572 mp->m_sync_work.w_syncer = xfs_sync_worker; 558 INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
573 mp->m_sync_work.w_mount = mp; 559 INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
574 mp->m_sync_work.w_completion = NULL; 560 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
575 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); 561
576 if (IS_ERR(mp->m_sync_task)) 562 xfs_syncd_queue_sync(mp);
577 return -PTR_ERR(mp->m_sync_task); 563 xfs_syncd_queue_reclaim(mp);
564
578 return 0; 565 return 0;
579} 566}
580 567
@@ -582,7 +569,9 @@ void
582xfs_syncd_stop( 569xfs_syncd_stop(
583 struct xfs_mount *mp) 570 struct xfs_mount *mp)
584{ 571{
585 kthread_stop(mp->m_sync_task); 572 cancel_delayed_work_sync(&mp->m_sync_work);
573 cancel_delayed_work_sync(&mp->m_reclaim_work);
574 cancel_work_sync(&mp->m_flush_work);
586} 575}
587 576
588void 577void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
601 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), 590 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
602 XFS_ICI_RECLAIM_TAG); 591 XFS_ICI_RECLAIM_TAG);
603 spin_unlock(&ip->i_mount->m_perag_lock); 592 spin_unlock(&ip->i_mount->m_perag_lock);
593
594 /* schedule periodic background inode reclaim */
595 xfs_syncd_queue_reclaim(ip->i_mount);
596
604 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, 597 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
605 -1, _RET_IP_); 598 -1, _RET_IP_);
606 } 599 }
@@ -933,6 +926,7 @@ restart:
933 XFS_LOOKUP_BATCH, 926 XFS_LOOKUP_BATCH,
934 XFS_ICI_RECLAIM_TAG); 927 XFS_ICI_RECLAIM_TAG);
935 if (!nr_found) { 928 if (!nr_found) {
929 done = 1;
936 rcu_read_unlock(); 930 rcu_read_unlock();
937 break; 931 break;
938 } 932 }
@@ -1017,7 +1011,13 @@ xfs_reclaim_inodes(
1017} 1011}
1018 1012
1019/* 1013/*
1020 * Shrinker infrastructure. 1014 * Inode cache shrinker.
1015 *
1016 * When called we make sure that there is a background (fast) inode reclaim in
1017 * progress, while we will throttle the speed of reclaim via doing synchronous
1018 * reclaim of inodes. That means if we come across dirty inodes, we wait for
1019 * them to be cleaned, which we hope will not be very long due to the
1020 * background walker having already kicked the IO off on those dirty inodes.
1021 */ 1021 */
1022static int 1022static int
1023xfs_reclaim_inode_shrink( 1023xfs_reclaim_inode_shrink(
@@ -1032,10 +1032,15 @@ xfs_reclaim_inode_shrink(
1032 1032
1033 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1033 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1034 if (nr_to_scan) { 1034 if (nr_to_scan) {
1035 /* kick background reclaimer and push the AIL */
1036 xfs_syncd_queue_reclaim(mp);
1037 xfs_ail_push_all(mp->m_ail);
1038
1035 if (!(gfp_mask & __GFP_FS)) 1039 if (!(gfp_mask & __GFP_FS))
1036 return -1; 1040 return -1;
1037 1041
1038 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); 1042 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
1043 &nr_to_scan);
1039 /* terminate if we don't exhaust the scan */ 1044 /* terminate if we don't exhaust the scan */
1040 if (nr_to_scan > 0) 1045 if (nr_to_scan > 0)
1041 return -1; 1046 return -1;
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34 34
35extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
36
35int xfs_syncd_init(struct xfs_mount *mp); 37int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 38void xfs_syncd_stop(struct xfs_mount *mp);
37 39
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 7e2416478503..6fa214603819 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -600,7 +600,7 @@ xfs_qm_dqread(
600 600
601 /* 601 /*
602 * Reservation counters are defined as reservation plus current usage 602 * Reservation counters are defined as reservation plus current usage
603 * to avoid having to add everytime. 603 * to avoid having to add every time.
604 */ 604 */
605 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); 605 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
606 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); 606 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 254ee062bd7d..69228aa8605a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -461,12 +461,10 @@ xfs_qm_dqflush_all(
461 struct xfs_quotainfo *q = mp->m_quotainfo; 461 struct xfs_quotainfo *q = mp->m_quotainfo;
462 int recl; 462 int recl;
463 struct xfs_dquot *dqp; 463 struct xfs_dquot *dqp;
464 int niters;
465 int error; 464 int error;
466 465
467 if (!q) 466 if (!q)
468 return 0; 467 return 0;
469 niters = 0;
470again: 468again:
471 mutex_lock(&q->qi_dqlist_lock); 469 mutex_lock(&q->qi_dqlist_lock);
472 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { 470 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
@@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs(
1314{ 1312{
1315 xfs_buf_t *bp; 1313 xfs_buf_t *bp;
1316 int error; 1314 int error;
1317 int notcommitted;
1318 int incr;
1319 int type; 1315 int type;
1320 1316
1321 ASSERT(blkcnt > 0); 1317 ASSERT(blkcnt > 0);
1322 notcommitted = 0;
1323 incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1324 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1325 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : 1318 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1326 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); 1319 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1327 error = 0; 1320 error = 0;
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index c9446f1c726d..567b29b9f1b3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone;
65 * block in the dquot/xqm code. 65 * block in the dquot/xqm code.
66 */ 66 */
67#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 67#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
68/*
69 * When doing a quotacheck, we log dquot clusters of this many FSBs at most
70 * in a single transaction. We don't want to ask for too huge a log reservation.
71 */
72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
73 68
74typedef xfs_dqhash_t xfs_dqlist_t; 69typedef xfs_dqhash_t xfs_dqlist_t;
75 70
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 774d7ec6df8e..a0a829addca9 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -134,7 +134,7 @@ xfs_qm_newmount(
134 */ 134 */
135 if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { 135 if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
136 /* 136 /*
137 * If an error occured, qm_mount_quotas code 137 * If an error occurred, qm_mount_quotas code
138 * has already disabled quotas. So, just finish 138 * has already disabled quotas. So, just finish
139 * mounting, and get on with the boring life 139 * mounting, and get on with the boring life
140 * without disk quotas. 140 * without disk quotas.
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c82f06778a27..2dadb15d5ca9 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -172,7 +172,7 @@ xfs_qm_scall_quotaoff(
172 /* 172 /*
173 * Next we make the changes in the quota flag in the mount struct. 173 * Next we make the changes in the quota flag in the mount struct.
174 * This isn't protected by a particular lock directly, because we 174 * This isn't protected by a particular lock directly, because we
175 * don't want to take a mrlock everytime we depend on quotas being on. 175 * don't want to take a mrlock every time we depend on quotas being on.
176 */ 176 */
177 mp->m_qflags &= ~(flags); 177 mp->m_qflags &= ~(flags);
178 178
@@ -313,14 +313,12 @@ xfs_qm_scall_quotaon(
313{ 313{
314 int error; 314 int error;
315 uint qf; 315 uint qf;
316 uint accflags;
317 __int64_t sbflags; 316 __int64_t sbflags;
318 317
319 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); 318 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
320 /* 319 /*
321 * Switching on quota accounting must be done at mount time. 320 * Switching on quota accounting must be done at mount time.
322 */ 321 */
323 accflags = flags & XFS_ALL_QUOTA_ACCT;
324 flags &= ~(XFS_ALL_QUOTA_ACCT); 322 flags &= ~(XFS_ALL_QUOTA_ACCT);
325 323
326 sbflags = 0; 324 sbflags = 0;
@@ -354,7 +352,7 @@ xfs_qm_scall_quotaon(
354 return XFS_ERROR(EINVAL); 352 return XFS_ERROR(EINVAL);
355 } 353 }
356 /* 354 /*
357 * If everything's upto-date incore, then don't waste time. 355 * If everything's up to-date incore, then don't waste time.
358 */ 356 */
359 if ((mp->m_qflags & flags) == flags) 357 if ((mp->m_qflags & flags) == flags)
360 return XFS_ERROR(EEXIST); 358 return XFS_ERROR(EEXIST);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4bc3c649aee4..27d64d752eab 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2395,17 +2395,33 @@ xfs_free_extent(
2395 memset(&args, 0, sizeof(xfs_alloc_arg_t)); 2395 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2396 args.tp = tp; 2396 args.tp = tp;
2397 args.mp = tp->t_mountp; 2397 args.mp = tp->t_mountp;
2398
2399 /*
2400 * validate that the block number is legal - this enables us to detect
2401 * and handle a silent filesystem corruption rather than crashing.
2402 */
2398 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2403 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2399 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2404 if (args.agno >= args.mp->m_sb.sb_agcount)
2405 return EFSCORRUPTED;
2406
2400 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2407 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2408 if (args.agbno >= args.mp->m_sb.sb_agblocks)
2409 return EFSCORRUPTED;
2410
2401 args.pag = xfs_perag_get(args.mp, args.agno); 2411 args.pag = xfs_perag_get(args.mp, args.agno);
2402 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2412 ASSERT(args.pag);
2413
2414 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2415 if (error)
2403 goto error0; 2416 goto error0;
2404#ifdef DEBUG 2417
2405 ASSERT(args.agbp != NULL); 2418 /* validate the extent size is legal now we have the agf locked */
2406 ASSERT((args.agbno + len) <= 2419 if (args.agbno + len >
2407 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); 2420 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2408#endif 2421 error = EFSCORRUPTED;
2422 goto error0;
2423 }
2424
2409 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2410error0: 2426error0:
2411 xfs_perag_put(args.pag); 2427 xfs_perag_put(args.pag);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index e5413d96f1af..7b7e005e3dcc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -992,7 +992,7 @@ xfs_buf_iodone_callbacks(
992 lasttarg = XFS_BUF_TARGET(bp); 992 lasttarg = XFS_BUF_TARGET(bp);
993 993
994 /* 994 /*
995 * If the write was asynchronous then noone will be looking for the 995 * If the write was asynchronous then no one will be looking for the
996 * error. Clear the error state and write the buffer out again. 996 * error. Clear the error state and write the buffer out again.
997 * 997 *
998 * During sync or umount we'll write all pending buffers again 998 * During sync or umount we'll write all pending buffers again
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 742c8330994a..a37480a6e023 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2789,7 +2789,7 @@ xfs_iflush(
2789 2789
2790 /* 2790 /*
2791 * We can't flush the inode until it is unpinned, so wait for it if we 2791 * We can't flush the inode until it is unpinned, so wait for it if we
2792 * are allowed to block. We know noone new can pin it, because we are 2792 * are allowed to block. We know no one new can pin it, because we are
2793 * holding the inode lock shared and you need to hold it exclusively to 2793 * holding the inode lock shared and you need to hold it exclusively to
2794 * pin the inode. 2794 * pin the inode.
2795 * 2795 *
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f753200cef8d..ff4e2a30227d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -111,7 +111,7 @@ struct xfs_imap {
111 * Generally, we do not want to hold the i_rlock while holding the 111 * Generally, we do not want to hold the i_rlock while holding the
112 * i_ilock. Hierarchy is i_iolock followed by i_rlock. 112 * i_ilock. Hierarchy is i_iolock followed by i_rlock.
113 * 113 *
114 * xfs_iptr_t contains all the inode fields upto and including the 114 * xfs_iptr_t contains all the inode fields up to and including the
115 * i_mnext and i_mprev fields, it is used as a marker in the inode 115 * i_mnext and i_mprev fields, it is used as a marker in the inode
116 * chain off the mount structure by xfs_sync calls. 116 * chain off the mount structure by xfs_sync calls.
117 */ 117 */
@@ -336,7 +336,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
336 336
337/* 337/*
338 * Project quota id helpers (previously projid was 16bit only 338 * Project quota id helpers (previously projid was 16bit only
339 * and using two 16bit values to hold new 32bit projid was choosen 339 * and using two 16bit values to hold new 32bit projid was chosen
340 * to retain compatibility with "old" filesystems). 340 * to retain compatibility with "old" filesystems).
341 */ 341 */
342static inline prid_t 342static inline prid_t
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 46cc40131d4a..576fdfe81d60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -198,6 +198,41 @@ xfs_inode_item_size(
198} 198}
199 199
200/* 200/*
201 * xfs_inode_item_format_extents - convert in-core extents to on-disk form
202 *
203 * For either the data or attr fork in extent format, we need to endian convert
204 * the in-core extent as we place them into the on-disk inode. In this case, we
205 * need to do this conversion before we write the extents into the log. Because
206 * we don't have the disk inode to write into here, we allocate a buffer and
207 * format the extents into it via xfs_iextents_copy(). We free the buffer in
208 * the unlock routine after the copy for the log has been made.
209 *
210 * In the case of the data fork, the in-core and on-disk fork sizes can be
211 * different due to delayed allocation extents. We only log on-disk extents
212 * here, so always use the physical fork size to determine the size of the
213 * buffer we need to allocate.
214 */
215STATIC void
216xfs_inode_item_format_extents(
217 struct xfs_inode *ip,
218 struct xfs_log_iovec *vecp,
219 int whichfork,
220 int type)
221{
222 xfs_bmbt_rec_t *ext_buffer;
223
224 ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
225 if (whichfork == XFS_DATA_FORK)
226 ip->i_itemp->ili_extents_buf = ext_buffer;
227 else
228 ip->i_itemp->ili_aextents_buf = ext_buffer;
229
230 vecp->i_addr = ext_buffer;
231 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
232 vecp->i_type = type;
233}
234
235/*
201 * This is called to fill in the vector of log iovecs for the 236 * This is called to fill in the vector of log iovecs for the
202 * given inode log item. It fills the first item with an inode 237 * given inode log item. It fills the first item with an inode
203 * log format structure, the second with the on-disk inode structure, 238 * log format structure, the second with the on-disk inode structure,
@@ -213,7 +248,6 @@ xfs_inode_item_format(
213 struct xfs_inode *ip = iip->ili_inode; 248 struct xfs_inode *ip = iip->ili_inode;
214 uint nvecs; 249 uint nvecs;
215 size_t data_bytes; 250 size_t data_bytes;
216 xfs_bmbt_rec_t *ext_buffer;
217 xfs_mount_t *mp; 251 xfs_mount_t *mp;
218 252
219 vecp->i_addr = &iip->ili_format; 253 vecp->i_addr = &iip->ili_format;
@@ -320,22 +354,8 @@ xfs_inode_item_format(
320 } else 354 } else
321#endif 355#endif
322 { 356 {
323 /* 357 xfs_inode_item_format_extents(ip, vecp,
324 * There are delayed allocation extents 358 XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
325 * in the inode, or we need to convert
326 * the extents to on disk format.
327 * Use xfs_iextents_copy()
328 * to copy only the real extents into
329 * a separate buffer. We'll free the
330 * buffer in the unlock routine.
331 */
332 ext_buffer = kmem_alloc(ip->i_df.if_bytes,
333 KM_SLEEP);
334 iip->ili_extents_buf = ext_buffer;
335 vecp->i_addr = ext_buffer;
336 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
337 XFS_DATA_FORK);
338 vecp->i_type = XLOG_REG_TYPE_IEXT;
339 } 359 }
340 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 360 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
341 iip->ili_format.ilf_dsize = vecp->i_len; 361 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -445,19 +465,12 @@ xfs_inode_item_format(
445 */ 465 */
446 vecp->i_addr = ip->i_afp->if_u1.if_extents; 466 vecp->i_addr = ip->i_afp->if_u1.if_extents;
447 vecp->i_len = ip->i_afp->if_bytes; 467 vecp->i_len = ip->i_afp->if_bytes;
468 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
448#else 469#else
449 ASSERT(iip->ili_aextents_buf == NULL); 470 ASSERT(iip->ili_aextents_buf == NULL);
450 /* 471 xfs_inode_item_format_extents(ip, vecp,
451 * Need to endian flip before logging 472 XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
452 */
453 ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
454 KM_SLEEP);
455 iip->ili_aextents_buf = ext_buffer;
456 vecp->i_addr = ext_buffer;
457 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
458 XFS_ATTR_FORK);
459#endif 473#endif
460 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
461 iip->ili_format.ilf_asize = vecp->i_len; 474 iip->ili_format.ilf_asize = vecp->i_len;
462 vecp++; 475 vecp++;
463 nvecs++; 476 nvecs++;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index dc1882adaf54..751e94fe1f77 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -204,7 +204,6 @@ xfs_bulkstat(
204 xfs_agi_t *agi; /* agi header data */ 204 xfs_agi_t *agi; /* agi header data */
205 xfs_agino_t agino; /* inode # in allocation group */ 205 xfs_agino_t agino; /* inode # in allocation group */
206 xfs_agnumber_t agno; /* allocation group number */ 206 xfs_agnumber_t agno; /* allocation group number */
207 xfs_daddr_t bno; /* inode cluster start daddr */
208 int chunkidx; /* current index into inode chunk */ 207 int chunkidx; /* current index into inode chunk */
209 int clustidx; /* current index into inode cluster */ 208 int clustidx; /* current index into inode cluster */
210 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 209 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
@@ -463,7 +462,6 @@ xfs_bulkstat(
463 mp->m_sb.sb_inopblog); 462 mp->m_sb.sb_inopblog);
464 } 463 }
465 ino = XFS_AGINO_TO_INO(mp, agno, agino); 464 ino = XFS_AGINO_TO_INO(mp, agno, agino);
466 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
467 /* 465 /*
468 * Skip if this inode is free. 466 * Skip if this inode is free.
469 */ 467 */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 25efa9b8a602..b612ce4520ae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
761 break; 761 break;
762 case XLOG_STATE_COVER_NEED: 762 case XLOG_STATE_COVER_NEED:
763 case XLOG_STATE_COVER_NEED2: 763 case XLOG_STATE_COVER_NEED2:
764 if (!xfs_trans_ail_tail(log->l_ailp) && 764 if (!xfs_ail_min_lsn(log->l_ailp) &&
765 xlog_iclogs_empty(log)) { 765 xlog_iclogs_empty(log)) {
766 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 766 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
767 log->l_covered_state = XLOG_STATE_COVER_DONE; 767 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -801,7 +801,7 @@ xlog_assign_tail_lsn(
801 xfs_lsn_t tail_lsn; 801 xfs_lsn_t tail_lsn;
802 struct log *log = mp->m_log; 802 struct log *log = mp->m_log;
803 803
804 tail_lsn = xfs_trans_ail_tail(mp->m_ail); 804 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
805 if (!tail_lsn) 805 if (!tail_lsn)
806 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 806 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
807 807
@@ -1239,7 +1239,7 @@ xlog_grant_push_ail(
1239 * the filesystem is shutting down. 1239 * the filesystem is shutting down.
1240 */ 1240 */
1241 if (!XLOG_FORCED_SHUTDOWN(log)) 1241 if (!XLOG_FORCED_SHUTDOWN(log))
1242 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1242 xfs_ail_push(log->l_ailp, threshold_lsn);
1243} 1243}
1244 1244
1245/* 1245/*
@@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr(
3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); 3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3408} 3408}
3409 3409
3410/*
3411 * Check to make sure the grant write head didn't just overlap the tail. If
3412 * the cycles are the same, we can't be overlapping. Otherwise, make sure that
3413 * the cycles differ by exactly one and check the byte count.
3414 *
3415 * This check is run unlocked, so can give false positives. Rather than assert
3416 * on failures, use a warn-once flag and a panic tag to allow the admin to
3417 * determine if they want to panic the machine when such an error occurs. For
3418 * debug kernels this will have the same effect as using an assert but, unlike
3419 * an assert, it can be turned off at runtime.
3420 */
3410STATIC void 3421STATIC void
3411xlog_verify_grant_tail( 3422xlog_verify_grant_tail(
3412 struct log *log) 3423 struct log *log)
@@ -3414,17 +3425,22 @@ xlog_verify_grant_tail(
3414 int tail_cycle, tail_blocks; 3425 int tail_cycle, tail_blocks;
3415 int cycle, space; 3426 int cycle, space;
3416 3427
3417 /*
3418 * Check to make sure the grant write head didn't just over lap the
3419 * tail. If the cycles are the same, we can't be overlapping.
3420 * Otherwise, make sure that the cycles differ by exactly one and
3421 * check the byte count.
3422 */
3423 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3428 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
3424 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3429 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3425 if (tail_cycle != cycle) { 3430 if (tail_cycle != cycle) {
3426 ASSERT(cycle - 1 == tail_cycle); 3431 if (cycle - 1 != tail_cycle &&
3427 ASSERT(space <= BBTOB(tail_blocks)); 3432 !(log->l_flags & XLOG_TAIL_WARN)) {
3433 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3434 "%s: cycle - 1 != tail_cycle", __func__);
3435 log->l_flags |= XLOG_TAIL_WARN;
3436 }
3437
3438 if (space > BBTOB(tail_blocks) &&
3439 !(log->l_flags & XLOG_TAIL_WARN)) {
3440 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3441 "%s: space > BBTOB(tail_blocks)", __func__);
3442 log->l_flags |= XLOG_TAIL_WARN;
3443 }
3428 } 3444 }
3429} 3445}
3430 3446
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 15dbf1f9c2be..5864850e9e34 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i)
144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ 144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being 145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
147 148
148#ifdef __KERNEL__ 149#ifdef __KERNEL__
149/* 150/*
@@ -570,7 +571,7 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
570 * When we crack an atomic LSN, we sample it first so that the value will not 571 * When we crack an atomic LSN, we sample it first so that the value will not
571 * change while we are cracking it into the component values. This means we 572 * change while we are cracking it into the component values. This means we
572 * will always get consistent component values to work from. This should always 573 * will always get consistent component values to work from. This should always
573 * be used to smaple and crack LSNs taht are stored and updated in atomic 574 * be used to sample and crack LSNs that are stored and updated in atomic
574 * variables. 575 * variables.
575 */ 576 */
576static inline void 577static inline void
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0c4a5618e7af..5cc464a17c93 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -101,7 +101,7 @@ xlog_get_bp(
101 /* 101 /*
102 * We do log I/O in units of log sectors (a power-of-2 102 * We do log I/O in units of log sectors (a power-of-2
103 * multiple of the basic block size), so we round up the 103 * multiple of the basic block size), so we round up the
104 * requested size to acommodate the basic blocks required 104 * requested size to accommodate the basic blocks required
105 * for complete log sectors. 105 * for complete log sectors.
106 * 106 *
107 * In addition, the buffer may be used for a non-sector- 107 * In addition, the buffer may be used for a non-sector-
@@ -112,7 +112,7 @@ xlog_get_bp(
112 * an issue. Nor will this be a problem if the log I/O is 112 * an issue. Nor will this be a problem if the log I/O is
113 * done in basic blocks (sector size 1). But otherwise we 113 * done in basic blocks (sector size 1). But otherwise we
114 * extend the buffer by one extra log sector to ensure 114 * extend the buffer by one extra log sector to ensure
115 * there's space to accomodate this possiblility. 115 * there's space to accommodate this possibility.
116 */ 116 */
117 if (nbblks > 1 && log->l_sectBBsize > 1) 117 if (nbblks > 1 && log->l_sectBBsize > 1)
118 nbblks += log->l_sectBBsize; 118 nbblks += log->l_sectBBsize;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a62e8971539d..19af0ab0d0c6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -203,12 +203,9 @@ typedef struct xfs_mount {
203 struct mutex m_icsb_mutex; /* balancer sync lock */ 203 struct mutex m_icsb_mutex; /* balancer sync lock */
204#endif 204#endif
205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
206 struct task_struct *m_sync_task; /* generalised sync thread */ 206 struct delayed_work m_sync_work; /* background sync work */
207 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ 207 struct delayed_work m_reclaim_work; /* background inode reclaim */
208 struct list_head m_sync_list; /* sync thread work item list */ 208 struct work_struct m_flush_work; /* background inode flush */
209 spinlock_t m_sync_lock; /* work item list lock */
210 int m_sync_seq; /* sync thread generation no. */
211 wait_queue_head_t m_wait_single_sync_task;
212 __int64_t m_update_flags; /* sb flags we need to update 209 __int64_t m_update_flags; /* sb flags we need to update
213 on the next remount,rw */ 210 on the next remount,rw */
214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 12aff9584e29..5fc2380092c8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,74 +28,138 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); 31struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
32STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 32
36#ifdef DEBUG 33#ifdef DEBUG
37STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); 34/*
38#else 35 * Check that the list is sorted as it should be.
36 */
37STATIC void
38xfs_ail_check(
39 struct xfs_ail *ailp,
40 xfs_log_item_t *lip)
41{
42 xfs_log_item_t *prev_lip;
43
44 if (list_empty(&ailp->xa_ail))
45 return;
46
47 /*
48 * Check the next and previous entries are valid.
49 */
50 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
51 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
52 if (&prev_lip->li_ail != &ailp->xa_ail)
53 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
54
55 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
56 if (&prev_lip->li_ail != &ailp->xa_ail)
57 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
58
59
60#ifdef XFS_TRANS_DEBUG
61 /*
62 * Walk the list checking lsn ordering, and that every entry has the
63 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
64 * when specifically debugging the transaction subsystem.
65 */
66 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
67 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
68 if (&prev_lip->li_ail != &ailp->xa_ail)
69 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
70 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
71 prev_lip = lip;
72 }
73#endif /* XFS_TRANS_DEBUG */
74}
75#else /* !DEBUG */
39#define xfs_ail_check(a,l) 76#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 77#endif /* DEBUG */
41 78
79/*
80 * Return a pointer to the first item in the AIL. If the AIL is empty, then
81 * return NULL.
82 */
83static xfs_log_item_t *
84xfs_ail_min(
85 struct xfs_ail *ailp)
86{
87 if (list_empty(&ailp->xa_ail))
88 return NULL;
89
90 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
91}
92
93 /*
94 * Return a pointer to the last item in the AIL. If the AIL is empty, then
95 * return NULL.
96 */
97static xfs_log_item_t *
98xfs_ail_max(
99 struct xfs_ail *ailp)
100{
101 if (list_empty(&ailp->xa_ail))
102 return NULL;
103
104 return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
105}
106
107/*
108 * Return a pointer to the item which follows the given item in the AIL. If
109 * the given item is the last item in the list, then return NULL.
110 */
111static xfs_log_item_t *
112xfs_ail_next(
113 struct xfs_ail *ailp,
114 xfs_log_item_t *lip)
115{
116 if (lip->li_ail.next == &ailp->xa_ail)
117 return NULL;
118
119 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
120}
42 121
43/* 122/*
44 * This is called by the log manager code to determine the LSN 123 * This is called by the log manager code to determine the LSN of the tail of
45 * of the tail of the log. This is exactly the LSN of the first 124 * the log. This is exactly the LSN of the first item in the AIL. If the AIL
46 * item in the AIL. If the AIL is empty, then this function 125 * is empty, then this function returns 0.
47 * returns 0.
48 * 126 *
49 * We need the AIL lock in order to get a coherent read of the 127 * We need the AIL lock in order to get a coherent read of the lsn of the last
50 * lsn of the last item in the AIL. 128 * item in the AIL.
51 */ 129 */
52xfs_lsn_t 130xfs_lsn_t
53xfs_trans_ail_tail( 131xfs_ail_min_lsn(
54 struct xfs_ail *ailp) 132 struct xfs_ail *ailp)
55{ 133{
56 xfs_lsn_t lsn; 134 xfs_lsn_t lsn = 0;
57 xfs_log_item_t *lip; 135 xfs_log_item_t *lip;
58 136
59 spin_lock(&ailp->xa_lock); 137 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(ailp); 138 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 139 if (lip)
62 lsn = (xfs_lsn_t)0;
63 } else {
64 lsn = lip->li_lsn; 140 lsn = lip->li_lsn;
65 }
66 spin_unlock(&ailp->xa_lock); 141 spin_unlock(&ailp->xa_lock);
67 142
68 return lsn; 143 return lsn;
69} 144}
70 145
71/* 146/*
72 * xfs_trans_push_ail 147 * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
73 *
74 * This routine is called to move the tail of the AIL forward. It does this by
75 * trying to flush items in the AIL whose lsns are below the given
76 * threshold_lsn.
77 *
78 * the push is run asynchronously in a separate thread, so we return the tail
79 * of the log right now instead of the tail after the push. This means we will
80 * either continue right away, or we will sleep waiting on the async thread to
81 * do its work.
82 *
83 * We do this unlocked - we only need to know whether there is anything in the
84 * AIL at the time we are called. We don't need to access the contents of
85 * any of the objects, so the lock is not needed.
86 */ 148 */
87void 149static xfs_lsn_t
88xfs_trans_ail_push( 150xfs_ail_max_lsn(
89 struct xfs_ail *ailp, 151 struct xfs_ail *ailp)
90 xfs_lsn_t threshold_lsn)
91{ 152{
92 xfs_log_item_t *lip; 153 xfs_lsn_t lsn = 0;
154 xfs_log_item_t *lip;
93 155
94 lip = xfs_ail_min(ailp); 156 spin_lock(&ailp->xa_lock);
95 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { 157 lip = xfs_ail_max(ailp);
96 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) 158 if (lip)
97 xfsaild_wakeup(ailp, threshold_lsn); 159 lsn = lip->li_lsn;
98 } 160 spin_unlock(&ailp->xa_lock);
161
162 return lsn;
99} 163}
100 164
101/* 165/*
@@ -236,35 +300,78 @@ out:
236} 300}
237 301
238/* 302/*
239 * xfsaild_push does the work of pushing on the AIL. Returning a timeout of 303 * splice the log item list into the AIL at the given LSN.
240 * zero indicates that the caller should sleep until woken.
241 */ 304 */
242long 305static void
243xfsaild_push( 306xfs_ail_splice(
244 struct xfs_ail *ailp, 307 struct xfs_ail *ailp,
245 xfs_lsn_t *last_lsn) 308 struct list_head *list,
309 xfs_lsn_t lsn)
246{ 310{
247 long tout = 0; 311 xfs_log_item_t *next_lip;
248 xfs_lsn_t last_pushed_lsn = *last_lsn; 312
249 xfs_lsn_t target = ailp->xa_target; 313 /* If the list is empty, just insert the item. */
250 xfs_lsn_t lsn; 314 if (list_empty(&ailp->xa_ail)) {
251 xfs_log_item_t *lip; 315 list_splice(list, &ailp->xa_ail);
252 int flush_log, count, stuck; 316 return;
253 xfs_mount_t *mp = ailp->xa_mount; 317 }
318
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
321 break;
322 }
323
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328}
329
 330/*
 331 * Delete the given item from the AIL; the function returns void.
 332 */
333static void
334xfs_ail_delete(
335 struct xfs_ail *ailp,
336 xfs_log_item_t *lip)
337{
338 xfs_ail_check(ailp, lip);
339 list_del(&lip->li_ail);
340 xfs_trans_ail_cursor_clear(ailp, lip);
341}
342
343/*
344 * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
345 * to run at a later time if there is more work to do to complete the push.
346 */
347STATIC void
348xfs_ail_worker(
349 struct work_struct *work)
350{
351 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work);
353 xfs_mount_t *mp = ailp->xa_mount;
254 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 354 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
255 int push_xfsbufd = 0; 355 xfs_log_item_t *lip;
356 xfs_lsn_t lsn;
357 xfs_lsn_t target;
358 long tout = 10;
359 int flush_log = 0;
360 int stuck = 0;
361 int count = 0;
362 int push_xfsbufd = 0;
256 363
257 spin_lock(&ailp->xa_lock); 364 spin_lock(&ailp->xa_lock);
365 target = ailp->xa_target;
258 xfs_trans_ail_cursor_init(ailp, cur); 366 xfs_trans_ail_cursor_init(ailp, cur);
259 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); 367 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
260 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 368 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
261 /* 369 /*
262 * AIL is empty or our push has reached the end. 370 * AIL is empty or our push has reached the end.
263 */ 371 */
264 xfs_trans_ail_cursor_done(ailp, cur); 372 xfs_trans_ail_cursor_done(ailp, cur);
265 spin_unlock(&ailp->xa_lock); 373 spin_unlock(&ailp->xa_lock);
266 *last_lsn = 0; 374 goto out_done;
267 return tout;
268 } 375 }
269 376
270 XFS_STATS_INC(xs_push_ail); 377 XFS_STATS_INC(xs_push_ail);
@@ -281,8 +388,7 @@ xfsaild_push(
281 * lots of contention on the AIL lists. 388 * lots of contention on the AIL lists.
282 */ 389 */
283 lsn = lip->li_lsn; 390 lsn = lip->li_lsn;
284 flush_log = stuck = count = 0; 391 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
285 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
286 int lock_result; 392 int lock_result;
287 /* 393 /*
288 * If we can lock the item without sleeping, unlock the AIL 394 * If we can lock the item without sleeping, unlock the AIL
@@ -301,13 +407,13 @@ xfsaild_push(
301 case XFS_ITEM_SUCCESS: 407 case XFS_ITEM_SUCCESS:
302 XFS_STATS_INC(xs_push_ail_success); 408 XFS_STATS_INC(xs_push_ail_success);
303 IOP_PUSH(lip); 409 IOP_PUSH(lip);
304 last_pushed_lsn = lsn; 410 ailp->xa_last_pushed_lsn = lsn;
305 break; 411 break;
306 412
307 case XFS_ITEM_PUSHBUF: 413 case XFS_ITEM_PUSHBUF:
308 XFS_STATS_INC(xs_push_ail_pushbuf); 414 XFS_STATS_INC(xs_push_ail_pushbuf);
309 IOP_PUSHBUF(lip); 415 IOP_PUSHBUF(lip);
310 last_pushed_lsn = lsn; 416 ailp->xa_last_pushed_lsn = lsn;
311 push_xfsbufd = 1; 417 push_xfsbufd = 1;
312 break; 418 break;
313 419
@@ -319,7 +425,7 @@ xfsaild_push(
319 425
320 case XFS_ITEM_LOCKED: 426 case XFS_ITEM_LOCKED:
321 XFS_STATS_INC(xs_push_ail_locked); 427 XFS_STATS_INC(xs_push_ail_locked);
322 last_pushed_lsn = lsn; 428 ailp->xa_last_pushed_lsn = lsn;
323 stuck++; 429 stuck++;
324 break; 430 break;
325 431
@@ -374,9 +480,27 @@ xfsaild_push(
374 wake_up_process(mp->m_ddev_targp->bt_task); 480 wake_up_process(mp->m_ddev_targp->bt_task);
375 } 481 }
376 482
483 /* assume we have more work to do in a short while */
484out_done:
377 if (!count) { 485 if (!count) {
378 /* We're past our target or empty, so idle */ 486 /* We're past our target or empty, so idle */
379 last_pushed_lsn = 0; 487 ailp->xa_last_pushed_lsn = 0;
488
489 /*
490 * We clear the XFS_AIL_PUSHING_BIT first before checking
491 * whether the target has changed. If the target has changed,
492 * this pushes the requeue race directly onto the result of the
493 * atomic test/set bit, so we are guaranteed that either the
 494 * the pusher that changed the target or ourselves will requeue
 495 * the work (but not both).
496 */
497 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
498 smp_rmb();
499 if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
500 test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
501 return;
502
503 tout = 50;
380 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 504 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
381 /* 505 /*
382 * We reached the target so wait a bit longer for I/O to 506 * We reached the target so wait a bit longer for I/O to
@@ -384,7 +508,7 @@ xfsaild_push(
384 * start the next scan from the start of the AIL. 508 * start the next scan from the start of the AIL.
385 */ 509 */
386 tout = 50; 510 tout = 50;
387 last_pushed_lsn = 0; 511 ailp->xa_last_pushed_lsn = 0;
388 } else if ((stuck * 100) / count > 90) { 512 } else if ((stuck * 100) / count > 90) {
389 /* 513 /*
390 * Either there is a lot of contention on the AIL or we 514 * Either there is a lot of contention on the AIL or we
@@ -396,14 +520,61 @@ xfsaild_push(
396 * continuing from where we were. 520 * continuing from where we were.
397 */ 521 */
398 tout = 20; 522 tout = 20;
399 } else {
400 /* more to do, but wait a short while before continuing */
401 tout = 10;
402 } 523 }
403 *last_lsn = last_pushed_lsn; 524
404 return tout; 525 /* There is more to do, requeue us. */
526 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
527 msecs_to_jiffies(tout));
528}
529
530/*
531 * This routine is called to move the tail of the AIL forward. It does this by
532 * trying to flush items in the AIL whose lsns are below the given
533 * threshold_lsn.
534 *
535 * The push is run asynchronously in a workqueue, which means the caller needs
536 * to handle waiting on the async flush for space to become available.
537 * We don't want to interrupt any push that is in progress, hence we only queue
 538 * work if we set the pushing bit appropriately.
539 *
540 * We do this unlocked - we only need to know whether there is anything in the
541 * AIL at the time we are called. We don't need to access the contents of
542 * any of the objects, so the lock is not needed.
543 */
544void
545xfs_ail_push(
546 struct xfs_ail *ailp,
547 xfs_lsn_t threshold_lsn)
548{
549 xfs_log_item_t *lip;
550
551 lip = xfs_ail_min(ailp);
552 if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
553 XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
554 return;
555
556 /*
557 * Ensure that the new target is noticed in push code before it clears
558 * the XFS_AIL_PUSHING_BIT.
559 */
560 smp_wmb();
561 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
562 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
563 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
405} 564}
406 565
566/*
567 * Push out all items in the AIL immediately
568 */
569void
570xfs_ail_push_all(
571 struct xfs_ail *ailp)
572{
573 xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp);
574
575 if (threshold_lsn)
576 xfs_ail_push(ailp, threshold_lsn);
577}
407 578
408/* 579/*
409 * This is to be called when an item is unlocked that may have 580 * This is to be called when an item is unlocked that may have
@@ -615,7 +786,6 @@ xfs_trans_ail_init(
615 xfs_mount_t *mp) 786 xfs_mount_t *mp)
616{ 787{
617 struct xfs_ail *ailp; 788 struct xfs_ail *ailp;
618 int error;
619 789
620 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); 790 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
621 if (!ailp) 791 if (!ailp)
@@ -624,15 +794,9 @@ xfs_trans_ail_init(
624 ailp->xa_mount = mp; 794 ailp->xa_mount = mp;
625 INIT_LIST_HEAD(&ailp->xa_ail); 795 INIT_LIST_HEAD(&ailp->xa_ail);
626 spin_lock_init(&ailp->xa_lock); 796 spin_lock_init(&ailp->xa_lock);
627 error = xfsaild_start(ailp); 797 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
628 if (error)
629 goto out_free_ailp;
630 mp->m_ail = ailp; 798 mp->m_ail = ailp;
631 return 0; 799 return 0;
632
633out_free_ailp:
634 kmem_free(ailp);
635 return error;
636} 800}
637 801
638void 802void
@@ -641,124 +805,6 @@ xfs_trans_ail_destroy(
641{ 805{
642 struct xfs_ail *ailp = mp->m_ail; 806 struct xfs_ail *ailp = mp->m_ail;
643 807
644 xfsaild_stop(ailp); 808 cancel_delayed_work_sync(&ailp->xa_work);
645 kmem_free(ailp); 809 kmem_free(ailp);
646} 810}
647
648/*
649 * splice the log item list into the AIL at the given LSN.
650 */
651STATIC void
652xfs_ail_splice(
653 struct xfs_ail *ailp,
654 struct list_head *list,
655 xfs_lsn_t lsn)
656{
657 xfs_log_item_t *next_lip;
658
659 /*
660 * If the list is empty, just insert the item.
661 */
662 if (list_empty(&ailp->xa_ail)) {
663 list_splice(list, &ailp->xa_ail);
664 return;
665 }
666
667 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
668 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
669 break;
670 }
671
672 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
673 (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
674
675 list_splice_init(list, &next_lip->li_ail);
676 return;
677}
678
679/*
680 * Delete the given item from the AIL. Return a pointer to the item.
681 */
682STATIC void
683xfs_ail_delete(
684 struct xfs_ail *ailp,
685 xfs_log_item_t *lip)
686{
687 xfs_ail_check(ailp, lip);
688 list_del(&lip->li_ail);
689 xfs_trans_ail_cursor_clear(ailp, lip);
690}
691
692/*
693 * Return a pointer to the first item in the AIL.
694 * If the AIL is empty, then return NULL.
695 */
696STATIC xfs_log_item_t *
697xfs_ail_min(
698 struct xfs_ail *ailp)
699{
700 if (list_empty(&ailp->xa_ail))
701 return NULL;
702
703 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
704}
705
706/*
707 * Return a pointer to the item which follows
708 * the given item in the AIL. If the given item
709 * is the last item in the list, then return NULL.
710 */
711STATIC xfs_log_item_t *
712xfs_ail_next(
713 struct xfs_ail *ailp,
714 xfs_log_item_t *lip)
715{
716 if (lip->li_ail.next == &ailp->xa_ail)
717 return NULL;
718
719 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
720}
721
722#ifdef DEBUG
723/*
724 * Check that the list is sorted as it should be.
725 */
726STATIC void
727xfs_ail_check(
728 struct xfs_ail *ailp,
729 xfs_log_item_t *lip)
730{
731 xfs_log_item_t *prev_lip;
732
733 if (list_empty(&ailp->xa_ail))
734 return;
735
736 /*
737 * Check the next and previous entries are valid.
738 */
739 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
740 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
741 if (&prev_lip->li_ail != &ailp->xa_ail)
742 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
743
744 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
745 if (&prev_lip->li_ail != &ailp->xa_ail)
746 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
747
748
749#ifdef XFS_TRANS_DEBUG
750 /*
751 * Walk the list checking lsn ordering, and that every entry has the
752 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
753 * when specifically debugging the transaction subsystem.
754 */
755 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
756 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
757 if (&prev_lip->li_ail != &ailp->xa_ail)
758 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
759 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
760 prev_lip = lip;
761 }
762#endif /* XFS_TRANS_DEBUG */
763}
764#endif /* DEBUG */
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 16084d8ea231..048b0c689d3e 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -81,7 +81,7 @@ xfs_trans_ijoin(
81 * 81 *
82 * 82 *
83 * Grabs a reference to the inode which will be dropped when the transaction 83 * Grabs a reference to the inode which will be dropped when the transaction
84 * is commited. The inode will also be unlocked at that point. The inode 84 * is committed. The inode will also be unlocked at that point. The inode
85 * must be locked, and it cannot be associated with any transaction. 85 * must be locked, and it cannot be associated with any transaction.
86 */ 86 */
87void 87void
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 35162c238fa3..6b164e9e9a1f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,16 +65,22 @@ struct xfs_ail_cursor {
65struct xfs_ail { 65struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct list_head xa_ail; 67 struct list_head xa_ail;
68 uint xa_gen;
69 struct task_struct *xa_task;
70 xfs_lsn_t xa_target; 68 xfs_lsn_t xa_target;
71 struct xfs_ail_cursor xa_cursors; 69 struct xfs_ail_cursor xa_cursors;
72 spinlock_t xa_lock; 70 spinlock_t xa_lock;
71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn;
73 unsigned long xa_flags;
73}; 74};
74 75
76#define XFS_AIL_PUSHING_BIT 0
77
75/* 78/*
76 * From xfs_trans_ail.c 79 * From xfs_trans_ail.c
77 */ 80 */
81
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83
78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
79 struct xfs_log_item **log_items, int nr_items, 85 struct xfs_log_item **log_items, int nr_items,
80 xfs_lsn_t lsn) __releases(ailp->xa_lock); 86 xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -98,12 +104,13 @@ xfs_trans_ail_delete(
98 xfs_trans_ail_delete_bulk(ailp, &lip, 1); 104 xfs_trans_ail_delete_bulk(ailp, &lip, 1);
99} 105}
100 106
101void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 107void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
108void xfs_ail_push_all(struct xfs_ail *);
109xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
110
102void xfs_trans_unlocked_item(struct xfs_ail *, 111void xfs_trans_unlocked_item(struct xfs_ail *,
103 xfs_log_item_t *); 112 xfs_log_item_t *);
104 113
105xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
106
107struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
108 struct xfs_ail_cursor *cur, 115 struct xfs_ail_cursor *cur,
109 xfs_lsn_t lsn); 116 xfs_lsn_t lsn);
@@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
112void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
113 struct xfs_ail_cursor *cur); 120 struct xfs_ail_cursor *cur);
114 121
115long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
116void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
117int xfsaild_start(struct xfs_ail *);
118void xfsaild_stop(struct xfs_ail *);
119
120#if BITS_PER_LONG != 64 122#if BITS_PER_LONG != 64
121static inline void 123static inline void
122xfs_trans_ail_copy_lsn( 124xfs_trans_ail_copy_lsn(
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c48b4217ec47..b7a5fe7c52c8 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -953,7 +953,7 @@ xfs_release(
953 * If we previously truncated this file and removed old data 953 * If we previously truncated this file and removed old data
954 * in the process, we want to initiate "early" writeout on 954 * in the process, we want to initiate "early" writeout on
955 * the last close. This is an attempt to combat the notorious 955 * the last close. This is an attempt to combat the notorious
956 * NULL files problem which is particularly noticable from a 956 * NULL files problem which is particularly noticeable from a
957 * truncate down, buffered (re-)write (delalloc), followed by 957 * truncate down, buffered (re-)write (delalloc), followed by
958 * a crash. What we are effectively doing here is 958 * a crash. What we are effectively doing here is
959 * significantly reducing the time window where we'd otherwise 959 * significantly reducing the time window where we'd otherwise
@@ -982,7 +982,7 @@ xfs_release(
982 * 982 *
983 * Further, check if the inode is being opened, written and 983 * Further, check if the inode is being opened, written and
984 * closed frequently and we have delayed allocation blocks 984 * closed frequently and we have delayed allocation blocks
985 * oustanding (e.g. streaming writes from the NFS server), 985 * outstanding (e.g. streaming writes from the NFS server),
986 * truncating the blocks past EOF will cause fragmentation to 986 * truncating the blocks past EOF will cause fragmentation to
987 * occur. 987 * occur.
988 * 988 *