Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/Kconfig | 5
-rw-r--r--  fs/9p/vfs_inode.c | 4
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 11
-rw-r--r--  fs/Kconfig | 19
-rw-r--r--  fs/affs/namei.c | 5
-rw-r--r--  fs/afs/dir.c | 5
-rw-r--r--  fs/autofs4/root.c | 2
-rw-r--r--  fs/bfs/dir.c | 3
-rw-r--r--  fs/binfmt_flat.c | 8
-rw-r--r--  fs/block_dev.c | 34
-rw-r--r--  fs/btrfs/extent_io.c | 10
-rw-r--r--  fs/btrfs/relocation.c | 2
-rw-r--r--  fs/btrfs/super.c | 2
-rw-r--r--  fs/buffer.c | 64
-rw-r--r--  fs/ceph/addr.c | 5
-rw-r--r--  fs/ceph/caps.c | 61
-rw-r--r--  fs/ceph/dir.c | 7
-rw-r--r--  fs/ceph/export.c | 25
-rw-r--r--  fs/ceph/mds_client.c | 7
-rw-r--r--  fs/ceph/mds_client.h | 1
-rw-r--r--  fs/cifs/Kconfig | 15
-rw-r--r--  fs/cifs/Makefile | 2
-rw-r--r--  fs/cifs/README | 15
-rw-r--r--  fs/cifs/cache.c | 6
-rw-r--r--  fs/cifs/cifs_debug.c | 28
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r--  fs/cifs/cifs_fs_sb.h | 7
-rw-r--r--  fs/cifs/cifs_spnego.c | 2
-rw-r--r--  fs/cifs/cifs_spnego.h | 2
-rw-r--r--  fs/cifs/cifs_unicode.h | 3
-rw-r--r--  fs/cifs/cifsacl.c | 490
-rw-r--r--  fs/cifs/cifsacl.h | 25
-rw-r--r--  fs/cifs/cifsencrypt.c | 26
-rw-r--r--  fs/cifs/cifsfs.c | 338
-rw-r--r--  fs/cifs/cifsfs.h | 20
-rw-r--r--  fs/cifs/cifsglob.h | 139
-rw-r--r--  fs/cifs/cifspdu.h | 37
-rw-r--r--  fs/cifs/cifsproto.h | 237
-rw-r--r--  fs/cifs/cifssmb.c | 838
-rw-r--r--  fs/cifs/connect.c | 860
-rw-r--r--  fs/cifs/dir.c | 33
-rw-r--r--  fs/cifs/export.c | 4
-rw-r--r--  fs/cifs/file.c | 543
-rw-r--r--  fs/cifs/fscache.c | 6
-rw-r--r--  fs/cifs/fscache.h | 8
-rw-r--r--  fs/cifs/inode.c | 221
-rw-r--r--  fs/cifs/ioctl.c | 2
-rw-r--r--  fs/cifs/link.c | 46
-rw-r--r--  fs/cifs/misc.c | 44
-rw-r--r--  fs/cifs/netmisc.c | 9
-rw-r--r--  fs/cifs/readdir.c | 8
-rw-r--r--  fs/cifs/sess.c | 51
-rw-r--r--  fs/cifs/smbdes.c | 418
-rw-r--r--  fs/cifs/smbencrypt.c | 124
-rw-r--r--  fs/cifs/transport.c | 264
-rw-r--r--  fs/cifs/xattr.c | 28
-rw-r--r--  fs/coda/dir.c | 5
-rw-r--r--  fs/compat.c | 235
-rw-r--r--  fs/configfs/dir.c | 2
-rw-r--r--  fs/dcache.c | 9
-rw-r--r--  fs/debugfs/file.c | 19
-rw-r--r--  fs/dlm/config.c | 9
-rw-r--r--  fs/dlm/config.h | 1
-rw-r--r--  fs/dlm/dlm_internal.h | 3
-rw-r--r--  fs/dlm/lock.c | 182
-rw-r--r--  fs/dlm/lock.h | 1
-rw-r--r--  fs/dlm/lockspace.c | 6
-rw-r--r--  fs/dlm/main.c | 2
-rw-r--r--  fs/dlm/plock.c | 65
-rw-r--r--  fs/dlm/user.c | 1
-rw-r--r--  fs/drop_caches.c | 5
-rw-r--r--  fs/ecryptfs/inode.c | 7
-rw-r--r--  fs/ecryptfs/keystore.c | 46
-rw-r--r--  fs/exec.c | 178
-rw-r--r--  fs/ext2/super.c | 3
-rw-r--r--  fs/ext3/namei.c | 80
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext4/Makefile | 3
-rw-r--r--  fs/ext4/balloc.c | 146
-rw-r--r--  fs/ext4/ext4.h | 127
-rw-r--r--  fs/ext4/ext4_jbd2.c | 14
-rw-r--r--  fs/ext4/ext4_jbd2.h | 5
-rw-r--r--  fs/ext4/extents.c | 1410
-rw-r--r--  fs/ext4/file.c | 1
-rw-r--r--  fs/ext4/fsync.c | 25
-rw-r--r--  fs/ext4/inode.c | 114
-rw-r--r--  fs/ext4/mballoc.c | 459
-rw-r--r--  fs/ext4/mballoc.h | 6
-rw-r--r--  fs/ext4/migrate.c | 2
-rw-r--r--  fs/ext4/mmp.c | 351
-rw-r--r--  fs/ext4/move_extent.c | 3
-rw-r--r--  fs/ext4/namei.c | 82
-rw-r--r--  fs/ext4/page-io.c | 39
-rw-r--r--  fs/ext4/super.c | 206
-rw-r--r--  fs/ext4/xattr.c | 4
-rw-r--r--  fs/fat/cache.c | 7
-rw-r--r--  fs/fat/dir.c | 32
-rw-r--r--  fs/fat/fat.h | 15
-rw-r--r--  fs/fat/fatent.c | 4
-rw-r--r--  fs/fat/inode.c | 74
-rw-r--r--  fs/fat/misc.c | 44
-rw-r--r--  fs/fat/namei_msdos.c | 9
-rw-r--r--  fs/fat/namei_vfat.c | 9
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 2
-rw-r--r--  fs/fscache/operation.c | 10
-rw-r--r--  fs/fscache/page.c | 13
-rw-r--r--  fs/fuse/dir.c | 6
-rw-r--r--  fs/gfs2/Makefile | 4
-rw-r--r--  fs/gfs2/aops.c | 8
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/dir.c | 197
-rw-r--r--  fs/gfs2/dir.h | 4
-rw-r--r--  fs/gfs2/export.c | 2
-rw-r--r--  fs/gfs2/file.c | 46
-rw-r--r--  fs/gfs2/glock.c | 99
-rw-r--r--  fs/gfs2/glock.h | 3
-rw-r--r--  fs/gfs2/glops.c | 172
-rw-r--r--  fs/gfs2/glops.h | 2
-rw-r--r--  fs/gfs2/incore.h | 8
-rw-r--r--  fs/gfs2/inode.c | 1510
-rw-r--r--  fs/gfs2/inode.h | 8
-rw-r--r--  fs/gfs2/log.c | 208
-rw-r--r--  fs/gfs2/log.h | 2
-rw-r--r--  fs/gfs2/lops.c | 39
-rw-r--r--  fs/gfs2/main.c | 3
-rw-r--r--  fs/gfs2/meta_io.c | 2
-rw-r--r--  fs/gfs2/meta_io.h | 2
-rw-r--r--  fs/gfs2/ops_fstype.c | 32
-rw-r--r--  fs/gfs2/ops_inode.c | 1344
-rw-r--r--  fs/gfs2/quota.c | 12
-rw-r--r--  fs/gfs2/quota.h | 4
-rw-r--r--  fs/gfs2/rgrp.c | 24
-rw-r--r--  fs/gfs2/super.c | 138
-rw-r--r--  fs/gfs2/sys.c | 6
-rw-r--r--  fs/gfs2/trace_gfs2.h | 38
-rw-r--r--  fs/hfs/dir.c | 6
-rw-r--r--  fs/hfsplus/dir.c | 8
-rw-r--r--  fs/hostfs/hostfs_kern.c | 5
-rw-r--r--  fs/hpfs/namei.c | 9
-rw-r--r--  fs/hugetlbfs/inode.c | 7
-rw-r--r--  fs/inode.c | 10
-rw-r--r--  fs/jbd/commit.c | 15
-rw-r--r--  fs/jbd/journal.c | 16
-rw-r--r--  fs/jbd/transaction.c | 3
-rw-r--r--  fs/jbd2/commit.c | 28
-rw-r--r--  fs/jbd2/journal.c | 58
-rw-r--r--  fs/jbd2/transaction.c | 22
-rw-r--r--  fs/jffs2/dir.c | 5
-rw-r--r--  fs/jfs/namei.c | 5
-rw-r--r--  fs/logfs/dev_bdev.c | 1
-rw-r--r--  fs/logfs/dir.c | 5
-rw-r--r--  fs/logfs/readwrite.c | 2
-rw-r--r--  fs/mbcache.c | 10
-rw-r--r--  fs/minix/namei.c | 5
-rw-r--r--  fs/mpage.c | 7
-rw-r--r--  fs/namei.c | 382
-rw-r--r--  fs/namespace.c | 2
-rw-r--r--  fs/ncpfs/dir.c | 5
-rw-r--r--  fs/ncpfs/inode.c | 4
-rw-r--r--  fs/ncpfs/mmap.c | 2
-rw-r--r--  fs/nfs/dir.c | 5
-rw-r--r--  fs/nfs/internal.h | 2
-rw-r--r--  fs/nfsd/stats.c | 2
-rw-r--r--  fs/nilfs2/alloc.c | 12
-rw-r--r--  fs/nilfs2/bmap.c | 4
-rw-r--r--  fs/nilfs2/btnode.c | 19
-rw-r--r--  fs/nilfs2/btnode.h | 4
-rw-r--r--  fs/nilfs2/btree.c | 38
-rw-r--r--  fs/nilfs2/cpfile.c | 24
-rw-r--r--  fs/nilfs2/dat.c | 4
-rw-r--r--  fs/nilfs2/file.c | 1
-rw-r--r--  fs/nilfs2/gcinode.c | 25
-rw-r--r--  fs/nilfs2/ifile.c | 4
-rw-r--r--  fs/nilfs2/inode.c | 23
-rw-r--r--  fs/nilfs2/ioctl.c | 61
-rw-r--r--  fs/nilfs2/mdt.c | 8
-rw-r--r--  fs/nilfs2/mdt.h | 9
-rw-r--r--  fs/nilfs2/namei.c | 5
-rw-r--r--  fs/nilfs2/nilfs.h | 7
-rw-r--r--  fs/nilfs2/page.c | 79
-rw-r--r--  fs/nilfs2/page.h | 7
-rw-r--r--  fs/nilfs2/recovery.c | 12
-rw-r--r--  fs/nilfs2/segbuf.c | 17
-rw-r--r--  fs/nilfs2/segment.c | 190
-rw-r--r--  fs/nilfs2/segment.h | 2
-rw-r--r--  fs/nilfs2/sufile.c | 274
-rw-r--r--  fs/nilfs2/sufile.h | 4
-rw-r--r--  fs/nilfs2/super.c | 131
-rw-r--r--  fs/nilfs2/the_nilfs.c | 24
-rw-r--r--  fs/nilfs2/the_nilfs.h | 2
-rw-r--r--  fs/ocfs2/Makefile | 1
-rw-r--r--  fs/ocfs2/alloc.c | 166
-rw-r--r--  fs/ocfs2/alloc.h | 1
-rw-r--r--  fs/ocfs2/cluster/sys.c | 9
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h | 14
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c | 6
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 94
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 255
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c | 1
-rw-r--r--  fs/ocfs2/dlmfs/dlmfs.c | 2
-rw-r--r--  fs/ocfs2/file.c | 1
-rw-r--r--  fs/ocfs2/ioctl.c | 492
-rw-r--r--  fs/ocfs2/move_extents.c | 1152
-rw-r--r--  fs/ocfs2/move_extents.h | 22
-rw-r--r--  fs/ocfs2/ocfs2_ioctl.h | 68
-rw-r--r--  fs/ocfs2/ocfs2_trace.h | 25
-rw-r--r--  fs/ocfs2/refcounttree.c | 60
-rw-r--r--  fs/ocfs2/refcounttree.h | 11
-rw-r--r--  fs/ocfs2/super.c | 4
-rw-r--r--  fs/omfs/dir.c | 11
-rw-r--r--  fs/partitions/check.c | 10
-rw-r--r--  fs/partitions/efi.c | 9
-rw-r--r--  fs/partitions/ldm.c | 7
-rw-r--r--  fs/proc/Makefile | 1
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  fs/proc/base.c | 103
-rw-r--r--  fs/proc/generic.c | 1
-rw-r--r--  fs/proc/inode.c | 7
-rw-r--r--  fs/proc/internal.h | 26
-rw-r--r--  fs/proc/namespaces.c | 198
-rw-r--r--  fs/proc/stat.c | 6
-rw-r--r--  fs/proc/task_mmu.c | 233
-rw-r--r--  fs/proc/vmcore.c | 52
-rw-r--r--  fs/pstore/platform.c | 12
-rw-r--r--  fs/quota/dquot.c | 5
-rw-r--r--  fs/reiserfs/namei.c | 5
-rw-r--r--  fs/reiserfs/xattr.c | 1
-rw-r--r--  fs/splice.c | 33
-rw-r--r--  fs/squashfs/Kconfig | 4
-rw-r--r--  fs/squashfs/block.c | 2
-rw-r--r--  fs/squashfs/cache.c | 33
-rw-r--r--  fs/squashfs/decompressor.c | 2
-rw-r--r--  fs/squashfs/decompressor.h | 2
-rw-r--r--  fs/squashfs/dir.c | 2
-rw-r--r--  fs/squashfs/export.c | 42
-rw-r--r--  fs/squashfs/file.c | 2
-rw-r--r--  fs/squashfs/fragment.c | 37
-rw-r--r--  fs/squashfs/id.c | 42
-rw-r--r--  fs/squashfs/inode.c | 2
-rw-r--r--  fs/squashfs/namei.c | 2
-rw-r--r--  fs/squashfs/squashfs.h | 10
-rw-r--r--  fs/squashfs/squashfs_fs.h | 2
-rw-r--r--  fs/squashfs/squashfs_fs_i.h | 2
-rw-r--r--  fs/squashfs/squashfs_fs_sb.h | 2
-rw-r--r--  fs/squashfs/super.c | 112
-rw-r--r--  fs/squashfs/symlink.c | 2
-rw-r--r--  fs/squashfs/xattr.c | 2
-rw-r--r--  fs/squashfs/xattr.h | 3
-rw-r--r--  fs/squashfs/xattr_id.c | 47
-rw-r--r--  fs/squashfs/xz_wrapper.c | 2
-rw-r--r--  fs/squashfs/zlib_wrapper.c | 2
-rw-r--r--  fs/super.c | 6
-rw-r--r--  fs/sysfs/file.c | 12
-rw-r--r--  fs/sysfs/group.c | 6
-rw-r--r--  fs/sysv/namei.c | 5
-rw-r--r--  fs/timerfd.c | 102
-rw-r--r--  fs/ubifs/budget.c | 104
-rw-r--r--  fs/ubifs/commit.c | 2
-rw-r--r--  fs/ubifs/debug.c | 167
-rw-r--r--  fs/ubifs/debug.h | 178
-rw-r--r--  fs/ubifs/dir.c | 9
-rw-r--r--  fs/ubifs/file.c | 28
-rw-r--r--  fs/ubifs/find.c | 10
-rw-r--r--  fs/ubifs/gc.c | 71
-rw-r--r--  fs/ubifs/io.c | 33
-rw-r--r--  fs/ubifs/journal.c | 29
-rw-r--r--  fs/ubifs/log.c | 28
-rw-r--r--  fs/ubifs/lprops.c | 115
-rw-r--r--  fs/ubifs/lpt_commit.c | 55
-rw-r--r--  fs/ubifs/master.c | 8
-rw-r--r--  fs/ubifs/misc.h | 17
-rw-r--r--  fs/ubifs/orphan.c | 3
-rw-r--r--  fs/ubifs/recovery.c | 354
-rw-r--r--  fs/ubifs/replay.c | 468
-rw-r--r--  fs/ubifs/sb.c | 153
-rw-r--r--  fs/ubifs/super.c | 46
-rw-r--r--  fs/ubifs/tnc.c | 10
-rw-r--r--  fs/ubifs/tnc_commit.c | 18
-rw-r--r--  fs/ubifs/ubifs-media.h | 30
-rw-r--r--  fs/ubifs/ubifs.h | 86
-rw-r--r--  fs/ubifs/xattr.c | 8
-rw-r--r--  fs/udf/namei.c | 5
-rw-r--r--  fs/ufs/balloc.c | 9
-rw-r--r--  fs/ufs/inode.c | 2
-rw-r--r--  fs/ufs/namei.c | 5
-rw-r--r--  fs/ufs/truncate.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 26
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_discard.c | 29
-rw-r--r--  fs/xfs/linux-2.6/xfs_discard.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c | 20
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h | 7
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 22
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 15
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h | 76
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 6
-rw-r--r--  fs/xfs/xfs_ag.h | 4
-rw-r--r--  fs/xfs/xfs_alloc.c | 871
-rw-r--r--  fs/xfs/xfs_alloc.h | 18
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_bmap.c | 549
-rw-r--r--  fs/xfs/xfs_bmap.h | 2
-rw-r--r--  fs/xfs/xfs_dfrag.c | 6
-rw-r--r--  fs/xfs/xfs_inode.c | 19
-rw-r--r--  fs/xfs/xfs_inode.h | 1
-rw-r--r--  fs/xfs/xfs_inode_item.c | 1
-rw-r--r--  fs/xfs/xfs_log.c | 15
-rw-r--r--  fs/xfs/xfs_log.h | 2
-rw-r--r--  fs/xfs/xfs_log_cil.c | 16
-rw-r--r--  fs/xfs/xfs_log_priv.h | 2
-rw-r--r--  fs/xfs/xfs_log_recover.c | 75
-rw-r--r--  fs/xfs/xfs_mount.c | 4
-rw-r--r--  fs/xfs/xfs_mount.h | 1
-rw-r--r--  fs/xfs/xfs_trans.c | 6
-rw-r--r--  fs/xfs/xfs_types.h | 2
318 files changed, 13616 insertions, 8657 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 814ac4e213a8..0a93dc1cb4ac 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -1,6 +1,6 @@
 config 9P_FS
-	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
-	depends on INET && NET_9P && EXPERIMENTAL
+	tristate "Plan 9 Resource Sharing Support (9P2000)"
+	depends on INET && NET_9P
 	help
 	  If you say Y here, you will get experimental support for
 	  Plan 9 resource sharing via the 9P2000 protocol.
@@ -10,7 +10,6 @@ config 9P_FS
 	  If unsure, say N.
 
 if 9P_FS
-
 config 9P_FSCACHE
 	bool "Enable 9P client caching support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7f6c67703195..8d7f3e69ae29 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -814,6 +814,7 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
 
 int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
 {
+	dentry_unhash(d);
 	return v9fs_remove(i, d, 1);
 }
 
@@ -839,6 +840,9 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct p9_fid *newdirfid;
 	struct p9_wstat wstat;
 
+	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
+		dentry_unhash(new_dentry);
+
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
 	retval = 0;
 	old_inode = old_dentry->d_inode;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 82a7c38ddad0..691c78f58bef 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		if (IS_ERR(inode_fid)) {
 			err = PTR_ERR(inode_fid);
 			mutex_unlock(&v9inode->v_mutex);
-			goto error;
+			goto err_clunk_old_fid;
 		}
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	/* Since we are opening a file, assign the open fid to the file */
 	filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
 	if (IS_ERR(filp)) {
-		p9_client_clunk(ofid);
-		return PTR_ERR(filp);
+		err = PTR_ERR(filp);
+		goto err_clunk_old_fid;
 	}
 	filp->private_data = ofid;
 #ifdef CONFIG_9P_FSCACHE
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	return 0;
 
 error:
-	if (ofid)
-		p9_client_clunk(ofid);
 	if (fid)
 		p9_client_clunk(fid);
+err_clunk_old_fid:
+	if (ofid)
+		p9_client_clunk(ofid);
 	return err;
 }
 
diff --git a/fs/Kconfig b/fs/Kconfig
index f3aa9b08b228..19891aab9c6e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
 	def_bool n
 
 config EXPORTFS
-	bool
+	tristate
 
 config FILE_LOCKING
 	bool "Enable POSIX file locking API" if EXPERT
@@ -124,6 +124,7 @@ config TMPFS
 config TMPFS_POSIX_ACL
 	bool "Tmpfs POSIX Access Control Lists"
 	depends on TMPFS
+	select TMPFS_XATTR
 	select GENERIC_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
@@ -134,6 +135,22 @@ config TMPFS_POSIX_ACL
 
 	  If you don't know what Access Control Lists are, say N.
 
+config TMPFS_XATTR
+	bool "Tmpfs extended attributes"
+	depends on TMPFS
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  Currently this enables support for the trusted.* and
+	  security.* namespaces.
+
+	  You need this for POSIX ACL support on tmpfs.
+
+	  If unsure, say N.
+
 config HUGETLBFS
 	bool "HugeTLB file system support"
 	depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index e3e9efc1fdd8..03330e2e390c 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -320,6 +320,8 @@ affs_rmdir(struct inode *dir, struct dentry *dentry)
 		 dentry->d_inode->i_ino,
 		 (int)dentry->d_name.len, dentry->d_name.name);
 
+	dentry_unhash(dentry);
+
 	return affs_remove_header(dentry);
 }
 
@@ -417,6 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct buffer_head *bh = NULL;
 	int retval;
 
+	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
+		dentry_unhash(new_dentry);
+
 	pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
 		 (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
 		 (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 20c106f24927..2c4e05160042 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -845,6 +845,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 	_enter("{%x:%u},{%s}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
 
+	dentry_unhash(dentry);
+
 	ret = -ENAMETOOLONG;
 	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
@@ -1146,6 +1148,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct key *key;
 	int ret;
 
+	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
+		dentry_unhash(new_dentry);
+
 	vnode = AFS_FS_I(old_dentry->d_inode);
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index f55ae23b137e..87d95a8cddbc 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -583,6 +583,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 	if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
+	dentry_unhash(dentry);
+
 	if (atomic_dec_and_test(&ino->count)) {
 		p_ino = autofs4_dentry_ino(dentry->d_parent);
 		if (p_ino && dentry->d_parent != dentry)
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index b14cebfd9047..c7d1d06b0483 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -224,6 +224,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct bfs_sb_info *info;
 	int error = -ENOENT;
 
+	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
+		dentry_unhash(new_dentry);
+
 	old_bh = new_bh = NULL;
 	old_inode = old_dentry->d_inode;
 	if (S_ISDIR(old_inode->i_mode))
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 397d3057d336..1bffbe0ed778 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 	int res;
 	char buf[16];
 
+	memset(&bprm, 0, sizeof(bprm));
+
 	/* Create the file name */
 	sprintf(buf, "/lib/lib%d.so", id);
 
@@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 	if (!bprm.cred)
 		goto out;
 
+	/* We don't really care about recalculating credentials at this point
+	 * as we're past the point of no return and are dealing with shared
+	 * libraries.
+	 */
+	bprm.cred_prepared = 1;
+
 	res = prepare_binprm(&bprm);
 
 	if (!IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 257b00e98428..1f2b19978333 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1120,6 +1120,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					goto restart;
 				}
 			}
+
+			if (!ret && !bdev->bd_openers) {
+				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+				bdi = blk_get_backing_dev_info(bdev);
+				if (bdi == NULL)
+					bdi = &default_backing_dev_info;
+				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
+			}
+
 			/*
 			 * If the device is invalidated, rescan partition
 			 * if open succeeded or failed with -ENOMEDIUM.
@@ -1130,14 +1139,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				rescan_partitions(disk, bdev);
 			if (ret)
 				goto out_clear;
-
-			if (!bdev->bd_openers) {
-				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
-				bdi = blk_get_backing_dev_info(bdev);
-				if (bdi == NULL)
-					bdi = &default_backing_dev_info;
-				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
-			}
 		} else {
 			struct block_device *whole;
 			whole = bdget_disk(disk, 0);
@@ -1237,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 	res = __blkdev_get(bdev, mode, 0);
 
 	if (whole) {
+		struct gendisk *disk = whole->bd_disk;
+
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
 		spin_lock(&bdev_lock);
@@ -1263,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 		spin_unlock(&bdev_lock);
 
 		/*
-		 * Block event polling for write claims.  Any write
-		 * holder makes the write_holder state stick until all
-		 * are released.  This is good enough and tracking
-		 * individual writeable reference is too fragile given
-		 * the way @mode is used in blkdev_get/put().
+		 * Block event polling for write claims if requested.  Any
+		 * write holder makes the write_holder state stick until
+		 * all are released.  This is good enough and tracking
+		 * individual writeable reference is too fragile given the
+		 * way @mode is used in blkdev_get/put().
 		 */
-		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+		if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+		    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
 			bdev->bd_write_holder = true;
-			disk_block_events(bdev->bd_disk);
+			disk_block_events(disk);
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
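The blkdev_get() hunk above makes event blocking on exclusive write opens opt-in via GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE. A hedged sketch of how a removable-media driver might opt in before registering its disk; the function name is illustrative and not taken from this series:

#include <linux/genhd.h>

/* Illustrative only: a driver that wants media-change polling
 * suppressed while a write-exclusive open is held (e.g. while
 * burning media) sets the new flag before add_disk().  Queue and
 * fops setup is elided.
 */
static void example_register_disk(struct gendisk *disk)
{
	disk->flags |= GENHD_FL_REMOVABLE |
		       GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
	add_disk(disk);
}

Disks that do not set the flag keep getting media events polled even while a write holder exists, which is the safer default for non-optical devices.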
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0e0fe0f6ec75..c5d9fbb92bc3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,8 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
+#include <linux/cleancache.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -1969,6 +1971,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 
 	set_page_extent_mapped(page);
 
+	if (!PageUptodate(page)) {
+		if (cleancache_get_page(page) == 0) {
+			BUG_ON(blocksize != PAGE_SIZE);
+			goto out;
+		}
+	}
+
 	end = page_end;
 	while (1) {
 		lock_extent(tree, start, end, GFP_NOFS);
@@ -2102,6 +2111,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
+out:
 	if (!nr) {
 		if (!PageError(page))
 			SetPageUptodate(page);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fa2c5d87f219..ca38eca70af0 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -710,7 +710,7 @@ again:
 		WARN_ON(cur->checked);
 		if (!list_empty(&cur->upper)) {
 			/*
-			 * the backref was added previously when processsing
+			 * the backref was added previously when processing
 			 * backref of type BTRFS_TREE_BLOCK_REF_KEY
 			 */
 			BUG_ON(!list_is_singular(&cur->upper));
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 28e3cb2607ff..9b2e7e5bc3ef 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -39,6 +39,7 @@
 #include <linux/miscdevice.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/cleancache.h>
 #include "compat.h"
 #include "delayed-inode.h"
 #include "ctree.h"
@@ -632,6 +633,7 @@ static int btrfs_fill_super(struct super_block *sb,
 	sb->s_root = root_dentry;
 
 	save_mount_options(sb, data);
+	cleancache_init_fs(sb);
 	return 0;
 
 fail_close:
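The cleancache hooks added in the btrfs hunks above and in the fs/buffer.c hunk below follow one protocol: declare a pool at mount, ask cleancache before issuing real read I/O, and flush stale copies on invalidation. A hedged sketch of that lifecycle under the cleancache API of this era; the examplefs names and its get_block callback are hypothetical:

#include <linux/cleancache.h>
#include <linux/mpage.h>

/* Mount time: register this filesystem instance with cleancache. */
static void examplefs_init_cleancache(struct super_block *sb)
{
	cleancache_init_fs(sb);
}

static int examplefs_readpage(struct file *file, struct page *page)
{
	/* Before real I/O: a return of 0 means cleancache filled the
	 * page from its (transcendent) copy and the block read can be
	 * skipped entirely.
	 */
	if (cleancache_get_page(page) == 0) {
		SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}
	/* Otherwise fall back to a normal block read. */
	return mpage_readpage(page, examplefs_get_block);
}

The invalidation side is the cleancache_flush_inode(mapping) call that invalidate_bdev() gains below.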
diff --git a/fs/buffer.c b/fs/buffer.c
index a08bb8e61c6f..698c6b2cc462 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/cleancache.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -269,6 +270,10 @@ void invalidate_bdev(struct block_device *bdev)
 	invalidate_bh_lrus();
 	lru_add_drain_all();	/* make sure all lru add caches are flushed */
 	invalidate_mapping_pages(mapping, 0, -1);
+	/* 99% of the time, we don't need to flush the cleancache on the bdev.
+	 * But, for the strange corners, lets be cautious
+	 */
+	cleancache_flush_inode(mapping);
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
@@ -2331,24 +2336,26 @@ EXPORT_SYMBOL(block_commit_write);
  * page lock we can determine safely if the page is beyond EOF. If it is not
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
+ *
+ * Direct callers of this function should call vfs_check_frozen() so that page
+ * fault does not busyloop until the fs is thawed.
  */
-int
-block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		   get_block_t get_block)
+int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+			 get_block_t get_block)
 {
 	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
+	int ret;
 
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
 	    (page_offset(page) > size)) {
-		/* page got truncated out from underneath us */
-		unlock_page(page);
-		goto out;
+		/* We overload EFAULT to mean page got truncated */
+		ret = -EFAULT;
+		goto out_unlock;
 	}
 
 	/* page is wholly or partially inside EOF */
@@ -2361,18 +2368,41 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
-	if (unlikely(ret)) {
-		unlock_page(page);
-		if (ret == -ENOMEM)
-			ret = VM_FAULT_OOM;
-		else /* -ENOSPC, -EIO, etc */
-			ret = VM_FAULT_SIGBUS;
-	} else
-		ret = VM_FAULT_LOCKED;
-
-out:
+	if (unlikely(ret < 0))
+		goto out_unlock;
+	/*
+	 * Freezing in progress? We check after the page is marked dirty and
+	 * with page lock held so if the test here fails, we are sure freezing
+	 * code will wait during syncing until the page fault is done - at that
+	 * point page will be dirty and unlocked so freezing code will write it
+	 * and writeprotect it again.
+	 */
+	set_page_dirty(page);
+	if (inode->i_sb->s_frozen != SB_UNFROZEN) {
+		ret = -EAGAIN;
+		goto out_unlock;
+	}
+	return 0;
+out_unlock:
+	unlock_page(page);
 	return ret;
 }
+EXPORT_SYMBOL(__block_page_mkwrite);
+
+int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+		   get_block_t get_block)
+{
+	int ret;
+	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
+
+	/*
+	 * This check is racy but catches the common case. The check in
+	 * __block_page_mkwrite() is reliable.
+	 */
+	vfs_check_frozen(sb, SB_FREEZE_WRITE);
+	ret = __block_page_mkwrite(vma, vmf, get_block);
+	return block_page_mkwrite_return(ret);
+}
 EXPORT_SYMBOL(block_page_mkwrite);
 
 /*
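With the split above, a filesystem that needs no journaling work in its fault path can keep using block_page_mkwrite(), which performs the racy freeze check and converts the internal error codes via block_page_mkwrite_return(); only filesystems calling __block_page_mkwrite() directly must do vfs_check_frozen() themselves. A hedged sketch of the simple case; the examplefs names and its get_block callback are hypothetical:

static int examplefs_page_mkwrite(struct vm_area_struct *vma,
				  struct vm_fault *vmf)
{
	/* block_page_mkwrite() calls vfs_check_frozen() for us and maps
	 * -EFAULT/-EAGAIN/-ENOMEM etc. to the appropriate VM_FAULT_*
	 * codes via block_page_mkwrite_return().
	 */
	return block_page_mkwrite(vma, vmf, examplefs_get_block);
}

static const struct vm_operations_struct examplefs_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= examplefs_page_mkwrite,
};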
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 38b8ab554924..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -848,7 +848,8 @@ get_more_pages:
 			op->payload_len = cpu_to_le32(len);
 			req->r_request->hdr.data_len = cpu_to_le32(len);
 
-			ceph_osdc_start_request(&fsc->client->osdc, req, true);
+			rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+			BUG_ON(rc);
 			req = NULL;
 
 			/* continue? */
@@ -880,8 +881,6 @@ release_pvec_pages:
 out:
 	if (req)
 		ceph_osdc_put_request(req);
-	if (rc > 0)
-		rc = 0;  /* vfs expects us to return 0 */
 	ceph_put_snap_context(snapc);
 	dout("writepages done, rc = %d\n", rc);
 	return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2a5404c1c42f..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
 		list_add_tail(&cap->session_caps, &session->s_caps);
 		session->s_nr_caps++;
 		spin_unlock(&session->s_cap_lock);
-	}
+	} else if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 
 	if (!ci->i_snap_realm) {
 		/*
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			      struct ceph_mds_session *session,
 			      int *open_target_sessions)
 {
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
 	unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			 * export targets, so that we get the matching IMPORT
 			 */
 			*open_target_sessions = 1;
+
+			/*
+			 * we can't flush dirty caps that we've seen the
+			 * EXPORT but no IMPORT for
+			 */
+			spin_lock(&mdsc->cap_dirty_lock);
+			if (!list_empty(&ci->i_dirty_item)) {
+				dout(" moving %p to cap_dirty_migrating\n",
+				     inode);
+				list_move(&ci->i_dirty_item,
+					  &mdsc->cap_dirty_migrating);
+			}
+			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 		__ceph_remove_cap(cap);
 	}
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 		ci->i_cap_exporting_issued = 0;
 		ci->i_cap_exporting_mseq = 0;
 		ci->i_cap_exporting_mds = -1;
+
+		spin_lock(&mdsc->cap_dirty_lock);
+		if (!list_empty(&ci->i_dirty_item)) {
+			dout(" moving %p back to cap_dirty\n", inode);
+			list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
+		}
+		spin_unlock(&mdsc->cap_dirty_lock);
 	} else {
 		dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
 		     inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
  */
 void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 {
-	struct ceph_inode_info *ci, *nci = NULL;
-	struct inode *inode, *ninode = NULL;
-	struct list_head *p, *n;
+	struct ceph_inode_info *ci;
+	struct inode *inode;
 
 	dout("flush_dirty_caps\n");
 	spin_lock(&mdsc->cap_dirty_lock);
-	list_for_each_safe(p, n, &mdsc->cap_dirty) {
-		if (nci) {
-			ci = nci;
-			inode = ninode;
-			ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
-			dout("flush_dirty_caps inode %p (was next inode)\n",
-			     inode);
-		} else {
-			ci = list_entry(p, struct ceph_inode_info,
-					i_dirty_item);
-			inode = igrab(&ci->vfs_inode);
-			BUG_ON(!inode);
-			dout("flush_dirty_caps inode %p\n", inode);
-		}
-		if (n != &mdsc->cap_dirty) {
-			nci = list_entry(n, struct ceph_inode_info,
-					 i_dirty_item);
-			ninode = igrab(&nci->vfs_inode);
-			BUG_ON(!ninode);
-			nci->i_ceph_flags |= CEPH_I_NOFLUSH;
-			dout("flush_dirty_caps next inode %p, noflush\n",
-			     ninode);
-		} else {
-			nci = NULL;
-			ninode = NULL;
-		}
+	while (!list_empty(&mdsc->cap_dirty)) {
+		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
+				      i_dirty_item);
+		inode = igrab(&ci->vfs_inode);
+		dout("flush_dirty_caps %p\n", inode);
 		spin_unlock(&mdsc->cap_dirty_lock);
 		if (inode) {
 			ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 		spin_lock(&mdsc->cap_dirty_lock);
 	}
 	spin_unlock(&mdsc->cap_dirty_lock);
+	dout("flush_dirty_caps done\n");
 }
 
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1a867a3601ae..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -360,7 +360,7 @@ more:
 		rinfo = &fi->last_readdir->r_reply_info;
 		dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
 		     rinfo->dir_nr, off, fi->offset);
-	while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
+	while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
 		u64 pos = ceph_make_fpos(frag, off);
 		struct ceph_mds_reply_inode *in =
 			rinfo->dir_in[off - fi->offset].in;
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
+	const int bufsize = 1024;
 
 	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 
 	if (!cf->dir_info) {
-		cf->dir_info = kmalloc(1024, GFP_NOFS);
+		cf->dir_info = kmalloc(bufsize, GFP_NOFS);
 		if (!cf->dir_info)
 			return -ENOMEM;
 		cf->dir_info_len =
-			sprintf(cf->dir_info,
+			snprintf(cf->dir_info, bufsize,
 				"entries:   %20lld\n"
 				" files:    %20lld\n"
 				" subdirs:  %20lld\n"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
 static struct dentry *__fh_to_dentry(struct super_block *sb,
 				     struct ceph_nfs_fh *fh)
 {
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
 	struct dentry *dentry;
 	struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 	vino.ino = fh->ino;
 	vino.snap = CEPH_NOSNAP;
 	inode = ceph_find_inode(sb, vino);
-	if (!inode)
-		return ERR_PTR(-ESTALE);
+	if (!inode) {
+		struct ceph_mds_request *req;
+
+		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+					       USE_ANY_MDS);
+		if (IS_ERR(req))
+			return ERR_CAST(req);
+
+		req->r_ino1 = vino;
+		req->r_num_caps = 1;
+		err = ceph_mdsc_do_request(mdsc, NULL, req);
+		inode = req->r_target_inode;
+		if (inode)
+			igrab(inode);
+		ceph_mdsc_put_request(req);
+		if (!inode)
+			return ERR_PTR(-ESTALE);
+	}
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
 	snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
 	req->r_num_caps = 1;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	inode = req->r_target_inode;
+	if (inode)
+		igrab(inode);
 	ceph_mdsc_put_request(req);
-	inode = ceph_find_inode(sb, vino);
 	if (!inode)
 		return ERR_PTR(err ? err : -ESTALE);
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d0fae4ce9ba5..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
 	if (dir) {
 		struct ceph_inode_info *ci = ceph_inode(dir);
 
+		ihold(dir);
 		spin_lock(&ci->i_unsafe_lock);
 		req->r_unsafe_dir = dir;
 		list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 		spin_lock(&ci->i_unsafe_lock);
 		list_del_init(&req->r_unsafe_dir_item);
 		spin_unlock(&ci->i_unsafe_lock);
+
+		iput(req->r_unsafe_dir);
+		req->r_unsafe_dir = NULL;
 	}
 
 	ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 {
 	struct super_block *sb = mdsc->fsc->sb;
 	struct inode *inode;
-	struct ceph_inode_info *ci;
 	struct dentry *parent, *dentry;
 	struct ceph_dentry_info *di;
 	int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 		dout("handle_lease no inode %llx\n", vino.ino);
 		goto release;
 	}
-	ci = ceph_inode(inode);
 
 	/* dentry */
 	parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	spin_lock_init(&mdsc->snap_flush_lock);
 	mdsc->cap_flush_seq = 0;
 	INIT_LIST_HEAD(&mdsc->cap_dirty);
+	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
 	mdsc->num_cap_flushing = 0;
 	spin_lock_init(&mdsc->cap_dirty_lock);
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
 
 	u64               cap_flush_seq;
 	struct list_head  cap_dirty;        /* inodes with dirty caps */
+	struct list_head  cap_dirty_migrating; /* ...that are migration... */
 	int               num_cap_flushing; /* # caps we are flushing */
 	spinlock_t        cap_dirty_lock;   /* protects above items */
 	wait_queue_head_t cap_flushing_wq;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..1cd4c3a1862d 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
 	select CRYPTO_MD5
 	select CRYPTO_HMAC
 	select CRYPTO_ARC4
+	select CRYPTO_DES
 	help
 	  This is the client VFS module for the Common Internet File System
 	  (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,8 @@ config CIFS_ACL
 	  Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
 	  is handed over to the application/caller.
 
-config CIFS_EXPERIMENTAL
-	bool "CIFS Experimental Features (EXPERIMENTAL)"
+config CIFS_NFSD_EXPORT
+	bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
 	depends on CIFS && EXPERIMENTAL
 	help
-	  Enables cifs features under testing. These features are
-	  experimental and currently include DFS support and directory
-	  change notification ie fcntl(F_DNOTIFY), as well as the upcall
-	  mechanism which will be used for Kerberos session negotiation
-	  and uid remapping.  Some of these features also may depend on
-	  setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-	  (which is disabled by default). See the file fs/cifs/README
-	  for more details.  If unsure, say N.
-
+	  Allows NFS server to export a CIFS mounted share (nfsd over cifs)
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
 obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
-	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
+	  link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
 	  cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
index 74ab165fc646..c5c2c5e5f0f2 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -457,6 +457,9 @@ A partial list of the supported mount options follows:
 		otherwise - read from the server. All written data are stored
 		in the cache, but if the client doesn't have Exclusive Oplock,
 		it writes the data to the server.
+  rwpidforward  Forward pid of a process who opened a file to any read or write
+		operation on that file. This prevent applications like WINE
+		from failing on read and write if we use mandatory brlock style.
   acl		Allow setfacl and getfacl to manage posix ACLs if server
 		supports them.  (default)
   noacl		Do not allow setfacl and getfacl calls on this mount
@@ -704,18 +707,6 @@ the start of smb requests and responses can be enabled via:
 
 	echo 1 > /proc/fs/cifs/traceSMB
 
-Two other experimental features are under development. To test these
-requires enabling CONFIG_CIFS_EXPERIMENTAL
-
-	cifsacl support needed to retrieve approximated mode bits based on
-	the contents on the CIFS ACL.
-
-	lease support: cifs will check the oplock state before calling into
-	the vfs to see if we can grant a lease on a file.
-
-	DNOTIFY fcntl: needed for support of directory change
-		notification and perhaps later for file leases)
-
 Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
 if the kernel was configured with cifs statistics enabled.  The statistics
 represent the number of successful (ie non-zero return code from the server)
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 53d57a3fe427..dd8584d35a14 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -146,7 +146,7 @@ static char *extract_sharename(const char *treename)
 static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
 				   uint16_t maxbuf)
 {
-	const struct cifsTconInfo *tcon = cookie_netfs_data;
+	const struct cifs_tcon *tcon = cookie_netfs_data;
 	char *sharename;
 	uint16_t len;
 
@@ -173,7 +173,7 @@ cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
 			   uint16_t maxbuf)
 {
 	struct cifs_fscache_super_auxdata auxdata;
-	const struct cifsTconInfo *tcon = cookie_netfs_data;
+	const struct cifs_tcon *tcon = cookie_netfs_data;
 
 	memset(&auxdata, 0, sizeof(auxdata));
 	auxdata.resource_id = tcon->resource_id;
@@ -192,7 +192,7 @@ fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
 					     uint16_t datalen)
 {
 	struct cifs_fscache_super_auxdata auxdata;
-	const struct cifsTconInfo *tcon = cookie_netfs_data;
+	const struct cifs_tcon *tcon = cookie_netfs_data;
 
 	if (datalen != sizeof(auxdata))
 		return FSCACHE_CHECKAUX_OBSOLETE;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 30d01bc90855..2fe3cf13b2e9 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
 	cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
 		  smb->Command, smb->Status.CifsError,
 		  smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
-	cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
+	cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
 }
 
 
@@ -110,8 +110,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	struct list_head *tmp1, *tmp2, *tmp3;
 	struct mid_q_entry *mid_entry;
 	struct TCP_Server_Info *server;
-	struct cifsSesInfo *ses;
-	struct cifsTconInfo *tcon;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
 	int i, j;
 	__u32 dev_type;
 
@@ -152,7 +152,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 				    tcp_ses_list);
 		i++;
 		list_for_each(tmp2, &server->smb_ses_list) {
-			ses = list_entry(tmp2, struct cifsSesInfo,
+			ses = list_entry(tmp2, struct cifs_ses,
 					 smb_ses_list);
 			if ((ses->serverDomain == NULL) ||
 				(ses->serverOS == NULL) ||
@@ -171,7 +171,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 			seq_printf(m, "TCP status: %d\n\tLocal Users To "
 				   "Server: %d SecMode: 0x%x Req On Wire: %d",
 				   server->tcpStatus, server->srv_count,
-				   server->secMode,
+				   server->sec_mode,
 				   atomic_read(&server->inFlight));
 
 #ifdef CONFIG_CIFS_STATS2
@@ -183,7 +183,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 			seq_puts(m, "\n\tShares:");
 			j = 0;
 			list_for_each(tmp3, &ses->tcon_list) {
-				tcon = list_entry(tmp3, struct cifsTconInfo,
+				tcon = list_entry(tmp3, struct cifs_tcon,
 						  tcon_list);
 				++j;
 				dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
@@ -256,8 +256,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 	int rc;
 	struct list_head *tmp1, *tmp2, *tmp3;
 	struct TCP_Server_Info *server;
-	struct cifsSesInfo *ses;
-	struct cifsTconInfo *tcon;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
 
 	rc = get_user(c, buffer);
 	if (rc)
@@ -273,11 +273,11 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 			server = list_entry(tmp1, struct TCP_Server_Info,
 					    tcp_ses_list);
 			list_for_each(tmp2, &server->smb_ses_list) {
-				ses = list_entry(tmp2, struct cifsSesInfo,
+				ses = list_entry(tmp2, struct cifs_ses,
 						 smb_ses_list);
 				list_for_each(tmp3, &ses->tcon_list) {
 					tcon = list_entry(tmp3,
-							  struct cifsTconInfo,
+							  struct cifs_tcon,
 							  tcon_list);
 					atomic_set(&tcon->num_smbs_sent, 0);
 					atomic_set(&tcon->num_writes, 0);
@@ -312,8 +312,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
 	int i;
 	struct list_head *tmp1, *tmp2, *tmp3;
 	struct TCP_Server_Info *server;
-	struct cifsSesInfo *ses;
-	struct cifsTconInfo *tcon;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
 
 	seq_printf(m,
 			"Resources in use\nCIFS Session: %d\n",
@@ -346,11 +346,11 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
 		server = list_entry(tmp1, struct TCP_Server_Info,
 				    tcp_ses_list);
 		list_for_each(tmp2, &server->smb_ses_list) {
-			ses = list_entry(tmp2, struct cifsSesInfo,
+			ses = list_entry(tmp2, struct cifs_ses,
 					 smb_ses_list);
 			list_for_each(tmp3, &ses->tcon_list) {
 				tcon = list_entry(tmp3,
-						  struct cifsTconInfo,
+						  struct cifs_tcon,
 						  tcon_list);
 				i++;
 				seq_printf(m, "\n%d) %s", i, tcon->treeName);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 2b68ac57d97d..8d8f28c94c0f 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -272,7 +272,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
 	struct dfs_info3_param *referrals = NULL;
 	unsigned int num_referrals = 0;
 	struct cifs_sb_info *cifs_sb;
-	struct cifsSesInfo *ses;
+	struct cifs_ses *ses;
 	char *full_path;
 	int xid, i;
 	int rc;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..ffb1459dc6ec 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -41,6 +41,7 @@
 #define CIFS_MOUNT_MF_SYMLINKS	0x10000 /* Minshall+French Symlinks enabled */
 #define CIFS_MOUNT_MULTIUSER	0x20000 /* multiuser mount */
 #define CIFS_MOUNT_STRICT_IO	0x40000 /* strict cache mode */
+#define CIFS_MOUNT_RWPIDFORWARD	0x80000 /* use pid forwarding for rw */
 
 struct cifs_sb_info {
 	struct rb_root tlink_tree;
@@ -56,11 +57,7 @@ struct cifs_sb_info {
 	mode_t	mnt_file_mode;
 	mode_t	mnt_dir_mode;
 	unsigned int mnt_cifs_flags;
-	int	prepathlen;
-	char   *prepath; /* relative path under the share to mount to */
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	char   *mountdata; /* mount options received at mount time */
-#endif
+	char   *mountdata; /* options received at mount time or via DFS refs */
 	struct backing_dev_info bdi;
 	struct delayed_work prune_tlinks;
 };
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 33d221394aca..2272fd5fe5b7 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -95,7 +95,7 @@ struct key_type cifs_spnego_key_type = {
 
 /* get a key struct with a SPNEGO security blob, suitable for session setup */
 struct key *
-cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
+cifs_get_spnego_key(struct cifs_ses *sesInfo)
 {
 	struct TCP_Server_Info *server = sesInfo->server;
 	struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index e4041ec4d712..31bef9ee078b 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -41,7 +41,7 @@ struct cifs_spnego_msg {
41 41
42#ifdef __KERNEL__ 42#ifdef __KERNEL__
43extern struct key_type cifs_spnego_key_type; 43extern struct key_type cifs_spnego_key_type;
44extern struct key *cifs_get_spnego_key(struct cifsSesInfo *sesInfo); 44extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo);
45#endif /* KERNEL */ 45#endif /* KERNEL */
46 46
47#endif /* _CIFS_SPNEGO_H */ 47#endif /* _CIFS_SPNEGO_H */
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 644dd882a560..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
82char *cifs_strndup_from_ucs(const char *src, const int maxlen, 82char *cifs_strndup_from_ucs(const char *src, const int maxlen,
83 const bool is_unicode, 83 const bool is_unicode,
84 const struct nls_table *codepage); 84 const struct nls_table *codepage);
85extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
86 const struct nls_table *cp, int mapChars);
87
85#endif 88#endif
86 89
87/* 90/*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..5f02b4ee9a03 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,77 +23,404 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/string.h>
27#include <linux/keyctl.h>
28#include <linux/key-type.h>
29#include <keys/user-type.h>
26#include "cifspdu.h" 30#include "cifspdu.h"
27#include "cifsglob.h" 31#include "cifsglob.h"
28#include "cifsacl.h" 32#include "cifsacl.h"
29#include "cifsproto.h" 33#include "cifsproto.h"
30#include "cifs_debug.h" 34#include "cifs_debug.h"
31 35
32
33static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
34 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
35 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
36 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
37 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
38 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
39 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
40 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
41;
42
43
44/* security id for everyone/world system group */ 36/* security id for everyone/world system group */
45static const struct cifs_sid sid_everyone = { 37static const struct cifs_sid sid_everyone = {
46 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; 38 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
47/* security id for Authenticated Users system group */ 39/* security id for Authenticated Users system group */
48static const struct cifs_sid sid_authusers = { 40static const struct cifs_sid sid_authusers = {
49 1, 1, {0, 0, 0, 0, 0, 5}, {11} }; 41 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
50/* group users */ 42/* group users */
51static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; 43static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
52 44
45const struct cred *root_cred;
53 46
54int match_sid(struct cifs_sid *ctsid) 47static void
48shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
49 int *nr_del)
55{ 50{
56 int i, j; 51 struct rb_node *node;
57 int num_subauth, num_sat, num_saw; 52 struct rb_node *tmp;
58 struct cifs_sid *cwsid; 53 struct cifs_sid_id *psidid;
54
55 node = rb_first(root);
56 while (node) {
57 tmp = node;
58 node = rb_next(tmp);
59 psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
60 if (nr_to_scan == 0 || *nr_del == nr_to_scan)
61 ++(*nr_rem);
62 else {
63 if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
64 && psidid->refcount == 0) {
65 rb_erase(tmp, root);
66 ++(*nr_del);
67 } else
68 ++(*nr_rem);
69 }
70 }
71}
72
73/*
74 * Run idmap cache shrinker.
75 */
76static int
77cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
78{
79 int nr_del = 0;
80 int nr_rem = 0;
81 struct rb_root *root;
82
83 root = &uidtree;
84 spin_lock(&siduidlock);
85 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
86 spin_unlock(&siduidlock);
87
88 root = &gidtree;
89 spin_lock(&sidgidlock);
90 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
91 spin_unlock(&sidgidlock);
92
93 return nr_rem;
94}
95
96static struct shrinker cifs_shrinker = {
97 .shrink = cifs_idmap_shrinker,
98 .seeks = DEFAULT_SEEKS,
99};
100
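
cifs_idmap_shrinker() is written against the shrinker interface of this kernel generation, in which a single .shrink callback both counts and reclaims. A rough restatement of that contract for readers of the function above (my paraphrase of the mm convention, not quoted from mm code):

    /*
     * Pre-shrink_control shrinker contract, roughly:
     *  - nr_to_scan == 0: free nothing; just return an estimate of how
     *    many cache entries could be reclaimed.
     *  - nr_to_scan  > 0: free up to nr_to_scan entries, then return
     *    how many remain.
     * shrink_idmap_tree() handles both cases in one pass: expired nodes
     * with refcount 0 are erased and counted in *nr_del; everything
     * else is counted in *nr_rem, which becomes the return value.
     */
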
101static int
102cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
103{
104 char *payload;
105
106 payload = kmalloc(datalen, GFP_KERNEL);
107 if (!payload)
108 return -ENOMEM;
109
110 memcpy(payload, data, datalen);
111 key->payload.data = payload;
112 return 0;
113}
114
115static inline void
116cifs_idmap_key_destroy(struct key *key)
117{
118 kfree(key->payload.data);
119}
59 120
60 if (!ctsid) 121struct key_type cifs_idmap_key_type = {
61 return -1; 122 .name = "cifs.idmap",
123 .instantiate = cifs_idmap_key_instantiate,
124 .destroy = cifs_idmap_key_destroy,
125 .describe = user_describe,
126 .match = user_match,
127};
128
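
Keys of this new "cifs.idmap" type are instantiated from userspace via request_key(). With the cifs.idmap helper shipped in cifs-utils, the usual wiring is a single /etc/request-key.conf line along these lines (the helper path varies by distribution; treat this as an illustrative example):

    #OP     TYPE        DESCRIPTION  CALLOUT-INFO  PROGRAM
    create  cifs.idmap  *            *             /usr/sbin/cifs.idmap %k
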
129static void
130sid_to_str(struct cifs_sid *sidptr, char *sidstr)
131{
132 int i;
133 unsigned long saval;
134 char *strptr;
62 135
63 for (i = 0; i < NUM_WK_SIDS; ++i) { 136 strptr = sidstr;
64 cwsid = &(wksidarr[i].cifssid);
65 137
66 /* compare the revision */ 138 sprintf(strptr, "%s", "S");
67 if (ctsid->revision != cwsid->revision) 139 strptr = sidstr + strlen(sidstr);
68 continue;
69 140
70 /* compare all of the six auth values */ 141 sprintf(strptr, "-%d", sidptr->revision);
71 for (j = 0; j < 6; ++j) { 142 strptr = sidstr + strlen(sidstr);
72 if (ctsid->authority[j] != cwsid->authority[j]) 143
73 break; 144 for (i = 0; i < 6; ++i) {
145 if (sidptr->authority[i]) {
146 sprintf(strptr, "-%d", sidptr->authority[i]);
147 strptr = sidstr + strlen(sidstr);
74 } 148 }
75 if (j < 6) 149 }
76 continue; /* all of the auth values did not match */ 150
77 151 for (i = 0; i < sidptr->num_subauth; ++i) {
78 /* compare all of the subauth values if any */ 152 saval = le32_to_cpu(sidptr->sub_auth[i]);
79 num_sat = ctsid->num_subauth; 153 sprintf(strptr, "-%ld", saval);
80 num_saw = cwsid->num_subauth; 154 strptr = sidstr + strlen(sidstr);
81 num_subauth = num_sat < num_saw ? num_sat : num_saw; 155 }
82 if (num_subauth) { 156}
83 for (j = 0; j < num_subauth; ++j) { 157
84 if (ctsid->sub_auth[j] != cwsid->sub_auth[j]) 158static void
85 break; 159id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
86 } 160 struct cifs_sid_id **psidid, char *typestr)
87 if (j < num_subauth) 161{
88 continue; /* all sub_auth values do not match */ 162 int rc;
163 char *strptr;
164 struct rb_node *node = root->rb_node;
165 struct rb_node *parent = NULL;
166 struct rb_node **linkto = &(root->rb_node);
167 struct cifs_sid_id *lsidid;
168
169 while (node) {
170 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
171 parent = node;
172 rc = compare_sids(sidptr, &((lsidid)->sid));
173 if (rc > 0) {
174 linkto = &(node->rb_left);
175 node = node->rb_left;
176 } else if (rc < 0) {
177 linkto = &(node->rb_right);
178 node = node->rb_right;
179 }
180 }
181
182 memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
183 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
184 (*psidid)->refcount = 0;
185
186 sprintf((*psidid)->sidstr, "%s", typestr);
187 strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
188 sid_to_str(&(*psidid)->sid, strptr);
189
190 clear_bit(SID_ID_PENDING, &(*psidid)->state);
191 clear_bit(SID_ID_MAPPED, &(*psidid)->state);
192
193 rb_link_node(&(*psidid)->rbnode, parent, linkto);
194 rb_insert_color(&(*psidid)->rbnode, root);
195}
196
197static struct cifs_sid_id *
198id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
199{
200 int rc;
201 struct rb_node *node = root->rb_node;
202 struct cifs_sid_id *lsidid;
203
204 while (node) {
205 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
206 rc = compare_sids(sidptr, &((lsidid)->sid));
207 if (rc > 0) {
208 node = node->rb_left;
209 } else if (rc < 0) {
210 node = node->rb_right;
211 } else /* node found */
212 return lsidid;
213 }
214
215 return NULL;
216}
217
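
Note that id_rb_insert() and id_rb_search() both descend rb_left when compare_sids() returns a positive value, the mirror image of the usual rbtree idiom. That is harmless as long as the two walkers agree; a quick statement of the shared invariant (illustrative comment only):

    /*
     * Both tree walkers must map comparator results identically:
     *   compare_sids(key, node) > 0  -> follow rb_left
     *   compare_sids(key, node) < 0  -> follow rb_right
     *   == 0                         -> match
     * If insert and search ever disagreed on this mapping, lookups
     * would miss nodes that are present in the tree.
     */
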
218static int
219sidid_pending_wait(void *unused)
220{
221 schedule();
222 return signal_pending(current) ? -ERESTARTSYS : 0;
223}
224
225static int
226sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
227 struct cifs_fattr *fattr, uint sidtype)
228{
229 int rc;
230 unsigned long cid;
231 struct key *idkey;
232 const struct cred *saved_cred;
233 struct cifs_sid_id *psidid, *npsidid;
234 struct rb_root *cidtree;
235 spinlock_t *cidlock;
236
237 if (sidtype == SIDOWNER) {
238 cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
239 cidlock = &siduidlock;
240 cidtree = &uidtree;
241 } else if (sidtype == SIDGROUP) {
242 cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
243 cidlock = &sidgidlock;
244 cidtree = &gidtree;
245 } else
246 return -ENOENT;
247
248 spin_lock(cidlock);
249 psidid = id_rb_search(cidtree, psid);
250
251 if (!psidid) { /* node does not exist, allocate one & attempt adding */
252 spin_unlock(cidlock);
253 npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
254 if (!npsidid)
255 return -ENOMEM;
256
257 npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
258 if (!npsidid->sidstr) {
259 kfree(npsidid);
260 return -ENOMEM;
261 }
262
263 spin_lock(cidlock);
264 psidid = id_rb_search(cidtree, psid);
265 if (psidid) { /* node happened to get inserted meanwhile */
266 ++psidid->refcount;
267 spin_unlock(cidlock);
268 kfree(npsidid->sidstr);
269 kfree(npsidid);
270 } else {
271 psidid = npsidid;
272 id_rb_insert(cidtree, psid, &psidid,
273 sidtype == SIDOWNER ? "os:" : "gs:");
274 ++psidid->refcount;
275 spin_unlock(cidlock);
89 } 276 }
277 } else {
278 ++psidid->refcount;
279 spin_unlock(cidlock);
280 }
281
282 /*
283 * If we are here, it is safe to access psidid and its fields
284 * since a reference was taken earlier while holding the spinlock.
285 * A reference on the node is put without holding the spinlock
 286 * and it is OK to do so in this case: the shrinker will not erase
 287 * this node until all references are put, and we do not access
 288 * any fields of the node after a reference is put.
289 */
290 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
291 cid = psidid->id;
292 psidid->time = jiffies; /* update ts for accessing */
293 goto sid_to_id_out;
294 }
90 295
91 cFYI(1, "matching sid: %s\n", wksidarr[i].sidname); 296 if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
92 return 0; /* sids compare/match */ 297 goto sid_to_id_out;
298
299 if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
300 saved_cred = override_creds(root_cred);
301 idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
302 if (IS_ERR(idkey))
303 cFYI(1, "%s: Can't map SID to an id", __func__);
304 else {
305 cid = *(unsigned long *)idkey->payload.value;
306 psidid->id = cid;
307 set_bit(SID_ID_MAPPED, &psidid->state);
308 key_put(idkey);
309 kfree(psidid->sidstr);
310 }
311 revert_creds(saved_cred);
312 psidid->time = jiffies; /* update ts for accessing */
313 clear_bit(SID_ID_PENDING, &psidid->state);
314 wake_up_bit(&psidid->state, SID_ID_PENDING);
315 } else {
316 rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
317 sidid_pending_wait, TASK_INTERRUPTIBLE);
318 if (rc) {
319 cFYI(1, "%s: sidid_pending_wait interrupted %d",
320 __func__, rc);
321 --psidid->refcount; /* decremented without spinlock */
322 return rc;
323 }
324 if (test_bit(SID_ID_MAPPED, &psidid->state))
325 cid = psidid->id;
93 } 326 }
94 327
95 cFYI(1, "No matching sid"); 328sid_to_id_out:
96 return -1; 329 --psidid->refcount; /* decremented without spinlock */
330 if (sidtype == SIDOWNER)
331 fattr->cf_uid = cid;
332 else
333 fattr->cf_gid = cid;
334
335 return 0;
336}
337
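
The mapped id comes back through the key payload. In this era's struct key the payload is a union, so the void *data pointer stored by cifs_idmap_key_instantiate() and the unsigned long value read here alias the same storage; *(unsigned long *)idkey->payload.value therefore dereferences the kmalloc'ed upcall blob, whose leading word is expected to hold the uid or gid. A sketch of the aliasing (field list abbreviated; see the struct key definition of this kernel for the full union):

    union {                        /* inside struct key */
            unsigned long value;   /* read by sid_to_id() */
            void *data;            /* set by cifs_idmap_key_instantiate() */
            /* ... */
    } payload;
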
338int
339init_cifs_idmap(void)
340{
341 struct cred *cred;
342 struct key *keyring;
343 int ret;
344
345 cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
346
347 /* create an override credential set with a special thread keyring in
348 * which requests are cached
349 *
350 * this is used to prevent malicious redirections from being installed
351 * with add_key().
352 */
353 cred = prepare_kernel_cred(NULL);
354 if (!cred)
355 return -ENOMEM;
356
357 keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
358 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
359 KEY_USR_VIEW | KEY_USR_READ,
360 KEY_ALLOC_NOT_IN_QUOTA);
361 if (IS_ERR(keyring)) {
362 ret = PTR_ERR(keyring);
363 goto failed_put_cred;
364 }
365
366 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
367 if (ret < 0)
368 goto failed_put_key;
369
370 ret = register_key_type(&cifs_idmap_key_type);
371 if (ret < 0)
372 goto failed_put_key;
373
374 /* instruct request_key() to use this special keyring as a cache for
375 * the results it looks up */
376 cred->thread_keyring = keyring;
377 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
378 root_cred = cred;
379
380 spin_lock_init(&siduidlock);
381 uidtree = RB_ROOT;
382 spin_lock_init(&sidgidlock);
383 gidtree = RB_ROOT;
384
385 register_shrinker(&cifs_shrinker);
386
387 cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
388 return 0;
389
390failed_put_key:
391 key_put(keyring);
392failed_put_cred:
393 put_cred(cred);
394 return ret;
395}
396
397void
398exit_cifs_idmap(void)
399{
400 key_revoke(root_cred->thread_keyring);
401 unregister_key_type(&cifs_idmap_key_type);
402 put_cred(root_cred);
403 unregister_shrinker(&cifs_shrinker);
404 cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
405}
406
407void
408cifs_destroy_idmaptrees(void)
409{
410 struct rb_root *root;
411 struct rb_node *node;
412
413 root = &uidtree;
414 spin_lock(&siduidlock);
415 while ((node = rb_first(root)))
416 rb_erase(node, root);
417 spin_unlock(&siduidlock);
418
419 root = &gidtree;
420 spin_lock(&sidgidlock);
421 while ((node = rb_first(root)))
422 rb_erase(node, root);
423 spin_unlock(&sidgidlock);
97} 424}
98 425
99/* if the two SIDs (roughly equivalent to a UUID for a user or group) are 426/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
104 int num_subauth, num_sat, num_saw; 431 int num_subauth, num_sat, num_saw;
105 432
106 if ((!ctsid) || (!cwsid)) 433 if ((!ctsid) || (!cwsid))
107 return 0; 434 return 1;
108 435
109 /* compare the revision */ 436 /* compare the revision */
110 if (ctsid->revision != cwsid->revision) 437 if (ctsid->revision != cwsid->revision) {
111 return 0; 438 if (ctsid->revision > cwsid->revision)
439 return 1;
440 else
441 return -1;
442 }
112 443
113 /* compare all of the six auth values */ 444 /* compare all of the six auth values */
114 for (i = 0; i < 6; ++i) { 445 for (i = 0; i < 6; ++i) {
115 if (ctsid->authority[i] != cwsid->authority[i]) 446 if (ctsid->authority[i] != cwsid->authority[i]) {
116 return 0; 447 if (ctsid->authority[i] > cwsid->authority[i])
448 return 1;
449 else
450 return -1;
451 }
117 } 452 }
118 453
119 /* compare all of the subauth values if any */ 454 /* compare all of the subauth values if any */
@@ -122,12 +457,17 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
122 num_subauth = num_sat < num_saw ? num_sat : num_saw; 457 num_subauth = num_sat < num_saw ? num_sat : num_saw;
123 if (num_subauth) { 458 if (num_subauth) {
124 for (i = 0; i < num_subauth; ++i) { 459 for (i = 0; i < num_subauth; ++i) {
125 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) 460 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
126 return 0; 461 if (le32_to_cpu(ctsid->sub_auth[i]) >
462 le32_to_cpu(cwsid->sub_auth[i]))
463 return 1;
464 else
465 return -1;
466 }
127 } 467 }
128 } 468 }
129 469
130 return 1; /* sids compare/match */ 470 return 0; /* sids compare/match */
131} 471}
132 472
133 473
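
This hunk changes the contract of compare_sids() from a boolean match (1 = equal, 0 = different or NULL input) to a memcmp()-style three-way comparison (0 = equal, 1/-1 for ordering, 1 for NULL inputs), which is what the rb-tree walkers above require. Every equality test consequently flips polarity:

    /* old contract */                 /* new contract */
    if (compare_sids(a, b))            if (compare_sids(a, b) == 0)
            /* SIDs matched */                 /* SIDs matched */
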
@@ -382,22 +722,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
382#ifdef CONFIG_CIFS_DEBUG2 722#ifdef CONFIG_CIFS_DEBUG2
383 dump_ace(ppace[i], end_of_acl); 723 dump_ace(ppace[i], end_of_acl);
384#endif 724#endif
385 if (compare_sids(&(ppace[i]->sid), pownersid)) 725 if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
386 access_flags_to_mode(ppace[i]->access_req, 726 access_flags_to_mode(ppace[i]->access_req,
387 ppace[i]->type, 727 ppace[i]->type,
388 &fattr->cf_mode, 728 &fattr->cf_mode,
389 &user_mask); 729 &user_mask);
390 if (compare_sids(&(ppace[i]->sid), pgrpsid)) 730 if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
391 access_flags_to_mode(ppace[i]->access_req, 731 access_flags_to_mode(ppace[i]->access_req,
392 ppace[i]->type, 732 ppace[i]->type,
393 &fattr->cf_mode, 733 &fattr->cf_mode,
394 &group_mask); 734 &group_mask);
395 if (compare_sids(&(ppace[i]->sid), &sid_everyone)) 735 if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
396 access_flags_to_mode(ppace[i]->access_req, 736 access_flags_to_mode(ppace[i]->access_req,
397 ppace[i]->type, 737 ppace[i]->type,
398 &fattr->cf_mode, 738 &fattr->cf_mode,
399 &other_mask); 739 &other_mask);
400 if (compare_sids(&(ppace[i]->sid), &sid_authusers)) 740 if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
401 access_flags_to_mode(ppace[i]->access_req, 741 access_flags_to_mode(ppace[i]->access_req,
402 ppace[i]->type, 742 ppace[i]->type,
403 &fattr->cf_mode, 743 &fattr->cf_mode,
@@ -475,10 +815,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
475 815
476 816
477/* Convert CIFS ACL to POSIX form */ 817/* Convert CIFS ACL to POSIX form */
478static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, 818static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
479 struct cifs_fattr *fattr) 819 struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
480{ 820{
481 int rc; 821 int rc = 0;
482 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 822 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
483 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */ 823 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
484 char *end_of_acl = ((char *)pntsd) + acl_len; 824 char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +840,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
500 le32_to_cpu(pntsd->sacloffset), dacloffset); 840 le32_to_cpu(pntsd->sacloffset), dacloffset);
501/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 841/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
502 rc = parse_sid(owner_sid_ptr, end_of_acl); 842 rc = parse_sid(owner_sid_ptr, end_of_acl);
503 if (rc) 843 if (rc) {
844 cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
845 return rc;
846 }
847 rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
848 if (rc) {
849 cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
504 return rc; 850 return rc;
851 }
505 852
506 rc = parse_sid(group_sid_ptr, end_of_acl); 853 rc = parse_sid(group_sid_ptr, end_of_acl);
507 if (rc) 854 if (rc) {
 855 cFYI(1, "%s: Error %d parsing Group SID", __func__, rc);
508 return rc; 856 return rc;
857 }
858 rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
859 if (rc) {
860 cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
861 return rc;
862 }
509 863
510 if (dacloffset) 864 if (dacloffset)
511 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 865 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +874,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
520 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, 874 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
521 sizeof(struct cifs_sid)); */ 875 sizeof(struct cifs_sid)); */
522 876
523 return 0; 877 return rc;
524} 878}
525 879
526 880
@@ -592,7 +946,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
592 int oplock = 0; 946 int oplock = 0;
593 int xid, rc; 947 int xid, rc;
594 __u16 fid; 948 __u16 fid;
595 struct cifsTconInfo *tcon; 949 struct cifs_tcon *tcon;
596 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); 950 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
597 951
598 if (IS_ERR(tlink)) 952 if (IS_ERR(tlink))
@@ -660,7 +1014,7 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
660 int oplock = 0; 1014 int oplock = 0;
661 int xid, rc; 1015 int xid, rc;
662 __u16 fid; 1016 __u16 fid;
663 struct cifsTconInfo *tcon; 1017 struct cifs_tcon *tcon;
664 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); 1018 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
665 1019
666 if (IS_ERR(tlink)) 1020 if (IS_ERR(tlink))
@@ -688,7 +1042,7 @@ out:
688} 1042}
689 1043
690/* Set an ACL on the server */ 1044/* Set an ACL on the server */
691static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, 1045int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
692 struct inode *inode, const char *path) 1046 struct inode *inode, const char *path)
693{ 1047{
694 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1048 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1081,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
727 rc = PTR_ERR(pntsd); 1081 rc = PTR_ERR(pntsd);
728 cERROR(1, "%s: error %d getting sec desc", __func__, rc); 1082 cERROR(1, "%s: error %d getting sec desc", __func__, rc);
729 } else { 1083 } else {
730 rc = parse_sec_desc(pntsd, acllen, fattr); 1084 rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
731 kfree(pntsd); 1085 kfree(pntsd);
732 if (rc) 1086 if (rc)
733 cERROR(1, "parse sec desc failed rc = %d", rc); 1087 cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
39#define ACCESS_ALLOWED 0 39#define ACCESS_ALLOWED 0
40#define ACCESS_DENIED 1 40#define ACCESS_DENIED 1
41 41
42#define SIDOWNER 1
43#define SIDGROUP 2
44#define SIDLEN 150 /* "S-" + 1 revision + 6 authorities + max 5 sub-authorities */
45
46#define SID_ID_MAPPED 0
47#define SID_ID_PENDING 1
48#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
49#define SID_MAP_RETRY (300 * HZ) /* wait 5 minutes for next attempt to map */
50
42struct cifs_ntsd { 51struct cifs_ntsd {
43 __le16 revision; /* revision level */ 52 __le16 revision; /* revision level */
44 __le16 type; 53 __le16 type;
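
The SIDLEN bound added above holds up to quick worst-case arithmetic: sid_to_str() emits "S", "-<revision>", up to six "-<authority>" bytes and up to five "-<subauth>" 32-bit values, and sid_to_id() prefixes the three-character "os:"/"gs:" tag:

    /*
     *   3  "os:" or "gs:" prefix
     * + 1  "S"
     * + 4  "-255"            (8-bit revision)
     * + 24 6 x "-255"        (8-bit authority values)
     * + 55 5 x "-4294967295" (32-bit sub-authorities)
     * + 1  NUL
     * = 88, comfortably within SIDLEN (150)
     */
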
@@ -74,7 +83,21 @@ struct cifs_wksid {
74 char sidname[SIDNAMELENGTH]; 83 char sidname[SIDNAMELENGTH];
75} __attribute__((packed)); 84} __attribute__((packed));
76 85
77extern int match_sid(struct cifs_sid *); 86struct cifs_sid_id {
87 unsigned int refcount; /* increment with spinlock, decrement without */
88 unsigned long id;
89 unsigned long time;
90 unsigned long state;
91 char *sidstr;
92 struct rb_node rbnode;
93 struct cifs_sid sid;
94};
95
96#ifdef __KERNEL__
97extern struct key_type cifs_idmap_key_type;
98extern const struct cred *root_cred;
99#endif /* KERNEL */
100
78extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); 101extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
79 102
80#endif /* _CIFSACL_H */ 103#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d1a016be73ba..dfbd9f1f373d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -60,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
60 server->session_key.response, server->session_key.len); 60 server->session_key.response, server->session_key.len);
61 61
62 crypto_shash_update(&server->secmech.sdescmd5->shash, 62 crypto_shash_update(&server->secmech.sdescmd5->shash,
63 cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 63 cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
64 64
65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); 65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
66 66
@@ -229,7 +229,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
229} 229}
230 230
231/* first calculate 24 bytes ntlm response and then 16 byte session key */ 231/* first calculate 24 bytes ntlm response and then 16 byte session key */
232int setup_ntlm_response(struct cifsSesInfo *ses) 232int setup_ntlm_response(struct cifs_ses *ses)
233{ 233{
234 int rc = 0; 234 int rc = 0;
235 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; 235 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
@@ -268,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
268} 268}
269 269
270#ifdef CONFIG_CIFS_WEAK_PW_HASH 270#ifdef CONFIG_CIFS_WEAK_PW_HASH
271void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, 271int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
272 char *lnm_session_key) 272 char *lnm_session_key)
273{ 273{
274 int i; 274 int i;
275 int rc;
275 char password_with_pad[CIFS_ENCPWD_SIZE]; 276 char password_with_pad[CIFS_ENCPWD_SIZE];
276 277
277 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 278 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -282,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
282 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 283 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
283 memcpy(lnm_session_key, password_with_pad, 284 memcpy(lnm_session_key, password_with_pad,
284 CIFS_ENCPWD_SIZE); 285 CIFS_ENCPWD_SIZE);
285 return; 286 return 0;
286 } 287 }
287 288
288 /* calculate old style session key */ 289 /* calculate old style session key */
@@ -299,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
299 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) 300 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
300 password_with_pad[i] = toupper(password_with_pad[i]); 301 password_with_pad[i] = toupper(password_with_pad[i]);
301 302
302 SMBencrypt(password_with_pad, cryptkey, lnm_session_key); 303 rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
303 304
304 /* clear password before we return/free memory */ 305 return rc;
305 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
306} 306}
307#endif /* CIFS_WEAK_PW_HASH */ 307#endif /* CIFS_WEAK_PW_HASH */
308 308
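
Two things change in calc_lanman_hash(): it now propagates the return code of SMBencrypt() instead of returning void, and the memset() that scrubbed the on-stack password copy before returning is dropped. If that scrubbing is still wanted, it would slot back in just before the return (a suggestion, not part of this patch):

    rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);

    /* clear the stack copy of the password before returning */
    memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
    return rc;
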
@@ -312,7 +312,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
312 * Allocate domain name which gets freed when session struct is deallocated. 312 * Allocate domain name which gets freed when session struct is deallocated.
313 */ 313 */
314static int 314static int
315build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp) 315build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
316{ 316{
317 unsigned int dlen; 317 unsigned int dlen;
318 unsigned int wlen; 318 unsigned int wlen;
@@ -400,7 +400,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
400 * about target string i.e. for some, just user name might suffice. 400 * about target string i.e. for some, just user name might suffice.
401 */ 401 */
402static int 402static int
403find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp) 403find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
404{ 404{
405 unsigned int attrsize; 405 unsigned int attrsize;
406 unsigned int type; 406 unsigned int type;
@@ -445,7 +445,7 @@ find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
445 return 0; 445 return 0;
446} 446}
447 447
448static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash, 448static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
449 const struct nls_table *nls_cp) 449 const struct nls_table *nls_cp)
450{ 450{
451 int rc = 0; 451 int rc = 0;
@@ -527,7 +527,7 @@ calc_exit_2:
527} 527}
528 528
529static int 529static int
530CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash) 530CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
531{ 531{
532 int rc; 532 int rc;
533 unsigned int offset = CIFS_SESS_KEY_SIZE + 8; 533 unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
@@ -563,7 +563,7 @@ CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
563 563
564 564
565int 565int
566setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp) 566setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
567{ 567{
568 int rc; 568 int rc;
569 int baselen; 569 int baselen;
@@ -649,7 +649,7 @@ setup_ntlmv2_rsp_ret:
649} 649}
650 650
651int 651int
652calc_seckey(struct cifsSesInfo *ses) 652calc_seckey(struct cifs_ses *ses)
653{ 653{
654 int rc; 654 int rc;
655 struct crypto_blkcipher *tfm_arc4; 655 struct crypto_blkcipher *tfm_arc4;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5c412b33cd7c..989442dcfb45 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -104,53 +104,25 @@ cifs_sb_deactive(struct super_block *sb)
104} 104}
105 105
106static int 106static int
107cifs_read_super(struct super_block *sb, void *data, 107cifs_read_super(struct super_block *sb, struct smb_vol *volume_info,
108 const char *devname, int silent) 108 const char *devname, int silent)
109{ 109{
110 struct inode *inode; 110 struct inode *inode;
111 struct cifs_sb_info *cifs_sb; 111 struct cifs_sb_info *cifs_sb;
112 int rc = 0; 112 int rc = 0;
113 113
114 /* BB should we make this contingent on mount parm? */
115 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
116 sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
117 cifs_sb = CIFS_SB(sb); 114 cifs_sb = CIFS_SB(sb);
118 if (cifs_sb == NULL)
119 return -ENOMEM;
120 115
121 spin_lock_init(&cifs_sb->tlink_tree_lock); 116 spin_lock_init(&cifs_sb->tlink_tree_lock);
122 cifs_sb->tlink_tree = RB_ROOT; 117 cifs_sb->tlink_tree = RB_ROOT;
123 118
124 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); 119 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
125 if (rc) { 120 if (rc)
126 kfree(cifs_sb);
127 return rc; 121 return rc;
128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 122
131#ifdef CONFIG_CIFS_DFS_UPCALL 123 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
132 /* copy mount params to sb for use in submounts */
133 /* BB: should we move this after the mount so we
134 * do not have to do the copy on failed mounts?
135 * BB: May be it is better to do simple copy before
136 * complex operation (mount), and in case of fail
137 * just exit instead of doing mount and attempting
138 * undo it if this copy fails?*/
139 if (data) {
140 int len = strlen(data);
141 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
142 if (cifs_sb->mountdata == NULL) {
143 bdi_destroy(&cifs_sb->bdi);
144 kfree(sb->s_fs_info);
145 sb->s_fs_info = NULL;
146 return -ENOMEM;
147 }
148 strncpy(cifs_sb->mountdata, data, len + 1);
149 cifs_sb->mountdata[len] = '\0';
150 }
151#endif
152 124
153 rc = cifs_mount(sb, cifs_sb, data, devname); 125 rc = cifs_mount(sb, cifs_sb, volume_info, devname);
154 126
155 if (rc) { 127 if (rc) {
156 if (!silent) 128 if (!silent)
@@ -163,7 +135,7 @@ cifs_read_super(struct super_block *sb, void *data,
163 sb->s_bdi = &cifs_sb->bdi; 135 sb->s_bdi = &cifs_sb->bdi;
164 sb->s_blocksize = CIFS_MAX_MSGSIZE; 136 sb->s_blocksize = CIFS_MAX_MSGSIZE;
165 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 137 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
166 inode = cifs_root_iget(sb, ROOT_I); 138 inode = cifs_root_iget(sb);
167 139
168 if (IS_ERR(inode)) { 140 if (IS_ERR(inode)) {
169 rc = PTR_ERR(inode); 141 rc = PTR_ERR(inode);
@@ -184,12 +156,12 @@ cifs_read_super(struct super_block *sb, void *data,
184 else 156 else
185 sb->s_d_op = &cifs_dentry_ops; 157 sb->s_d_op = &cifs_dentry_ops;
186 158
187#ifdef CONFIG_CIFS_EXPERIMENTAL 159#ifdef CIFS_NFSD_EXPORT
188 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 160 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
189 cFYI(1, "export ops supported"); 161 cFYI(1, "export ops supported");
190 sb->s_export_op = &cifs_export_ops; 162 sb->s_export_op = &cifs_export_ops;
191 } 163 }
192#endif /* EXPERIMENTAL */ 164#endif /* CIFS_NFSD_EXPORT */
193 165
194 return 0; 166 return 0;
195 167
@@ -201,17 +173,7 @@ out_no_root:
201 cifs_umount(sb, cifs_sb); 173 cifs_umount(sb, cifs_sb);
202 174
203out_mount_failed: 175out_mount_failed:
204 if (cifs_sb) { 176 bdi_destroy(&cifs_sb->bdi);
205#ifdef CONFIG_CIFS_DFS_UPCALL
206 if (cifs_sb->mountdata) {
207 kfree(cifs_sb->mountdata);
208 cifs_sb->mountdata = NULL;
209 }
210#endif
211 unload_nls(cifs_sb->local_nls);
212 bdi_destroy(&cifs_sb->bdi);
213 kfree(cifs_sb);
214 }
215 return rc; 177 return rc;
216} 178}
217 179
@@ -231,12 +193,10 @@ cifs_put_super(struct super_block *sb)
231 rc = cifs_umount(sb, cifs_sb); 193 rc = cifs_umount(sb, cifs_sb);
232 if (rc) 194 if (rc)
233 cERROR(1, "cifs_umount failed with return code %d", rc); 195 cERROR(1, "cifs_umount failed with return code %d", rc);
234#ifdef CONFIG_CIFS_DFS_UPCALL
235 if (cifs_sb->mountdata) { 196 if (cifs_sb->mountdata) {
236 kfree(cifs_sb->mountdata); 197 kfree(cifs_sb->mountdata);
237 cifs_sb->mountdata = NULL; 198 cifs_sb->mountdata = NULL;
238 } 199 }
239#endif
240 200
241 unload_nls(cifs_sb->local_nls); 201 unload_nls(cifs_sb->local_nls);
242 bdi_destroy(&cifs_sb->bdi); 202 bdi_destroy(&cifs_sb->bdi);
@@ -248,7 +208,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
248{ 208{
249 struct super_block *sb = dentry->d_sb; 209 struct super_block *sb = dentry->d_sb;
250 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 210 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
251 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 211 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
252 int rc = -EOPNOTSUPP; 212 int rc = -EOPNOTSUPP;
253 int xid; 213 int xid;
254 214
@@ -401,7 +361,7 @@ static int
401cifs_show_options(struct seq_file *s, struct vfsmount *m) 361cifs_show_options(struct seq_file *s, struct vfsmount *m)
402{ 362{
403 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); 363 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
404 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 364 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
405 struct sockaddr *srcaddr; 365 struct sockaddr *srcaddr;
406 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 366 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
407 367
@@ -455,14 +415,20 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
455 seq_printf(s, ",nocase"); 415 seq_printf(s, ",nocase");
456 if (tcon->retry) 416 if (tcon->retry)
457 seq_printf(s, ",hard"); 417 seq_printf(s, ",hard");
458 if (cifs_sb->prepath) 418 if (tcon->unix_ext)
459 seq_printf(s, ",prepath=%s", cifs_sb->prepath); 419 seq_printf(s, ",unix");
420 else
421 seq_printf(s, ",nounix");
460 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) 422 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
461 seq_printf(s, ",posixpaths"); 423 seq_printf(s, ",posixpaths");
462 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) 424 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
463 seq_printf(s, ",setuids"); 425 seq_printf(s, ",setuids");
464 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) 426 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
465 seq_printf(s, ",serverino"); 427 seq_printf(s, ",serverino");
428 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
429 seq_printf(s, ",rwpidforward");
430 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL)
431 seq_printf(s, ",forcemand");
466 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) 432 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
467 seq_printf(s, ",directio"); 433 seq_printf(s, ",directio");
468 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 434 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
@@ -495,7 +461,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
495static void cifs_umount_begin(struct super_block *sb) 461static void cifs_umount_begin(struct super_block *sb)
496{ 462{
497 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 463 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
498 struct cifsTconInfo *tcon; 464 struct cifs_tcon *tcon;
499 465
500 if (cifs_sb == NULL) 466 if (cifs_sb == NULL)
501 return; 467 return;
@@ -570,29 +536,189 @@ static const struct super_operations cifs_super_ops = {
570#endif 536#endif
571}; 537};
572 538
539/*
540 * Get root dentry from superblock according to prefix path mount option.
541 * Return dentry with refcount + 1 on success and NULL otherwise.
542 */
543static struct dentry *
544cifs_get_root(struct smb_vol *vol, struct super_block *sb)
545{
546 int xid, rc;
547 struct inode *inode;
548 struct qstr name;
549 struct dentry *dparent = NULL, *dchild = NULL, *alias;
550 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
551 unsigned int i, full_len, len;
552 char *full_path = NULL, *pstart;
553 char sep;
554
555 full_path = cifs_build_path_to_root(vol, cifs_sb,
556 cifs_sb_master_tcon(cifs_sb));
557 if (full_path == NULL)
558 return NULL;
559
560 cFYI(1, "Get root dentry for %s", full_path);
561
562 xid = GetXid();
563 sep = CIFS_DIR_SEP(cifs_sb);
564 dparent = dget(sb->s_root);
565 full_len = strlen(full_path);
566 full_path[full_len] = sep;
567 pstart = full_path + 1;
568
569 for (i = 1, len = 0; i <= full_len; i++) {
570 if (full_path[i] != sep || !len) {
571 len++;
572 continue;
573 }
574
575 full_path[i] = 0;
576 cFYI(1, "get dentry for %s", pstart);
577
578 name.name = pstart;
579 name.len = len;
580 name.hash = full_name_hash(pstart, len);
581 dchild = d_lookup(dparent, &name);
582 if (dchild == NULL) {
583 cFYI(1, "not exists");
584 dchild = d_alloc(dparent, &name);
585 if (dchild == NULL) {
586 dput(dparent);
587 dparent = NULL;
588 goto out;
589 }
590 }
591
592 cFYI(1, "get inode");
593 if (dchild->d_inode == NULL) {
594 cFYI(1, "not exists");
595 inode = NULL;
596 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
597 rc = cifs_get_inode_info_unix(&inode, full_path,
598 sb, xid);
599 else
600 rc = cifs_get_inode_info(&inode, full_path,
601 NULL, sb, xid, NULL);
602 if (rc) {
603 dput(dchild);
604 dput(dparent);
605 dparent = NULL;
606 goto out;
607 }
608 alias = d_materialise_unique(dchild, inode);
609 if (alias != NULL) {
610 dput(dchild);
611 if (IS_ERR(alias)) {
612 dput(dparent);
613 dparent = NULL;
614 goto out;
615 }
616 dchild = alias;
617 }
618 }
619 cFYI(1, "parent %p, child %p", dparent, dchild);
620
621 dput(dparent);
622 dparent = dchild;
623 len = 0;
624 pstart = full_path + i + 1;
625 full_path[i] = sep;
626 }
627out:
628 _FreeXid(xid);
629 kfree(full_path);
630 return dparent;
631}
632
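
cifs_get_root() walks the prefix path one component at a time. The trick at the top is that full_path[full_len] = sep overwrites the terminating NUL with a separator, so the i <= full_len loop sees a separator after the last component as well and needs no end-of-string special case; cifs_build_path_to_root() is assumed to leave room for that extra byte. A userspace analogue of the tokenization (illustrative only, not kernel code):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char full_path[] = "\\a\\b\\c\0"; /* spare byte for the sentinel */
            const char sep = '\\';
            size_t full_len = strlen(full_path);
            size_t i, len;
            char *pstart = full_path + 1;

            full_path[full_len] = sep;        /* sentinel replaces the NUL */
            for (i = 1, len = 0; i <= full_len; i++) {
                    if (full_path[i] != sep || !len) {
                            len++;
                            continue;
                    }
                    full_path[i] = '\0';
                    printf("component: %s\n", pstart); /* a, then b, then c */
                    pstart = full_path + i + 1;
                    full_path[i] = sep;
                    len = 0;
            }
            return 0;
    }
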
573static struct dentry * 633static struct dentry *
574cifs_do_mount(struct file_system_type *fs_type, 634cifs_do_mount(struct file_system_type *fs_type,
575 int flags, const char *dev_name, void *data) 635 int flags, const char *dev_name, void *data)
576{ 636{
577 int rc; 637 int rc;
578 struct super_block *sb; 638 struct super_block *sb;
579 639 struct cifs_sb_info *cifs_sb;
580 sb = sget(fs_type, NULL, set_anon_super, NULL); 640 struct smb_vol *volume_info;
641 struct cifs_mnt_data mnt_data;
642 struct dentry *root;
581 643
582 cFYI(1, "Devname: %s flags: %d ", dev_name, flags); 644 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
583 645
584 if (IS_ERR(sb)) 646 rc = cifs_setup_volume_info(&volume_info, (char *)data, dev_name);
585 return ERR_CAST(sb); 647 if (rc)
648 return ERR_PTR(rc);
649
650 cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
651 if (cifs_sb == NULL) {
652 root = ERR_PTR(-ENOMEM);
653 goto out;
654 }
655
656 cifs_setup_cifs_sb(volume_info, cifs_sb);
657
658 mnt_data.vol = volume_info;
659 mnt_data.cifs_sb = cifs_sb;
660 mnt_data.flags = flags;
661
662 sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data);
663 if (IS_ERR(sb)) {
664 root = ERR_CAST(sb);
665 goto out_cifs_sb;
666 }
667
668 if (sb->s_fs_info) {
669 cFYI(1, "Use existing superblock");
670 goto out_shared;
671 }
672
673 /*
674 * Copy mount params for use in submounts. Better to do
675 * the copy here and deal with the error before cleanup gets
676 * complicated post-mount.
677 */
678 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
679 if (cifs_sb->mountdata == NULL) {
680 root = ERR_PTR(-ENOMEM);
681 goto out_super;
682 }
586 683
587 sb->s_flags = flags; 684 sb->s_flags = flags;
685 /* BB should we make this contingent on mount parm? */
686 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
687 sb->s_fs_info = cifs_sb;
588 688
589 rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); 689 rc = cifs_read_super(sb, volume_info, dev_name,
690 flags & MS_SILENT ? 1 : 0);
590 if (rc) { 691 if (rc) {
591 deactivate_locked_super(sb); 692 root = ERR_PTR(rc);
592 return ERR_PTR(rc); 693 goto out_super;
593 } 694 }
695
594 sb->s_flags |= MS_ACTIVE; 696 sb->s_flags |= MS_ACTIVE;
595 return dget(sb->s_root); 697
698 root = cifs_get_root(volume_info, sb);
699 if (root == NULL)
700 goto out_super;
701
702 cFYI(1, "dentry root is: %p", root);
703 goto out;
704
705out_shared:
706 root = cifs_get_root(volume_info, sb);
707 if (root)
708 cFYI(1, "dentry root is: %p", root);
709 goto out;
710
711out_super:
712 kfree(cifs_sb->mountdata);
713 deactivate_locked_super(sb);
714
715out_cifs_sb:
716 unload_nls(cifs_sb->local_nls);
717 kfree(cifs_sb);
718
719out:
720 cifs_cleanup_volume_info(&volume_info);
721 return root;
596} 722}
597 723
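
The mount path now lets sget() look for a shareable superblock before creating one. cifs_match_super() (added in connect.c, not shown in this hunk) receives the cifs_mnt_data and judges whether an existing sb points at the same server, session and tree connect with compatible options; only a brand-new sb goes through set_anon_super, comes back with a NULL s_fs_info, and takes the full cifs_read_super() path. The sget() callback contract, for orientation (a paraphrase of the VFS convention, not the cifs_match_super body):

    /*
     * sget(fs_type, test, set, data) invokes test(sb, data) on each
     * existing superblock of this fs_type:
     *   nonzero -> reuse that sb ("Use existing superblock" above);
     *   zero    -> keep scanning; if nothing matches, set() initializes
     *              a fresh anonymous sb whose s_fs_info is still NULL,
     *              which is how cifs_do_mount() tells the cases apart.
     */
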
598static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 724static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -618,16 +744,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
618{ 744{
619 /* origin == SEEK_END => we must revalidate the cached file length */ 745 /* origin == SEEK_END => we must revalidate the cached file length */
620 if (origin == SEEK_END) { 746 if (origin == SEEK_END) {
621 int retval; 747 int rc;
622 748 struct inode *inode = file->f_path.dentry->d_inode;
623 /* some applications poll for the file length in this strange 749
624 way so we must seek to end on non-oplocked files by 750 /*
625 setting the revalidate time to zero */ 751 * We need to be sure that all dirty pages are written and the
626 CIFS_I(file->f_path.dentry->d_inode)->time = 0; 752 * server has the newest file length.
627 753 */
628 retval = cifs_revalidate_file(file); 754 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
629 if (retval < 0) 755 inode->i_mapping->nrpages != 0) {
630 return (loff_t)retval; 756 rc = filemap_fdatawait(inode->i_mapping);
757 if (rc) {
758 mapping_set_error(inode->i_mapping, rc);
759 return rc;
760 }
761 }
762 /*
763 * Some applications poll for the file length in this strange
764 * way so we must seek to end on non-oplocked files by
765 * setting the revalidate time to zero.
766 */
767 CIFS_I(inode)->time = 0;
768
769 rc = cifs_revalidate_file_attr(file);
770 if (rc < 0)
771 return (loff_t)rc;
631 } 772 }
632 return generic_file_llseek_unlocked(file, offset, origin); 773 return generic_file_llseek_unlocked(file, offset, origin);
633} 774}
@@ -760,10 +901,11 @@ const struct file_operations cifs_file_strict_ops = {
760}; 901};
761 902
762const struct file_operations cifs_file_direct_ops = { 903const struct file_operations cifs_file_direct_ops = {
763 /* no aio, no readv - 904 /* BB reevaluate whether they can be done with directio, no cache */
764 BB reevaluate whether they can be done with directio, no cache */ 905 .read = do_sync_read,
765 .read = cifs_user_read, 906 .write = do_sync_write,
766 .write = cifs_user_write, 907 .aio_read = cifs_user_readv,
908 .aio_write = cifs_user_writev,
767 .open = cifs_open, 909 .open = cifs_open,
768 .release = cifs_close, 910 .release = cifs_close,
769 .lock = cifs_lock, 911 .lock = cifs_lock,
@@ -815,10 +957,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
815}; 957};
816 958
817const struct file_operations cifs_file_direct_nobrl_ops = { 959const struct file_operations cifs_file_direct_nobrl_ops = {
818 /* no mmap, no aio, no readv - 960 /* BB reevaluate whether they can be done with directio, no cache */
819 BB reevaluate whether they can be done with directio, no cache */ 961 .read = do_sync_read,
820 .read = cifs_user_read, 962 .write = do_sync_write,
821 .write = cifs_user_write, 963 .aio_read = cifs_user_readv,
964 .aio_write = cifs_user_writev,
822 .open = cifs_open, 965 .open = cifs_open,
823 .release = cifs_close, 966 .release = cifs_close,
824 .fsync = cifs_fsync, 967 .fsync = cifs_fsync,
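
Replacing .read/.write with do_sync_read/do_sync_write while supplying .aio_read/.aio_write routes both the synchronous and AIO entry points through one vectored implementation (cifs_user_readv/cifs_user_writev). The do_sync_* helpers in fs/read_write.c wrap the user buffer in a one-entry iovec plus a synchronous kiocb and call the aio method, roughly as follows (paraphrased, details trimmed):

    ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len,
                         loff_t *ppos)
    {
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            struct kiocb kiocb;
            ssize_t ret;

            init_sync_kiocb(&kiocb, filp);
            kiocb.ki_pos = *ppos;
            ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
            if (ret == -EIOCBQUEUED)
                    ret = wait_on_sync_kiocb(&kiocb);
            *ppos = kiocb.ki_pos;
            return ret;
    }
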
@@ -981,10 +1124,10 @@ init_cifs(void)
981 int rc = 0; 1124 int rc = 0;
982 cifs_proc_init(); 1125 cifs_proc_init();
983 INIT_LIST_HEAD(&cifs_tcp_ses_list); 1126 INIT_LIST_HEAD(&cifs_tcp_ses_list);
984#ifdef CONFIG_CIFS_EXPERIMENTAL 1127#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
985 INIT_LIST_HEAD(&GlobalDnotifyReqList); 1128 INIT_LIST_HEAD(&GlobalDnotifyReqList);
986 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); 1129 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
 987#endif 1130#endif /* was needed for dnotify, and will be needed for inotify when the VFS fix lands */
988/* 1131/*
989 * Initialize Global counters 1132 * Initialize Global counters
990 */ 1133 */
@@ -1033,22 +1176,33 @@ init_cifs(void)
1033 if (rc) 1176 if (rc)
1034 goto out_destroy_mids; 1177 goto out_destroy_mids;
1035 1178
1036 rc = register_filesystem(&cifs_fs_type);
1037 if (rc)
1038 goto out_destroy_request_bufs;
1039#ifdef CONFIG_CIFS_UPCALL 1179#ifdef CONFIG_CIFS_UPCALL
1040 rc = register_key_type(&cifs_spnego_key_type); 1180 rc = register_key_type(&cifs_spnego_key_type);
1041 if (rc) 1181 if (rc)
1042 goto out_unregister_filesystem; 1182 goto out_destroy_request_bufs;
1043#endif 1183#endif /* CONFIG_CIFS_UPCALL */
1184
1185#ifdef CONFIG_CIFS_ACL
1186 rc = init_cifs_idmap();
1187 if (rc)
1188 goto out_register_key_type;
1189#endif /* CONFIG_CIFS_ACL */
1190
1191 rc = register_filesystem(&cifs_fs_type);
1192 if (rc)
1193 goto out_init_cifs_idmap;
1044 1194
1045 return 0; 1195 return 0;
1046 1196
1047#ifdef CONFIG_CIFS_UPCALL 1197out_init_cifs_idmap:
1048out_unregister_filesystem: 1198#ifdef CONFIG_CIFS_ACL
1049 unregister_filesystem(&cifs_fs_type); 1199 exit_cifs_idmap();
1200out_register_key_type:
1050#endif 1201#endif
1202#ifdef CONFIG_CIFS_UPCALL
1203 unregister_key_type(&cifs_spnego_key_type);
1051out_destroy_request_bufs: 1204out_destroy_request_bufs:
1205#endif
1052 cifs_destroy_request_bufs(); 1206 cifs_destroy_request_bufs();
1053out_destroy_mids: 1207out_destroy_mids:
1054 cifs_destroy_mids(); 1208 cifs_destroy_mids();
@@ -1070,6 +1224,10 @@ exit_cifs(void)
1070#ifdef CONFIG_CIFS_DFS_UPCALL 1224#ifdef CONFIG_CIFS_DFS_UPCALL
1071 cifs_dfs_release_automount_timer(); 1225 cifs_dfs_release_automount_timer();
1072#endif 1226#endif
1227#ifdef CONFIG_CIFS_ACL
1228 cifs_destroy_idmaptrees();
1229 exit_cifs_idmap();
1230#endif
1073#ifdef CONFIG_CIFS_UPCALL 1231#ifdef CONFIG_CIFS_UPCALL
1074 unregister_key_type(&cifs_spnego_key_type); 1232 unregister_key_type(&cifs_spnego_key_type);
1075#endif 1233#endif
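
The init_cifs() reshuffle defers register_filesystem() until everything it depends on is ready: request buffers, then the spnego key type (CONFIG_CIFS_UPCALL), then the idmap keyring and key type (CONFIG_CIFS_ACL), and only then the filesystem itself, with the error labels unwinding in exactly the reverse order. exit_cifs() above mirrors the same sequence, tearing down the idmap trees and key type before the spnego key type.
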
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
47 47
48/* Functions related to inodes */ 48/* Functions related to inodes */
49extern const struct inode_operations cifs_dir_inode_ops; 49extern const struct inode_operations cifs_dir_inode_ops;
50extern struct inode *cifs_root_iget(struct super_block *, unsigned long); 50extern struct inode *cifs_root_iget(struct super_block *);
51extern int cifs_create(struct inode *, struct dentry *, int, 51extern int cifs_create(struct inode *, struct dentry *, int,
52 struct nameidata *); 52 struct nameidata *);
53extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 53extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
59extern int cifs_rmdir(struct inode *, struct dentry *); 59extern int cifs_rmdir(struct inode *, struct dentry *);
60extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 60extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
61 struct dentry *); 61 struct dentry *);
62extern int cifs_revalidate_file_attr(struct file *filp);
63extern int cifs_revalidate_dentry_attr(struct dentry *);
62extern int cifs_revalidate_file(struct file *filp); 64extern int cifs_revalidate_file(struct file *filp);
63extern int cifs_revalidate_dentry(struct dentry *); 65extern int cifs_revalidate_dentry(struct dentry *);
64extern void cifs_invalidate_mapping(struct inode *inode); 66extern int cifs_invalidate_mapping(struct inode *inode);
65extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 67extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
66extern int cifs_setattr(struct dentry *, struct iattr *); 68extern int cifs_setattr(struct dentry *, struct iattr *);
67 69
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
80extern int cifs_open(struct inode *inode, struct file *file); 82extern int cifs_open(struct inode *inode, struct file *file);
81extern int cifs_close(struct inode *inode, struct file *file); 83extern int cifs_close(struct inode *inode, struct file *file);
82extern int cifs_closedir(struct inode *inode, struct file *file); 84extern int cifs_closedir(struct inode *inode, struct file *file);
83extern ssize_t cifs_user_read(struct file *file, char __user *read_data, 85extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
84 size_t read_size, loff_t *poffset); 86 unsigned long nr_segs, loff_t pos);
85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 87extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
86 unsigned long nr_segs, loff_t pos); 88 unsigned long nr_segs, loff_t pos);
87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, 89extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
88 size_t write_size, loff_t *poffset); 90 unsigned long nr_segs, loff_t pos);
89extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
90 unsigned long nr_segs, loff_t pos); 92 unsigned long nr_segs, loff_t pos);
91extern int cifs_lock(struct file *, int, struct file_lock *); 93extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
123extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 125extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
124extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 126extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125 127
126#ifdef CONFIG_CIFS_EXPERIMENTAL 128#ifdef CIFS_NFSD_EXPORT
127extern const struct export_operations cifs_export_ops; 129extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 130#endif /* CIFS_NFSD_EXPORT */
129 131
130#define CIFS_VERSION "1.71" 132#define CIFS_VERSION "1.72"
131#endif /* _CIFSFS_H */ 133#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a5d1106fcbde..6255fa812c7a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -155,6 +155,81 @@ struct cifs_cred {
155 ***************************************************************** 155 *****************************************************************
156 */ 156 */
157 157
158struct smb_vol {
159 char *username;
160 char *password;
161 char *domainname;
162 char *UNC;
163 char *UNCip;
164 char *iocharset; /* local code page for mapping to and from Unicode */
165 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
166 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
167 uid_t cred_uid;
168 uid_t linux_uid;
169 gid_t linux_gid;
170 mode_t file_mode;
171 mode_t dir_mode;
172 unsigned secFlg;
173 bool retry:1;
174 bool intr:1;
175 bool setuids:1;
176 bool override_uid:1;
177 bool override_gid:1;
178 bool dynperm:1;
179 bool noperm:1;
180 bool no_psx_acl:1; /* set if posix acl support should be disabled */
181 bool cifs_acl:1;
182 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
183 bool server_ino:1; /* use inode numbers from server ie UniqueId */
184 bool direct_io:1;
185 bool strict_io:1; /* strict cache behavior */
186 bool remap:1; /* set to remap seven reserved chars in filenames */
187 bool posix_paths:1; /* unset to not ask for posix pathnames. */
188 bool no_linux_ext:1;
189 bool sfu_emul:1;
190 bool nullauth:1; /* attempt to authenticate with null user */
191 bool nocase:1; /* request case insensitive filenames */
192 bool nobrl:1; /* disable sending byte range locks to srv */
193 bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
194 bool seal:1; /* request transport encryption on share */
195 bool nodfs:1; /* Do not request DFS, even if available */
196 bool local_lease:1; /* check leases only on local system, not remote */
197 bool noblocksnd:1;
198 bool noautotune:1;
199 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
200 bool fsc:1; /* enable fscache */
201 bool mfsymlinks:1; /* use Minshall+French Symlinks */
202 bool multiuser:1;
203 bool rwpidforward:1; /* pid forward for read/write operations */
204 unsigned int rsize;
205 unsigned int wsize;
206 bool sockopt_tcp_nodelay:1;
207 unsigned short int port;
208 unsigned long actimeo; /* attribute cache timeout (jiffies) */
209 char *prepath;
210 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
211 struct nls_table *local_nls;
212};
213
214#define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
215 CIFS_MOUNT_SERVER_INUM | CIFS_MOUNT_DIRECT_IO | \
216 CIFS_MOUNT_NO_XATTR | CIFS_MOUNT_MAP_SPECIAL_CHR | \
217 CIFS_MOUNT_UNX_EMUL | CIFS_MOUNT_NO_BRL | \
218 CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_OVERR_UID | \
219 CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \
220 CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \
221 CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \
222 CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO)
223
224#define CIFS_MS_MASK (MS_RDONLY | MS_MANDLOCK | MS_NOEXEC | MS_NOSUID | \
225 MS_NODEV | MS_SYNCHRONOUS)
226
227struct cifs_mnt_data {
228 struct cifs_sb_info *cifs_sb;
229 struct smb_vol *vol;
230 int flags;
231};
232
158struct TCP_Server_Info { 233struct TCP_Server_Info {
159 struct list_head tcp_ses_list; 234 struct list_head tcp_ses_list;
160 struct list_head smb_ses_list; 235 struct list_head smb_ses_list;
@@ -179,7 +254,7 @@ struct TCP_Server_Info {
179 struct mutex srv_mutex; 254 struct mutex srv_mutex;
180 struct task_struct *tsk; 255 struct task_struct *tsk;
181 char server_GUID[16]; 256 char server_GUID[16];
182 char secMode; 257 char sec_mode;
183 bool session_estab; /* mark when very first sess is established */ 258 bool session_estab; /* mark when very first sess is established */
184 u16 dialect; /* dialect index that server chose */ 259 u16 dialect; /* dialect index that server chose */
185 enum securityEnum secType; 260 enum securityEnum secType;
@@ -254,7 +329,7 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
254/* 329/*
255 * Session structure. One of these for each uid session with a particular host 330 * Session structure. One of these for each uid session with a particular host
256 */ 331 */
257struct cifsSesInfo { 332struct cifs_ses {
258 struct list_head smb_ses_list; 333 struct list_head smb_ses_list;
259 struct list_head tcon_list; 334 struct list_head tcon_list;
260 struct mutex session_mutex; 335 struct mutex session_mutex;
@@ -274,7 +349,8 @@ struct cifsSesInfo {
274 int capabilities; 349 int capabilities;
275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 350 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
276 TCP names - will ipv6 and sctp addresses fit? */ 351 TCP names - will ipv6 and sctp addresses fit? */
 277 char *user_name; 352 char *user_name; /* must not be null except during
 353 sess init; filled in after mount option parsing */
278 char *domainName; 354 char *domainName;
279 char *password; 355 char *password;
280 struct session_key auth_key; 356 struct session_key auth_key;
@@ -293,11 +369,11 @@ struct cifsSesInfo {
293 * there is one of these for each connection to a resource on a particular 369 * there is one of these for each connection to a resource on a particular
294 * session 370 * session
295 */ 371 */
296struct cifsTconInfo { 372struct cifs_tcon {
297 struct list_head tcon_list; 373 struct list_head tcon_list;
298 int tc_count; 374 int tc_count;
299 struct list_head openFileList; 375 struct list_head openFileList;
300 struct cifsSesInfo *ses; /* pointer to session associated with */ 376 struct cifs_ses *ses; /* pointer to session associated with */
301 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ 377 char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
302 char *nativeFileSystem; 378 char *nativeFileSystem;
303 char *password; /* for share-level security */ 379 char *password; /* for share-level security */
@@ -379,12 +455,12 @@ struct tcon_link {
379#define TCON_LINK_IN_TREE 2 455#define TCON_LINK_IN_TREE 2
380 unsigned long tl_time; 456 unsigned long tl_time;
381 atomic_t tl_count; 457 atomic_t tl_count;
382 struct cifsTconInfo *tl_tcon; 458 struct cifs_tcon *tl_tcon;
383}; 459};
384 460
385extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb); 461extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb);
386 462
387static inline struct cifsTconInfo * 463static inline struct cifs_tcon *
388tlink_tcon(struct tcon_link *tlink) 464tlink_tcon(struct tcon_link *tlink)
389{ 465{
390 return tlink->tl_tcon; 466 return tlink->tl_tcon;
@@ -401,7 +477,7 @@ cifs_get_tlink(struct tcon_link *tlink)
401} 477}
402 478
403/* This function is always expected to succeed */ 479/* This function is always expected to succeed */
404extern struct cifsTconInfo *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); 480extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb);
405 481
406/* 482/*
407 * This info hangs off the cifsFileInfo structure, pointed to by llist. 483 * This info hangs off the cifsFileInfo structure, pointed to by llist.
@@ -454,6 +530,14 @@ struct cifsFileInfo {
454 struct work_struct oplock_break; /* work for oplock breaks */ 530 struct work_struct oplock_break; /* work for oplock breaks */
455}; 531};
456 532
533struct cifs_io_parms {
534 __u16 netfid;
535 __u32 pid;
536 __u64 offset;
537 unsigned int length;
538 struct cifs_tcon *tcon;
539};
540
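This new struct bundles the per-call I/O arguments that CIFSSMBRead/CIFSSMBWrite previously took individually (see the updated prototypes in cifsproto.h below). A minimal caller sketch, assuming an open cifsFileInfo exposing a netfid:

	struct cifs_io_parms io_parms;
	unsigned int nbytes;
	int buf_type = CIFS_NO_BUFFER;
	char *buf = NULL;

	io_parms.netfid = open_file->netfid;	/* assumed open-file handle field */
	io_parms.pid = pid;			/* pid of the opener */
	io_parms.tcon = tcon;
	io_parms.offset = offset;
	io_parms.length = len;
	rc = CIFSSMBRead(xid, &io_parms, &nbytes, &buf, &buf_type);
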
457/* 541/*
458 * Take a reference on the file private data. Must be called with 542 * Take a reference on the file private data. Must be called with
459 * cifs_file_list_lock held. 543 * cifs_file_list_lock held.
@@ -508,10 +592,30 @@ static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb)
508 return '\\'; 592 return '\\';
509} 593}
510 594
595static inline void
596convert_delimiter(char *path, char delim)
597{
598 int i;
599 char old_delim;
600
601 if (path == NULL)
602 return;
603
604 if (delim == '/')
605 old_delim = '\\';
606 else
607 old_delim = '/';
608
609 for (i = 0; path[i] != '\0'; i++) {
610 if (path[i] == old_delim)
611 path[i] = delim;
612 }
613}
614
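Usage of the new helper is symmetric in either direction; a quick sketch:

	char path[] = "\\server\\share\\dir\\file";

	convert_delimiter(path, '/');	/* now "/server/share/dir/file" */
	convert_delimiter(path, '\\');	/* back to the original form */
	convert_delimiter(NULL, '/');	/* safe no-op */
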
511#ifdef CONFIG_CIFS_STATS 615#ifdef CONFIG_CIFS_STATS
512#define cifs_stats_inc atomic_inc 616#define cifs_stats_inc atomic_inc
513 617
514static inline void cifs_stats_bytes_written(struct cifsTconInfo *tcon, 618static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon,
515 unsigned int bytes) 619 unsigned int bytes)
516{ 620{
517 if (bytes) { 621 if (bytes) {
@@ -521,7 +625,7 @@ static inline void cifs_stats_bytes_written(struct cifsTconInfo *tcon,
521 } 625 }
522} 626}
523 627
524static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon, 628static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon,
525 unsigned int bytes) 629 unsigned int bytes)
526{ 630{
527 spin_lock(&tcon->stat_lock); 631 spin_lock(&tcon->stat_lock);
@@ -542,9 +646,8 @@ struct mid_q_entry;
542 * This is the prototype for the mid callback function. When creating one, 646 * This is the prototype for the mid callback function. When creating one,
543 * take special care to avoid deadlocks. Things to bear in mind: 647 * take special care to avoid deadlocks. Things to bear in mind:
544 * 648 *
545 * - it will be called by cifsd 649 * - it will be called by cifsd, with no locks held
546 * - the GlobalMid_Lock will be held 650 * - the mid will be removed from any lists
547 * - the mid will be removed from the pending_mid_q list
548 */ 651 */
549typedef void (mid_callback_t)(struct mid_q_entry *mid); 652typedef void (mid_callback_t)(struct mid_q_entry *mid);
550 653
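A minimal callback sketch consistent with the relaxed rules above: it runs in cifsd context with no locks held and the mid already unlinked, so the callback owns the mid's lifetime and must arrange for it to be freed. DeleteMidQEntry() is declared in cifsproto.h; the callback_data field is assumed here.

	static void example_callback(struct mid_q_entry *mid)
	{
		struct TCP_Server_Info *server = mid->callback_data;	/* assumed */

		/* inspect the mid's state/response here, then release it */
		DeleteMidQEntry(mid);
	}
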
@@ -572,7 +675,7 @@ struct mid_q_entry {
572struct oplock_q_entry { 675struct oplock_q_entry {
573 struct list_head qhead; 676 struct list_head qhead;
574 struct inode *pinode; 677 struct inode *pinode;
575 struct cifsTconInfo *tcon; 678 struct cifs_tcon *tcon;
576 __u16 netfid; 679 __u16 netfid;
577}; 680};
578 681
@@ -655,6 +758,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
655#define MID_RESPONSE_RECEIVED 4 758#define MID_RESPONSE_RECEIVED 4
656#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 759#define MID_RETRY_NEEDED 8 /* session closed while this request out */
657#define MID_RESPONSE_MALFORMED 0x10 760#define MID_RESPONSE_MALFORMED 0x10
761#define MID_SHUTDOWN 0x20
658 762
659/* Types of response buffer returned from SendReceive2 */ 763/* Types of response buffer returned from SendReceive2 */
660#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 764#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
@@ -780,10 +884,12 @@ GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
780 */ 884 */
781GLOBAL_EXTERN spinlock_t cifs_file_list_lock; 885GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
782 886
887#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
783/* Outstanding dir notify requests */ 888/* Outstanding dir notify requests */
784GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; 889GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
785/* DirNotify response queue */ 890/* DirNotify response queue */
786GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; 891GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
 892#endif /* was needed for dnotify; will be needed for inotify once the VFS is fixed */
787 893
788/* 894/*
789 * Global transaction id (XID) information 895 * Global transaction id (XID) information
@@ -830,6 +936,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
830/* reconnect after this many failed echo attempts */ 936/* reconnect after this many failed echo attempts */
831GLOBAL_EXTERN unsigned short echo_retries; 937GLOBAL_EXTERN unsigned short echo_retries;
832 938
939GLOBAL_EXTERN struct rb_root uidtree;
940GLOBAL_EXTERN struct rb_root gidtree;
941GLOBAL_EXTERN spinlock_t siduidlock;
942GLOBAL_EXTERN spinlock_t sidgidlock;
943
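These trees back the new SID-to-uid/gid mapping, guarded by the two spinlocks; the lifecycle hooks are declared in cifsproto.h in this same series. A sketch of the expected module init/exit ordering (error path elided):

	/* in cifs module init, before the first mount */
	rc = init_cifs_idmap();

	/* in module exit */
	exit_cifs_idmap();
	cifs_destroy_idmaptrees();	/* empties uidtree and gidtree */
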
833void cifs_oplock_break(struct work_struct *work); 944void cifs_oplock_break(struct work_struct *work);
834void cifs_oplock_break_get(struct cifsFileInfo *cfile); 945void cifs_oplock_break_get(struct cifsFileInfo *cfile);
835void cifs_oplock_break_put(struct cifsFileInfo *cfile); 946void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */ 397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */
398 398
399struct smb_hdr { 399struct smb_hdr {
400 __u32 smb_buf_length; /* big endian on wire *//* BB length is only two 400 __be32 smb_buf_length; /* BB length is only two (rarely three) bytes,
401 or three bytes - with one or two byte type preceding it that are 401 with one or two byte "type" preceding it that will be
402 zero - we could mask the type byte off just in case BB */ 402 zero - we could mask the type byte off */
403 __u8 Protocol[4]; 403 __u8 Protocol[4];
404 __u8 Command; 404 __u8 Command;
405 union { 405 union {
@@ -428,43 +428,28 @@ struct smb_hdr {
428 __u8 WordCount; 428 __u8 WordCount;
429} __attribute__((packed)); 429} __attribute__((packed));
430 430
431/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */ 431/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
432#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \ 432static inline void *
433 (2 * (smb_var)->WordCount)) 433BCC(struct smb_hdr *smb)
434{
435 return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
436}
434 437
435/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 438/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
436#define pByteArea(smb_var) (BCC(smb_var) + 2) 439#define pByteArea(smb_var) (BCC(smb_var) + 2)
437 440
438/* get the converted ByteCount for a SMB packet and return it */
439static inline __u16
440get_bcc(struct smb_hdr *hdr)
441{
442 __u16 *bc_ptr = (__u16 *)BCC(hdr);
443
444 return get_unaligned(bc_ptr);
445}
446
447/* get the unconverted ByteCount for a SMB packet and return it */ 441/* get the unconverted ByteCount for a SMB packet and return it */
448static inline __u16 442static inline __u16
449get_bcc_le(struct smb_hdr *hdr) 443get_bcc(struct smb_hdr *hdr)
450{ 444{
451 __le16 *bc_ptr = (__le16 *)BCC(hdr); 445 __le16 *bc_ptr = (__le16 *)BCC(hdr);
452 446
453 return get_unaligned_le16(bc_ptr); 447 return get_unaligned_le16(bc_ptr);
454} 448}
455 449
456/* set the ByteCount for a SMB packet in host-byte order */
457static inline void
458put_bcc(__u16 count, struct smb_hdr *hdr)
459{
460 __u16 *bc_ptr = (__u16 *)BCC(hdr);
461
462 put_unaligned(count, bc_ptr);
463}
464
465/* set the ByteCount for a SMB packet in little-endian */ 450/* set the ByteCount for a SMB packet in little-endian */
466static inline void 451static inline void
467put_bcc_le(__u16 count, struct smb_hdr *hdr) 452put_bcc(__u16 count, struct smb_hdr *hdr)
468{ 453{
469 __le16 *bc_ptr = (__le16 *)BCC(hdr); 454 __le16 *bc_ptr = (__le16 *)BCC(hdr);
470 455
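Net effect of this hunk: BCC() becomes a type-safe inline returning void *, the host-order get_bcc()/put_bcc() pair is dropped, and the little-endian variants are renamed into their place, so every ByteCount access is now explicitly wire-order. A small reader-side sketch, with response_buf standing in for any parsed response:

	struct smb_hdr *hdr = (struct smb_hdr *)response_buf;
	__u16 bcc = get_bcc(hdr);		/* converted from little-endian */
	unsigned char *bytes = pByteArea(hdr);	/* data follows the 2-byte BCC */
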
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..953f84413c77 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,9 +53,13 @@ do { \
53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \ 53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
54 __func__, curr_xid, (int)rc); \ 54 __func__, curr_xid, (int)rc); \
55} while (0) 55} while (0)
56extern int init_cifs_idmap(void);
57extern void exit_cifs_idmap(void);
58extern void cifs_destroy_idmaptrees(void);
56extern char *build_path_from_dentry(struct dentry *); 59extern char *build_path_from_dentry(struct dentry *);
57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, 60extern char *cifs_build_path_to_root(struct smb_vol *vol,
58 struct cifsTconInfo *tcon); 61 struct cifs_sb_info *cifs_sb,
62 struct cifs_tcon *tcon);
59extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 63extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
60extern char *cifs_compose_mount_options(const char *sb_mountdata, 64extern char *cifs_compose_mount_options(const char *sb_mountdata,
61 const char *fullpath, const struct dfs_info3_param *ref, 65 const char *fullpath, const struct dfs_info3_param *ref,
@@ -64,20 +68,22 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata,
64extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, 68extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
65 struct TCP_Server_Info *server); 69 struct TCP_Server_Info *server);
66extern void DeleteMidQEntry(struct mid_q_entry *midEntry); 70extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
67extern int cifs_call_async(struct TCP_Server_Info *server, 71extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
68 struct smb_hdr *in_buf, mid_callback_t *callback, 72 unsigned int nvec, mid_callback_t *callback,
69 void *cbdata); 73 void *cbdata, bool ignore_pend);
70extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, 74extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
71 struct smb_hdr * /* input */ , 75 struct smb_hdr * /* input */ ,
72 struct smb_hdr * /* out */ , 76 struct smb_hdr * /* out */ ,
73 int * /* bytes returned */ , const int long_op); 77 int * /* bytes returned */ , const int long_op);
74extern int SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses, 78extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
75 struct smb_hdr *in_buf, int flags); 79 struct smb_hdr *in_buf, int flags);
76extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, 80extern int cifs_check_receive(struct mid_q_entry *mid,
81 struct TCP_Server_Info *server, bool log_error);
82extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
77 struct kvec *, int /* nvec to send */, 83 struct kvec *, int /* nvec to send */,
78 int * /* type of buf returned */ , const int flags); 84 int * /* type of buf returned */ , const int flags);
79extern int SendReceiveBlockingLock(const unsigned int xid, 85extern int SendReceiveBlockingLock(const unsigned int xid,
80 struct cifsTconInfo *ptcon, 86 struct cifs_tcon *ptcon,
81 struct smb_hdr *in_buf , 87 struct smb_hdr *in_buf ,
82 struct smb_hdr *out_buf, 88 struct smb_hdr *out_buf,
83 int *bytes_returned); 89 int *bytes_returned);
@@ -90,21 +96,20 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
90extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 96extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
91extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 97extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
92extern unsigned int smbCalcSize(struct smb_hdr *ptr); 98extern unsigned int smbCalcSize(struct smb_hdr *ptr);
93extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
94extern int decode_negTokenInit(unsigned char *security_blob, int length, 99extern int decode_negTokenInit(unsigned char *security_blob, int length,
95 struct TCP_Server_Info *server); 100 struct TCP_Server_Info *server);
96extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 101extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
97extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); 102extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
98extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 103extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
99 const unsigned short int port); 104 const unsigned short int port);
100extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 105extern int map_smb_to_linux_error(struct smb_hdr *smb, bool logErr);
101extern void header_assemble(struct smb_hdr *, char /* command */ , 106extern void header_assemble(struct smb_hdr *, char /* command */ ,
102 const struct cifsTconInfo *, int /* length of 107 const struct cifs_tcon *, int /* length of
103 fixed section (word count) in two byte units */); 108 fixed section (word count) in two byte units */);
104extern int small_smb_init_no_tc(const int smb_cmd, const int wct, 109extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
105 struct cifsSesInfo *ses, 110 struct cifs_ses *ses,
106 void **request_buf); 111 void **request_buf);
107extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 112extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
108 const struct nls_table *nls_cp); 113 const struct nls_table *nls_cp);
109extern __u16 GetNextMid(struct TCP_Server_Info *server); 114extern __u16 GetNextMid(struct TCP_Server_Info *server);
110extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); 115extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
@@ -143,103 +148,111 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
143extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64); 148extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
144extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 149extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
145 const char *, u32 *); 150 const char *, u32 *);
151extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
152 const char *);
146 153
147extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 154extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
148 const char *); 155 struct cifs_sb_info *cifs_sb);
156extern int cifs_match_super(struct super_block *, void *);
157extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info);
158extern int cifs_setup_volume_info(struct smb_vol **pvolume_info,
159 char *mount_data, const char *devname);
160extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
161 struct smb_vol *, const char *);
149extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 162extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
150extern void cifs_dfs_release_automount_timer(void); 163extern void cifs_dfs_release_automount_timer(void);
151void cifs_proc_init(void); 164void cifs_proc_init(void);
152void cifs_proc_clean(void); 165void cifs_proc_clean(void);
153 166
154extern int cifs_negotiate_protocol(unsigned int xid, 167extern int cifs_negotiate_protocol(unsigned int xid,
155 struct cifsSesInfo *ses); 168 struct cifs_ses *ses);
156extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses, 169extern int cifs_setup_session(unsigned int xid, struct cifs_ses *ses,
157 struct nls_table *nls_info); 170 struct nls_table *nls_info);
158extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); 171extern int CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses);
159 172
160extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, 173extern int CIFSTCon(unsigned int xid, struct cifs_ses *ses,
161 const char *tree, struct cifsTconInfo *tcon, 174 const char *tree, struct cifs_tcon *tcon,
162 const struct nls_table *); 175 const struct nls_table *);
163 176
164extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, 177extern int CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
165 const char *searchName, const struct nls_table *nls_codepage, 178 const char *searchName, const struct nls_table *nls_codepage,
166 __u16 *searchHandle, struct cifs_search_info *psrch_inf, 179 __u16 *searchHandle, struct cifs_search_info *psrch_inf,
167 int map, const char dirsep); 180 int map, const char dirsep);
168 181
169extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, 182extern int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
170 __u16 searchHandle, struct cifs_search_info *psrch_inf); 183 __u16 searchHandle, struct cifs_search_info *psrch_inf);
171 184
172extern int CIFSFindClose(const int, struct cifsTconInfo *tcon, 185extern int CIFSFindClose(const int, struct cifs_tcon *tcon,
173 const __u16 search_handle); 186 const __u16 search_handle);
174 187
175extern int CIFSSMBQFileInfo(const int xid, struct cifsTconInfo *tcon, 188extern int CIFSSMBQFileInfo(const int xid, struct cifs_tcon *tcon,
176 u16 netfid, FILE_ALL_INFO *pFindData); 189 u16 netfid, FILE_ALL_INFO *pFindData);
177extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, 190extern int CIFSSMBQPathInfo(const int xid, struct cifs_tcon *tcon,
178 const unsigned char *searchName, 191 const unsigned char *searchName,
179 FILE_ALL_INFO *findData, 192 FILE_ALL_INFO *findData,
180 int legacy /* whether to use old info level */, 193 int legacy /* whether to use old info level */,
181 const struct nls_table *nls_codepage, int remap); 194 const struct nls_table *nls_codepage, int remap);
182extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, 195extern int SMBQueryInformation(const int xid, struct cifs_tcon *tcon,
183 const unsigned char *searchName, 196 const unsigned char *searchName,
184 FILE_ALL_INFO *findData, 197 FILE_ALL_INFO *findData,
185 const struct nls_table *nls_codepage, int remap); 198 const struct nls_table *nls_codepage, int remap);
186 199
187extern int CIFSSMBUnixQFileInfo(const int xid, struct cifsTconInfo *tcon, 200extern int CIFSSMBUnixQFileInfo(const int xid, struct cifs_tcon *tcon,
188 u16 netfid, FILE_UNIX_BASIC_INFO *pFindData); 201 u16 netfid, FILE_UNIX_BASIC_INFO *pFindData);
189extern int CIFSSMBUnixQPathInfo(const int xid, 202extern int CIFSSMBUnixQPathInfo(const int xid,
190 struct cifsTconInfo *tcon, 203 struct cifs_tcon *tcon,
191 const unsigned char *searchName, 204 const unsigned char *searchName,
192 FILE_UNIX_BASIC_INFO *pFindData, 205 FILE_UNIX_BASIC_INFO *pFindData,
193 const struct nls_table *nls_codepage, int remap); 206 const struct nls_table *nls_codepage, int remap);
194 207
195extern int CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, 208extern int CIFSGetDFSRefer(const int xid, struct cifs_ses *ses,
196 const unsigned char *searchName, 209 const unsigned char *searchName,
197 struct dfs_info3_param **target_nodes, 210 struct dfs_info3_param **target_nodes,
198 unsigned int *number_of_nodes_in_array, 211 unsigned int *number_of_nodes_in_array,
199 const struct nls_table *nls_codepage, int remap); 212 const struct nls_table *nls_codepage, int remap);
200 213
201extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, 214extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo,
202 const char *old_path, 215 const char *old_path,
203 const struct nls_table *nls_codepage, 216 const struct nls_table *nls_codepage,
204 unsigned int *pnum_referrals, 217 unsigned int *pnum_referrals,
205 struct dfs_info3_param **preferrals, 218 struct dfs_info3_param **preferrals,
206 int remap); 219 int remap);
207extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, 220extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
208 struct super_block *sb, struct smb_vol *vol); 221 struct super_block *sb, struct smb_vol *vol);
209extern int CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, 222extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon,
210 struct kstatfs *FSData); 223 struct kstatfs *FSData);
211extern int SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, 224extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon,
212 struct kstatfs *FSData); 225 struct kstatfs *FSData);
213extern int CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, 226extern int CIFSSMBSetFSUnixInfo(const int xid, struct cifs_tcon *tcon,
214 __u64 cap); 227 __u64 cap);
215 228
216extern int CIFSSMBQFSAttributeInfo(const int xid, 229extern int CIFSSMBQFSAttributeInfo(const int xid,
217 struct cifsTconInfo *tcon); 230 struct cifs_tcon *tcon);
218extern int CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon); 231extern int CIFSSMBQFSDeviceInfo(const int xid, struct cifs_tcon *tcon);
219extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon); 232extern int CIFSSMBQFSUnixInfo(const int xid, struct cifs_tcon *tcon);
220extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 233extern int CIFSSMBQFSPosixInfo(const int xid, struct cifs_tcon *tcon,
221 struct kstatfs *FSData); 234 struct kstatfs *FSData);
222 235
223extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, 236extern int CIFSSMBSetPathInfo(const int xid, struct cifs_tcon *tcon,
224 const char *fileName, const FILE_BASIC_INFO *data, 237 const char *fileName, const FILE_BASIC_INFO *data,
225 const struct nls_table *nls_codepage, 238 const struct nls_table *nls_codepage,
226 int remap_special_chars); 239 int remap_special_chars);
227extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, 240extern int CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon,
228 const FILE_BASIC_INFO *data, __u16 fid, 241 const FILE_BASIC_INFO *data, __u16 fid,
229 __u32 pid_of_opener); 242 __u32 pid_of_opener);
230extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, 243extern int CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon,
231 bool delete_file, __u16 fid, __u32 pid_of_opener); 244 bool delete_file, __u16 fid, __u32 pid_of_opener);
232#if 0 245#if 0
233extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, 246extern int CIFSSMBSetAttrLegacy(int xid, struct cifs_tcon *tcon,
234 char *fileName, __u16 dos_attributes, 247 char *fileName, __u16 dos_attributes,
235 const struct nls_table *nls_codepage); 248 const struct nls_table *nls_codepage);
236#endif /* possibly unneeded function */ 249#endif /* possibly unneeded function */
237extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, 250extern int CIFSSMBSetEOF(const int xid, struct cifs_tcon *tcon,
238 const char *fileName, __u64 size, 251 const char *fileName, __u64 size,
239 bool setAllocationSizeFlag, 252 bool setAllocationSizeFlag,
240 const struct nls_table *nls_codepage, 253 const struct nls_table *nls_codepage,
241 int remap_special_chars); 254 int remap_special_chars);
242extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 255extern int CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon,
243 __u64 size, __u16 fileHandle, __u32 opener_pid, 256 __u64 size, __u16 fileHandle, __u32 opener_pid,
244 bool AllocSizeFlag); 257 bool AllocSizeFlag);
245 258
@@ -253,121 +266,116 @@ struct cifs_unix_set_info_args {
253 dev_t device; 266 dev_t device;
254}; 267};
255 268
256extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, 269extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
257 const struct cifs_unix_set_info_args *args, 270 const struct cifs_unix_set_info_args *args,
258 u16 fid, u32 pid_of_opener); 271 u16 fid, u32 pid_of_opener);
259 272
260extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon, 273extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifs_tcon *pTcon,
261 char *fileName, 274 char *fileName,
262 const struct cifs_unix_set_info_args *args, 275 const struct cifs_unix_set_info_args *args,
263 const struct nls_table *nls_codepage, 276 const struct nls_table *nls_codepage,
264 int remap_special_chars); 277 int remap_special_chars);
265 278
266extern int CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon, 279extern int CIFSSMBMkDir(const int xid, struct cifs_tcon *tcon,
267 const char *newName, 280 const char *newName,
268 const struct nls_table *nls_codepage, 281 const struct nls_table *nls_codepage,
269 int remap_special_chars); 282 int remap_special_chars);
270extern int CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, 283extern int CIFSSMBRmDir(const int xid, struct cifs_tcon *tcon,
271 const char *name, const struct nls_table *nls_codepage, 284 const char *name, const struct nls_table *nls_codepage,
272 int remap_special_chars); 285 int remap_special_chars);
273extern int CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, 286extern int CIFSPOSIXDelFile(const int xid, struct cifs_tcon *tcon,
274 const char *name, __u16 type, 287 const char *name, __u16 type,
275 const struct nls_table *nls_codepage, 288 const struct nls_table *nls_codepage,
276 int remap_special_chars); 289 int remap_special_chars);
277extern int CIFSSMBDelFile(const int xid, struct cifsTconInfo *tcon, 290extern int CIFSSMBDelFile(const int xid, struct cifs_tcon *tcon,
278 const char *name, 291 const char *name,
279 const struct nls_table *nls_codepage, 292 const struct nls_table *nls_codepage,
280 int remap_special_chars); 293 int remap_special_chars);
281extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, 294extern int CIFSSMBRename(const int xid, struct cifs_tcon *tcon,
282 const char *fromName, const char *toName, 295 const char *fromName, const char *toName,
283 const struct nls_table *nls_codepage, 296 const struct nls_table *nls_codepage,
284 int remap_special_chars); 297 int remap_special_chars);
285extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, 298extern int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon,
286 int netfid, const char *target_name, 299 int netfid, const char *target_name,
287 const struct nls_table *nls_codepage, 300 const struct nls_table *nls_codepage,
288 int remap_special_chars); 301 int remap_special_chars);
289extern int CIFSCreateHardLink(const int xid, 302extern int CIFSCreateHardLink(const int xid,
290 struct cifsTconInfo *tcon, 303 struct cifs_tcon *tcon,
291 const char *fromName, const char *toName, 304 const char *fromName, const char *toName,
292 const struct nls_table *nls_codepage, 305 const struct nls_table *nls_codepage,
293 int remap_special_chars); 306 int remap_special_chars);
294extern int CIFSUnixCreateHardLink(const int xid, 307extern int CIFSUnixCreateHardLink(const int xid,
295 struct cifsTconInfo *tcon, 308 struct cifs_tcon *tcon,
296 const char *fromName, const char *toName, 309 const char *fromName, const char *toName,
297 const struct nls_table *nls_codepage, 310 const struct nls_table *nls_codepage,
298 int remap_special_chars); 311 int remap_special_chars);
299extern int CIFSUnixCreateSymLink(const int xid, 312extern int CIFSUnixCreateSymLink(const int xid,
300 struct cifsTconInfo *tcon, 313 struct cifs_tcon *tcon,
301 const char *fromName, const char *toName, 314 const char *fromName, const char *toName,
302 const struct nls_table *nls_codepage); 315 const struct nls_table *nls_codepage);
303extern int CIFSSMBUnixQuerySymLink(const int xid, 316extern int CIFSSMBUnixQuerySymLink(const int xid,
304 struct cifsTconInfo *tcon, 317 struct cifs_tcon *tcon,
305 const unsigned char *searchName, char **syminfo, 318 const unsigned char *searchName, char **syminfo,
306 const struct nls_table *nls_codepage); 319 const struct nls_table *nls_codepage);
320#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
307extern int CIFSSMBQueryReparseLinkInfo(const int xid, 321extern int CIFSSMBQueryReparseLinkInfo(const int xid,
308 struct cifsTconInfo *tcon, 322 struct cifs_tcon *tcon,
309 const unsigned char *searchName, 323 const unsigned char *searchName,
310 char *symlinkinfo, const int buflen, __u16 fid, 324 char *symlinkinfo, const int buflen, __u16 fid,
311 const struct nls_table *nls_codepage); 325 const struct nls_table *nls_codepage);
 312 326#endif /* temporarily unused until cifs_symlink is fixed */
313extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 327extern int CIFSSMBOpen(const int xid, struct cifs_tcon *tcon,
314 const char *fileName, const int disposition, 328 const char *fileName, const int disposition,
315 const int access_flags, const int omode, 329 const int access_flags, const int omode,
316 __u16 *netfid, int *pOplock, FILE_ALL_INFO *, 330 __u16 *netfid, int *pOplock, FILE_ALL_INFO *,
317 const struct nls_table *nls_codepage, int remap); 331 const struct nls_table *nls_codepage, int remap);
318extern int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, 332extern int SMBLegacyOpen(const int xid, struct cifs_tcon *tcon,
319 const char *fileName, const int disposition, 333 const char *fileName, const int disposition,
320 const int access_flags, const int omode, 334 const int access_flags, const int omode,
321 __u16 *netfid, int *pOplock, FILE_ALL_INFO *, 335 __u16 *netfid, int *pOplock, FILE_ALL_INFO *,
322 const struct nls_table *nls_codepage, int remap); 336 const struct nls_table *nls_codepage, int remap);
323extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, 337extern int CIFSPOSIXCreate(const int xid, struct cifs_tcon *tcon,
324 u32 posix_flags, __u64 mode, __u16 *netfid, 338 u32 posix_flags, __u64 mode, __u16 *netfid,
325 FILE_UNIX_BASIC_INFO *pRetData, 339 FILE_UNIX_BASIC_INFO *pRetData,
326 __u32 *pOplock, const char *name, 340 __u32 *pOplock, const char *name,
327 const struct nls_table *nls_codepage, int remap); 341 const struct nls_table *nls_codepage, int remap);
328extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, 342extern int CIFSSMBClose(const int xid, struct cifs_tcon *tcon,
329 const int smb_file_id); 343 const int smb_file_id);
330 344
331extern int CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, 345extern int CIFSSMBFlush(const int xid, struct cifs_tcon *tcon,
332 const int smb_file_id); 346 const int smb_file_id);
333 347
334extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, 348extern int CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms,
335 const int netfid, unsigned int count, 349 unsigned int *nbytes, char **buf,
336 const __u64 lseek, unsigned int *nbytes, char **buf,
337 int *return_buf_type); 350 int *return_buf_type);
338extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, 351extern int CIFSSMBWrite(const int xid, struct cifs_io_parms *io_parms,
339 const int netfid, const unsigned int count, 352 unsigned int *nbytes, const char *buf,
340 const __u64 lseek, unsigned int *nbytes, 353 const char __user *ubuf, const int long_op);
341 const char *buf, const char __user *ubuf, 354extern int CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms,
355 unsigned int *nbytes, struct kvec *iov, const int nvec,
342 const int long_op); 356 const int long_op);
343extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, 357extern int CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon,
344 const int netfid, const unsigned int count,
345 const __u64 offset, unsigned int *nbytes,
346 struct kvec *iov, const int nvec, const int long_op);
347extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
348 const unsigned char *searchName, __u64 *inode_number, 358 const unsigned char *searchName, __u64 *inode_number,
349 const struct nls_table *nls_codepage, 359 const struct nls_table *nls_codepage,
350 int remap_special_chars); 360 int remap_special_chars);
351extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
352 const struct nls_table *cp, int mapChars);
353 361
354extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 362extern int CIFSSMBLock(const int xid, struct cifs_tcon *tcon,
355 const __u16 netfid, const __u64 len, 363 const __u16 netfid, const __u64 len,
356 const __u64 offset, const __u32 numUnlock, 364 const __u64 offset, const __u32 numUnlock,
357 const __u32 numLock, const __u8 lockType, 365 const __u32 numLock, const __u8 lockType,
358 const bool waitFlag, const __u8 oplock_level); 366 const bool waitFlag, const __u8 oplock_level);
359extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 367extern int CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon,
360 const __u16 smb_file_id, const int get_flag, 368 const __u16 smb_file_id, const int get_flag,
361 const __u64 len, struct file_lock *, 369 const __u64 len, struct file_lock *,
362 const __u16 lock_type, const bool waitFlag); 370 const __u16 lock_type, const bool waitFlag);
363extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); 371extern int CIFSSMBTDis(const int xid, struct cifs_tcon *tcon);
364extern int CIFSSMBEcho(struct TCP_Server_Info *server); 372extern int CIFSSMBEcho(struct TCP_Server_Info *server);
365extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); 373extern int CIFSSMBLogoff(const int xid, struct cifs_ses *ses);
366 374
367extern struct cifsSesInfo *sesInfoAlloc(void); 375extern struct cifs_ses *sesInfoAlloc(void);
368extern void sesInfoFree(struct cifsSesInfo *); 376extern void sesInfoFree(struct cifs_ses *);
369extern struct cifsTconInfo *tconInfoAlloc(void); 377extern struct cifs_tcon *tconInfoAlloc(void);
370extern void tconInfoFree(struct cifsTconInfo *); 378extern void tconInfoFree(struct cifs_tcon *);
371 379
372extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); 380extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
373extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 381extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
@@ -376,49 +384,51 @@ extern int cifs_verify_signature(struct smb_hdr *,
376 struct TCP_Server_Info *server, 384 struct TCP_Server_Info *server,
377 __u32 expected_sequence_number); 385 __u32 expected_sequence_number);
378extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); 386extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
379extern int setup_ntlm_response(struct cifsSesInfo *); 387extern int setup_ntlm_response(struct cifs_ses *);
380extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *); 388extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
381extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); 389extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
382extern void cifs_crypto_shash_release(struct TCP_Server_Info *); 390extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
383extern int calc_seckey(struct cifsSesInfo *); 391extern int calc_seckey(struct cifs_ses *);
384 392
385#ifdef CONFIG_CIFS_WEAK_PW_HASH 393#ifdef CONFIG_CIFS_WEAK_PW_HASH
386extern void calc_lanman_hash(const char *password, const char *cryptkey, 394extern int calc_lanman_hash(const char *password, const char *cryptkey,
387 bool encrypt, char *lnm_session_key); 395 bool encrypt, char *lnm_session_key);
388#endif /* CIFS_WEAK_PW_HASH */ 396#endif /* CIFS_WEAK_PW_HASH */
397#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
398extern int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon,
399 const int notify_subdirs, const __u16 netfid,
400 __u32 filter, struct file *file, int multishot,
401 const struct nls_table *nls_codepage);
 402#endif /* was needed for dnotify; will be needed for inotify once the VFS is fixed */
389extern int CIFSSMBCopy(int xid, 403extern int CIFSSMBCopy(int xid,
390 struct cifsTconInfo *source_tcon, 404 struct cifs_tcon *source_tcon,
391 const char *fromName, 405 const char *fromName,
392 const __u16 target_tid, 406 const __u16 target_tid,
393 const char *toName, const int flags, 407 const char *toName, const int flags,
394 const struct nls_table *nls_codepage, 408 const struct nls_table *nls_codepage,
395 int remap_special_chars); 409 int remap_special_chars);
396extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, 410extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon,
397 const int notify_subdirs, const __u16 netfid,
398 __u32 filter, struct file *file, int multishot,
399 const struct nls_table *nls_codepage);
400extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
401 const unsigned char *searchName, 411 const unsigned char *searchName,
402 const unsigned char *ea_name, char *EAData, 412 const unsigned char *ea_name, char *EAData,
403 size_t bufsize, const struct nls_table *nls_codepage, 413 size_t bufsize, const struct nls_table *nls_codepage,
404 int remap_special_chars); 414 int remap_special_chars);
405extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, 415extern int CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon,
406 const char *fileName, const char *ea_name, 416 const char *fileName, const char *ea_name,
407 const void *ea_value, const __u16 ea_value_len, 417 const void *ea_value, const __u16 ea_value_len,
408 const struct nls_table *nls_codepage, int remap_special_chars); 418 const struct nls_table *nls_codepage, int remap_special_chars);
409extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, 419extern int CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon,
410 __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); 420 __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen);
411extern int CIFSSMBSetCIFSACL(const int, struct cifsTconInfo *, __u16, 421extern int CIFSSMBSetCIFSACL(const int, struct cifs_tcon *, __u16,
412 struct cifs_ntsd *, __u32); 422 struct cifs_ntsd *, __u32);
413extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, 423extern int CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon,
414 const unsigned char *searchName, 424 const unsigned char *searchName,
415 char *acl_inf, const int buflen, const int acl_type, 425 char *acl_inf, const int buflen, const int acl_type,
416 const struct nls_table *nls_codepage, int remap_special_chars); 426 const struct nls_table *nls_codepage, int remap_special_chars);
417extern int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, 427extern int CIFSSMBSetPosixACL(const int xid, struct cifs_tcon *tcon,
418 const unsigned char *fileName, 428 const unsigned char *fileName,
419 const char *local_acl, const int buflen, const int acl_type, 429 const char *local_acl, const int buflen, const int acl_type,
420 const struct nls_table *nls_codepage, int remap_special_chars); 430 const struct nls_table *nls_codepage, int remap_special_chars);
421extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, 431extern int CIFSGetExtAttr(const int xid, struct cifs_tcon *tcon,
422 const int netfid, __u64 *pExtAttrBits, __u64 *pMask); 432 const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
423extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); 433extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb);
424extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); 434extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
@@ -427,9 +437,24 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
427 struct cifs_sb_info *cifs_sb, int xid); 437 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int); 438extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 439extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 440extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24); 441 unsigned char *p24);
442
443/* asynchronous write support */
444struct cifs_writedata {
445 struct kref refcount;
446 enum writeback_sync_modes sync_mode;
447 struct work_struct work;
448 struct cifsFileInfo *cfile;
449 __u64 offset;
450 unsigned int bytes;
451 int result;
452 unsigned int nr_pages;
453 struct page *pages[1];
454};
455
456int cifs_async_writev(struct cifs_writedata *wdata);
457struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages);
458void cifs_writedata_release(struct kref *refcount);
459
435#endif /* _CIFSPROTO_H */ 460#endif /* _CIFSPROTO_H */
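The async write API added above implies a simple refcounted lifecycle; a hedged caller sketch follows (page attachment and locking elided; wbc is the caller's writeback_control, and dropping the final reference on the error path is assumed to be the caller's job):

	struct cifs_writedata *wdata = cifs_writedata_alloc(nr_pages);

	if (wdata == NULL)
		return -ENOMEM;
	wdata->cfile = cfile;			/* open file the write targets */
	wdata->sync_mode = wbc->sync_mode;
	wdata->offset = offset;
	wdata->bytes = bytes;
	wdata->nr_pages = nr_pages;
	/* ... fill wdata->pages[] with locked, dirty pages ... */
	rc = cifs_async_writev(wdata);
	if (rc)
		kref_put(&wdata->refcount, cifs_writedata_release);
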
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index df959bae6728..1a9fe7f816d1 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -32,6 +32,7 @@
32#include <linux/vfs.h> 32#include <linux/vfs.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/posix_acl_xattr.h> 34#include <linux/posix_acl_xattr.h>
35#include <linux/pagemap.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include "cifspdu.h" 37#include "cifspdu.h"
37#include "cifsglob.h" 38#include "cifsglob.h"
@@ -84,7 +85,7 @@ static struct {
84 85
85/* Mark as invalid, all open files on tree connections since they 86/* Mark as invalid, all open files on tree connections since they
86 were closed when session to server was lost */ 87 were closed when session to server was lost */
87static void mark_open_files_invalid(struct cifsTconInfo *pTcon) 88static void mark_open_files_invalid(struct cifs_tcon *pTcon)
88{ 89{
89 struct cifsFileInfo *open_file = NULL; 90 struct cifsFileInfo *open_file = NULL;
90 struct list_head *tmp; 91 struct list_head *tmp;
@@ -104,10 +105,10 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
104 105
105/* reconnect the socket, tcon, and smb session if needed */ 106/* reconnect the socket, tcon, and smb session if needed */
106static int 107static int
107cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) 108cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
108{ 109{
109 int rc = 0; 110 int rc = 0;
110 struct cifsSesInfo *ses; 111 struct cifs_ses *ses;
111 struct TCP_Server_Info *server; 112 struct TCP_Server_Info *server;
112 struct nls_table *nls_codepage; 113 struct nls_table *nls_codepage;
113 114
@@ -226,7 +227,7 @@ out:
226 SMB information in the SMB header. If the return code is zero, this 227 SMB information in the SMB header. If the return code is zero, this
227 function must have filled in request_buf pointer */ 228 function must have filled in request_buf pointer */
228static int 229static int
229small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 230small_smb_init(int smb_command, int wct, struct cifs_tcon *tcon,
230 void **request_buf) 231 void **request_buf)
231{ 232{
232 int rc; 233 int rc;
@@ -252,7 +253,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
252 253
253int 254int
254small_smb_init_no_tc(const int smb_command, const int wct, 255small_smb_init_no_tc(const int smb_command, const int wct,
255 struct cifsSesInfo *ses, void **request_buf) 256 struct cifs_ses *ses, void **request_buf)
256{ 257{
257 int rc; 258 int rc;
258 struct smb_hdr *buffer; 259 struct smb_hdr *buffer;
@@ -278,7 +279,7 @@ small_smb_init_no_tc(const int smb_command, const int wct,
278 279
279/* If the return code is zero, this function must fill in request_buf pointer */ 280/* If the return code is zero, this function must fill in request_buf pointer */
280static int 281static int
281__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 282__smb_init(int smb_command, int wct, struct cifs_tcon *tcon,
282 void **request_buf, void **response_buf) 283 void **request_buf, void **response_buf)
283{ 284{
284 *request_buf = cifs_buf_get(); 285 *request_buf = cifs_buf_get();
@@ -304,7 +305,7 @@ __smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
304 305
305/* If the return code is zero, this function must fill in request_buf pointer */ 306/* If the return code is zero, this function must fill in request_buf pointer */
306static int 307static int
307smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 308smb_init(int smb_command, int wct, struct cifs_tcon *tcon,
308 void **request_buf, void **response_buf) 309 void **request_buf, void **response_buf)
309{ 310{
310 int rc; 311 int rc;
@@ -317,7 +318,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
317} 318}
318 319
319static int 320static int
320smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, 321smb_init_no_reconnect(int smb_command, int wct, struct cifs_tcon *tcon,
321 void **request_buf, void **response_buf) 322 void **request_buf, void **response_buf)
322{ 323{
323 if (tcon->ses->need_reconnect || tcon->need_reconnect) 324 if (tcon->ses->need_reconnect || tcon->need_reconnect)
@@ -339,12 +340,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) 340 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
340 goto vt2_err; 341 goto vt2_err;
341 342
342 /* check that bcc is at least as big as parms + data */
343 /* check that bcc is less than negotiated smb buffer */
344 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); 343 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
345 if (total_size >= 512) 344 if (total_size >= 512)
346 goto vt2_err; 345 goto vt2_err;
347 346
347 /* check that bcc is at least as big as parms + data, and that it is
348 * less than negotiated smb buffer
349 */
348 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); 350 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
349 if (total_size > get_bcc(&pSMB->hdr) || 351 if (total_size > get_bcc(&pSMB->hdr) ||
350 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) 352 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,8 +359,15 @@ vt2_err:
357 return -EINVAL; 359 return -EINVAL;
358} 360}
359 361
362static inline void inc_rfc1001_len(void *pSMB, int count)
363{
364 struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
365
366 be32_add_cpu(&hdr->smb_buf_length, count);
367}
368
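With smb_buf_length now __be32, open-coded arithmetic on it would corrupt the wire length; this helper is the byte-order-safe replacement used throughout the rest of this patch (compare the CIFSSMBNegotiate and CIFSSMBEcho hunks below). Reading the total on-the-wire size then looks like this sketch:

	inc_rfc1001_len(pSMB, byte_count);			/* adjust payload */
	total = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;	/* + RFC1001 header */
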
360int 369int
361CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) 370CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
362{ 371{
363 NEGOTIATE_REQ *pSMB; 372 NEGOTIATE_REQ *pSMB;
364 NEGOTIATE_RSP *pSMBr; 373 NEGOTIATE_RSP *pSMBr;
@@ -409,7 +418,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
409 count += strlen(protocols[i].name) + 1; 418 count += strlen(protocols[i].name) + 1;
410 /* null at end of source and target buffers anyway */ 419 /* null at end of source and target buffers anyway */
411 } 420 }
412 pSMB->hdr.smb_buf_length += count; 421 inc_rfc1001_len(pSMB, count);
413 pSMB->ByteCount = cpu_to_le16(count); 422 pSMB->ByteCount = cpu_to_le16(count);
414 423
415 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 424 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -442,7 +451,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
442 rc = -EOPNOTSUPP; 451 rc = -EOPNOTSUPP;
443 goto neg_err_exit; 452 goto neg_err_exit;
444 } 453 }
445 server->secMode = (__u8)le16_to_cpu(rsp->SecurityMode); 454 server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode);
446 server->maxReq = le16_to_cpu(rsp->MaxMpxCount); 455 server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
447 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), 456 server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize),
448 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 457 (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
@@ -496,7 +505,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
496 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { 505 cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
497 memcpy(ses->server->cryptkey, rsp->EncryptionKey, 506 memcpy(ses->server->cryptkey, rsp->EncryptionKey,
498 CIFS_CRYPTO_KEY_SIZE); 507 CIFS_CRYPTO_KEY_SIZE);
499 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 508 } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
500 rc = -EIO; /* need cryptkey unless plain text */ 509 rc = -EIO; /* need cryptkey unless plain text */
501 goto neg_err_exit; 510 goto neg_err_exit;
502 } 511 }
@@ -518,11 +527,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
518 goto neg_err_exit; 527 goto neg_err_exit;
519 } 528 }
520 /* else wct == 17 NTLM */ 529 /* else wct == 17 NTLM */
521 server->secMode = pSMBr->SecurityMode; 530 server->sec_mode = pSMBr->SecurityMode;
522 if ((server->secMode & SECMODE_USER) == 0) 531 if ((server->sec_mode & SECMODE_USER) == 0)
523 cFYI(1, "share mode security"); 532 cFYI(1, "share mode security");
524 533
525 if ((server->secMode & SECMODE_PW_ENCRYPT) == 0) 534 if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0)
526#ifdef CONFIG_CIFS_WEAK_PW_HASH 535#ifdef CONFIG_CIFS_WEAK_PW_HASH
527 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) 536 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
528#endif /* CIFS_WEAK_PW_HASH */ 537#endif /* CIFS_WEAK_PW_HASH */
@@ -541,10 +550,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
541 server->secType = RawNTLMSSP; 550 server->secType = RawNTLMSSP;
542 else if (secFlags & CIFSSEC_MAY_LANMAN) 551 else if (secFlags & CIFSSEC_MAY_LANMAN)
543 server->secType = LANMAN; 552 server->secType = LANMAN;
544/* #ifdef CONFIG_CIFS_EXPERIMENTAL
545 else if (secFlags & CIFSSEC_MAY_PLNTXT)
546 server->secType = ??
547#endif */
548 else { 553 else {
549 rc = -EOPNOTSUPP; 554 rc = -EOPNOTSUPP;
550 cERROR(1, "Invalid security type"); 555 cERROR(1, "Invalid security type");
@@ -566,19 +571,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
566 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 571 if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
567 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, 572 memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
568 CIFS_CRYPTO_KEY_SIZE); 573 CIFS_CRYPTO_KEY_SIZE);
569 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) 574 } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
570 && (pSMBr->EncryptionKeyLength == 0)) { 575 server->capabilities & CAP_EXTENDED_SECURITY) &&
576 (pSMBr->EncryptionKeyLength == 0)) {
571 /* decode security blob */ 577 /* decode security blob */
572 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 578 count = get_bcc(&pSMBr->hdr);
573 rc = -EIO; /* no crypt key only if plain text pwd */
574 goto neg_err_exit;
575 }
576
577 /* BB might be helpful to save off the domain of server here */
578
579 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
580 (server->capabilities & CAP_EXTENDED_SECURITY)) {
581 count = pSMBr->ByteCount;
582 if (count < 16) { 579 if (count < 16) {
583 rc = -EIO; 580 rc = -EIO;
584 goto neg_err_exit; 581 goto neg_err_exit;
@@ -620,6 +617,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
620 } else 617 } else
621 rc = -EOPNOTSUPP; 618 rc = -EOPNOTSUPP;
622 } 619 }
620 } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
621 rc = -EIO; /* no crypt key only if plain text pwd */
622 goto neg_err_exit;
623 } else 623 } else
624 server->capabilities &= ~CAP_EXTENDED_SECURITY; 624 server->capabilities &= ~CAP_EXTENDED_SECURITY;
625 625
@@ -630,27 +630,27 @@ signing_check:
630 /* MUST_SIGN already includes the MAY_SIGN FLAG 630 /* MUST_SIGN already includes the MAY_SIGN FLAG
631 so if this is zero it means that signing is disabled */ 631 so if this is zero it means that signing is disabled */
632 cFYI(1, "Signing disabled"); 632 cFYI(1, "Signing disabled");
633 if (server->secMode & SECMODE_SIGN_REQUIRED) { 633 if (server->sec_mode & SECMODE_SIGN_REQUIRED) {
634 cERROR(1, "Server requires " 634 cERROR(1, "Server requires "
635 "packet signing to be enabled in " 635 "packet signing to be enabled in "
636 "/proc/fs/cifs/SecurityFlags."); 636 "/proc/fs/cifs/SecurityFlags.");
637 rc = -EOPNOTSUPP; 637 rc = -EOPNOTSUPP;
638 } 638 }
639 server->secMode &= 639 server->sec_mode &=
640 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 640 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
641 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { 641 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
642 /* signing required */ 642 /* signing required */
643 cFYI(1, "Must sign - secFlags 0x%x", secFlags); 643 cFYI(1, "Must sign - secFlags 0x%x", secFlags);
644 if ((server->secMode & 644 if ((server->sec_mode &
645 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { 645 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
646 cERROR(1, "signing required but server lacks support"); 646 cERROR(1, "signing required but server lacks support");
647 rc = -EOPNOTSUPP; 647 rc = -EOPNOTSUPP;
648 } else 648 } else
649 server->secMode |= SECMODE_SIGN_REQUIRED; 649 server->sec_mode |= SECMODE_SIGN_REQUIRED;
650 } else { 650 } else {
651 /* signing optional ie CIFSSEC_MAY_SIGN */ 651 /* signing optional ie CIFSSEC_MAY_SIGN */
652 if ((server->secMode & SECMODE_SIGN_REQUIRED) == 0) 652 if ((server->sec_mode & SECMODE_SIGN_REQUIRED) == 0)
653 server->secMode &= 653 server->sec_mode &=
654 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 654 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
655 } 655 }
656 656
@@ -662,7 +662,7 @@ neg_err_exit:
 }
 
 int
-CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
+CIFSSMBTDis(const int xid, struct cifs_tcon *tcon)
 {
 	struct smb_hdr *smb_buffer;
 	int rc = 0;
@@ -721,6 +721,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
 {
 	ECHO_REQ *smb;
 	int rc = 0;
+	struct kvec iov;
 
 	cFYI(1, "In echo request");
 
@@ -732,12 +733,13 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
 	smb->hdr.Tid = 0xffff;
 	smb->hdr.WordCount = 1;
 	put_unaligned_le16(1, &smb->EchoCount);
-	put_bcc_le(1, &smb->hdr);
+	put_bcc(1, &smb->hdr);
 	smb->Data[0] = 'a';
-	smb->hdr.smb_buf_length += 3;
+	inc_rfc1001_len(smb, 3);
+	iov.iov_base = smb;
+	iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
 
-	rc = cifs_call_async(server, (struct smb_hdr *)smb,
-			     cifs_echo_callback, server);
+	rc = cifs_call_async(server, &iov, 1, cifs_echo_callback, server, true);
 	if (rc)
 		cFYI(1, "Echo request failed: %d", rc);
 
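Two conversions recur through the rest of this diff. `inc_rfc1001_len()` replaces every open-coded `pSMB->hdr.smb_buf_length += n`, because the RFC1001 length field is now kept in big-endian (wire) order inside the header; correspondingly, every `iov_len` computation gains a `be32_to_cpu()` before the 4-byte length prefix is added. A minimal sketch of what the helper presumably does (inferred from those conversions; the real definition lives in the cifs headers, not in this file):

	/* Sketch: smb_buf_length is stored big-endian after this series,
	 * so growing the frame means convert, add, convert back. */
	static inline void
	inc_rfc1001_len(void *buf, int count)
	{
		struct smb_hdr *hdr = (struct smb_hdr *)buf;

		hdr->smb_buf_length =
			cpu_to_be32(be32_to_cpu(hdr->smb_buf_length) + count);
	}

The CIFSSMBEcho hunk above also shows the new `cifs_call_async()` shape: it now takes a kvec array plus a vector count instead of a raw `smb_hdr` pointer.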
@@ -747,7 +749,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
 }
 
 int
-CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
+CIFSSMBLogoff(const int xid, struct cifs_ses *ses)
 {
 	LOGOFF_ANDX_REQ *pSMB;
 	int rc = 0;
@@ -774,7 +776,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
 
 	pSMB->hdr.Mid = GetNextMid(ses->server);
 
-	if (ses->server->secMode &
+	if (ses->server->sec_mode &
 	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
 		pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
 
@@ -794,7 +796,7 @@ session_already_dead:
 }
 
 int
-CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName,
+CIFSPOSIXDelFile(const int xid, struct cifs_tcon *tcon, const char *fileName,
 		 __u16 type, const struct nls_table *nls_codepage, int remap)
 {
 	TRANSACTION2_SPI_REQ *pSMB = NULL;
@@ -852,7 +854,7 @@ PsxDelete:
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -869,7 +871,7 @@ PsxDelete:
 }
 
 int
-CIFSSMBDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName,
+CIFSSMBDelFile(const int xid, struct cifs_tcon *tcon, const char *fileName,
 	       const struct nls_table *nls_codepage, int remap)
 {
 	DELETE_FILE_REQ *pSMB = NULL;
@@ -898,7 +900,7 @@ DelFileRetry:
 	pSMB->SearchAttributes =
 	    cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -914,7 +916,7 @@ DelFileRetry:
 }
 
 int
-CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *dirName,
+CIFSSMBRmDir(const int xid, struct cifs_tcon *tcon, const char *dirName,
 	     const struct nls_table *nls_codepage, int remap)
 {
 	DELETE_DIRECTORY_REQ *pSMB = NULL;
@@ -942,7 +944,7 @@ RmDirRetry:
 	}
 
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -957,7 +959,7 @@ RmDirRetry:
 }
 
 int
-CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBMkDir(const int xid, struct cifs_tcon *tcon,
 	     const char *name, const struct nls_table *nls_codepage, int remap)
 {
 	int rc = 0;
@@ -985,7 +987,7 @@ MkDirRetry:
 	}
 
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1000,7 +1002,7 @@ MkDirRetry:
 }
 
 int
-CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
+CIFSPOSIXCreate(const int xid, struct cifs_tcon *tcon, __u32 posix_flags,
 		__u64 mode, __u16 *netfid, FILE_UNIX_BASIC_INFO *pRetData,
 		__u32 *pOplock, const char *name,
 		const struct nls_table *nls_codepage, int remap)
@@ -1063,7 +1065,7 @@ PsxCreat:
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1077,7 @@ PsxCreat:
 	cFYI(1, "copying inode info");
 	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-	if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
+	if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
 		rc = -EIO;	/* bad smb */
 		goto psx_create_err;
 	}
@@ -1096,7 +1098,7 @@ PsxCreat:
 		pRetData->Type = cpu_to_le32(-1); /* unknown */
 		cFYI(DBG2, "unknown type");
 	} else {
-		if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
+		if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
 				+ sizeof(FILE_UNIX_BASIC_INFO)) {
 			cERROR(1, "Open response data too small");
 			pRetData->Type = cpu_to_le32(-1);
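The other recurring conversion is `get_bcc()`, which replaces direct reads of `pSMBr->ByteCount` when validating responses. Previously SendReceive byteswapped ByteCount in place; after this series the field stays in little-endian wire order and is converted at each use. A plausible sketch of the helper (assumption: the BCC sits immediately after the WordCount words; the real helper also copes with unaligned access):

	/* Sketch: return the byte count (BCC) of a response in CPU order.
	 * The field itself stays little-endian in the received buffer. */
	static inline __u16
	get_bcc(struct smb_hdr *hdr)
	{
		__le16 *bc_ptr = (__le16 *)((char *)hdr + sizeof(*hdr) +
					    2 * hdr->WordCount);

		return le16_to_cpu(*bc_ptr);
	}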
@@ -1166,7 +1168,7 @@ access_flags_to_smbopen_mode(const int access_flags)
 }
 
 int
-SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon,
+SMBLegacyOpen(const int xid, struct cifs_tcon *tcon,
 	      const char *fileName, const int openDisposition,
 	      const int access_flags, const int create_options, __u16 *netfid,
 	      int *pOplock, FILE_ALL_INFO *pfile_info,
@@ -1228,7 +1230,7 @@ OldOpenRetry:
 	pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
 	pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
 	count += name_len;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 
 	pSMB->ByteCount = cpu_to_le16(count);
 	/* long_op set to 1 to allow for oplock break timeouts */
@@ -1273,7 +1275,7 @@ OldOpenRetry:
 }
 
 int
-CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBOpen(const int xid, struct cifs_tcon *tcon,
 	    const char *fileName, const int openDisposition,
 	    const int access_flags, const int create_options, __u16 *netfid,
 	    int *pOplock, FILE_ALL_INFO *pfile_info,
@@ -1341,7 +1343,7 @@ openRetry:
 			SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
 
 	count += name_len;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 
 	pSMB->ByteCount = cpu_to_le16(count);
 	/* long_op set to 1 to allow for oplock break timeouts */
@@ -1375,8 +1377,7 @@ openRetry:
 }
 
 int
-CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
-	    const unsigned int count, const __u64 lseek, unsigned int *nbytes,
+CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes,
 	    char **buf, int *pbuf_type)
 {
 	int rc = -EACCES;
@@ -1386,13 +1387,18 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 	int wct;
 	int resp_buf_type = 0;
 	struct kvec iov[1];
+	__u32 pid = io_parms->pid;
+	__u16 netfid = io_parms->netfid;
+	__u64 offset = io_parms->offset;
+	struct cifs_tcon *tcon = io_parms->tcon;
+	unsigned int count = io_parms->length;
 
 	cFYI(1, "Reading %d bytes on fid %d", count, netfid);
 	if (tcon->ses->capabilities & CAP_LARGE_FILES)
 		wct = 12;
 	else {
 		wct = 10; /* old style read */
-		if ((lseek >> 32) > 0)  {
+		if ((offset >> 32) > 0)  {
 			/* can not handle this big offset for old */
 			return -EIO;
 		}
@@ -1403,15 +1409,18 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 	if (rc)
 		return rc;
 
+	pSMB->hdr.Pid = cpu_to_le16((__u16)pid);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16));
+
 	/* tcon and ses pointer are checked in smb_init */
 	if (tcon->ses->server == NULL)
 		return -ECONNABORTED;
 
 	pSMB->AndXCommand = 0xFF;	/* none */
 	pSMB->Fid = netfid;
-	pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF);
+	pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF);
 	if (wct == 12)
-		pSMB->OffsetHigh = cpu_to_le32(lseek >> 32);
+		pSMB->OffsetHigh = cpu_to_le32(offset >> 32);
 
 	pSMB->Remaining = 0;
 	pSMB->MaxCount = cpu_to_le16(count & 0xFFFF);
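These read/write hunks also start stamping the originating process id into the SMB header, carried in from the new `cifs_io_parms` bundle (presumably so byte-range lock ownership is attributed to the right process rather than to the thread doing the I/O). The split across the two 16-bit header fields, restated as a hypothetical helper (no such helper exists in the patch, which does this inline):

	/* Hypothetical helper: low 16 bits -> Pid, high 16 bits -> PidHigh */
	static inline void
	set_smb_pid(struct smb_hdr *hdr, __u32 pid)
	{
		hdr->Pid = cpu_to_le16((__u16)pid);
		hdr->PidHigh = cpu_to_le16((__u16)(pid >> 16));
	}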
@@ -1426,7 +1435,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 	}
 
 	iov[0].iov_base = (char *)pSMB;
-	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
 	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
 			 &resp_buf_type, CIFS_LOG_ERROR);
 	cifs_stats_inc(&tcon->num_reads);
@@ -1480,9 +1489,8 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 
 
 int
-CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
-	     const int netfid, const unsigned int count,
-	     const __u64 offset, unsigned int *nbytes, const char *buf,
+CIFSSMBWrite(const int xid, struct cifs_io_parms *io_parms,
+	     unsigned int *nbytes, const char *buf,
 	     const char __user *ubuf, const int long_op)
 {
 	int rc = -EACCES;
@@ -1491,6 +1499,11 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	int bytes_returned, wct;
 	__u32 bytes_sent;
 	__u16 byte_count;
+	__u32 pid = io_parms->pid;
+	__u16 netfid = io_parms->netfid;
+	__u64 offset = io_parms->offset;
+	struct cifs_tcon *tcon = io_parms->tcon;
+	unsigned int count = io_parms->length;
 
 	*nbytes = 0;
 
@@ -1512,6 +1525,10 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 		      (void **) &pSMBr);
 	if (rc)
 		return rc;
+
+	pSMB->hdr.Pid = cpu_to_le16((__u16)pid);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16));
+
 	/* tcon and ses pointer are checked in smb_init */
 	if (tcon->ses->server == NULL)
 		return -ECONNABORTED;
@@ -1560,7 +1577,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 
 	pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
 	pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 
 	if (wct == 14)
 		pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1598,17 +1615,259 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	return rc;
 }
 
+void
+cifs_writedata_release(struct kref *refcount)
+{
+	struct cifs_writedata *wdata = container_of(refcount,
+					struct cifs_writedata, refcount);
+
+	if (wdata->cfile)
+		cifsFileInfo_put(wdata->cfile);
+
+	kfree(wdata);
+}
+
+/*
+ * Write failed with a retryable error. Resend the write request. It's also
+ * possible that the page was redirtied so re-clean the page.
+ */
+static void
+cifs_writev_requeue(struct cifs_writedata *wdata)
+{
+	int i, rc;
+	struct inode *inode = wdata->cfile->dentry->d_inode;
+
+	for (i = 0; i < wdata->nr_pages; i++) {
+		lock_page(wdata->pages[i]);
+		clear_page_dirty_for_io(wdata->pages[i]);
+	}
+
+	do {
+		rc = cifs_async_writev(wdata);
+	} while (rc == -EAGAIN);
+
+	for (i = 0; i < wdata->nr_pages; i++) {
+		if (rc != 0)
+			SetPageError(wdata->pages[i]);
+		unlock_page(wdata->pages[i]);
+	}
+
+	mapping_set_error(inode->i_mapping, rc);
+	kref_put(&wdata->refcount, cifs_writedata_release);
+}
+
+static void
+cifs_writev_complete(struct work_struct *work)
+{
+	struct cifs_writedata *wdata = container_of(work,
+						struct cifs_writedata, work);
+	struct inode *inode = wdata->cfile->dentry->d_inode;
+	int i = 0;
+
+	if (wdata->result == 0) {
+		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
+		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
+					 wdata->bytes);
+	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
+		return cifs_writev_requeue(wdata);
+
+	for (i = 0; i < wdata->nr_pages; i++) {
+		struct page *page = wdata->pages[i];
+		if (wdata->result == -EAGAIN)
+			__set_page_dirty_nobuffers(page);
+		else if (wdata->result < 0)
+			SetPageError(page);
+		end_page_writeback(page);
+		page_cache_release(page);
+	}
+	if (wdata->result != -EAGAIN)
+		mapping_set_error(inode->i_mapping, wdata->result);
+	kref_put(&wdata->refcount, cifs_writedata_release);
+}
+
+struct cifs_writedata *
+cifs_writedata_alloc(unsigned int nr_pages)
+{
+	struct cifs_writedata *wdata;
+
+	/* this would overflow */
+	if (nr_pages == 0) {
+		cERROR(1, "%s: called with nr_pages == 0!", __func__);
+		return NULL;
+	}
+
+	/* writedata + number of page pointers */
+	wdata = kzalloc(sizeof(*wdata) +
+			sizeof(struct page *) * (nr_pages - 1), GFP_NOFS);
+	if (wdata != NULL) {
+		INIT_WORK(&wdata->work, cifs_writev_complete);
+		kref_init(&wdata->refcount);
+	}
+	return wdata;
+}
+
+/*
+ * Check the midState and signature on received buffer (if any), and queue the
+ * workqueue completion task.
+ */
+static void
+cifs_writev_callback(struct mid_q_entry *mid)
+{
+	struct cifs_writedata *wdata = mid->callback_data;
+	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
+	unsigned int written;
+	WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf;
+
+	switch (mid->midState) {
+	case MID_RESPONSE_RECEIVED:
+		wdata->result = cifs_check_receive(mid, tcon->ses->server, 0);
+		if (wdata->result != 0)
+			break;
+
+		written = le16_to_cpu(smb->CountHigh);
+		written <<= 16;
+		written += le16_to_cpu(smb->Count);
+		/*
+		 * Mask off high 16 bits when bytes written as returned
+		 * by the server is greater than bytes requested by the
+		 * client. OS/2 servers are known to set incorrect
+		 * CountHigh values.
+		 */
+		if (written > wdata->bytes)
+			written &= 0xFFFF;
+
+		if (written < wdata->bytes)
+			wdata->result = -ENOSPC;
+		else
+			wdata->bytes = written;
+		break;
+	case MID_REQUEST_SUBMITTED:
+	case MID_RETRY_NEEDED:
+		wdata->result = -EAGAIN;
+		break;
+	default:
+		wdata->result = -EIO;
+		break;
+	}
+
+	queue_work(system_nrt_wq, &wdata->work);
+	DeleteMidQEntry(mid);
+	atomic_dec(&tcon->ses->server->inFlight);
+	wake_up(&tcon->ses->server->request_q);
+}
+
+/* cifs_async_writev - send an async write, and set up mid to handle result */
+int
+cifs_async_writev(struct cifs_writedata *wdata)
+{
+	int i, rc = -EACCES;
+	WRITE_REQ *smb = NULL;
+	int wct;
+	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
+	struct inode *inode = wdata->cfile->dentry->d_inode;
+	struct kvec *iov = NULL;
+
+	if (tcon->ses->capabilities & CAP_LARGE_FILES) {
+		wct = 14;
+	} else {
+		wct = 12;
+		if (wdata->offset >> 32 > 0) {
+			/* can not handle big offset for old srv */
+			return -EIO;
+		}
+	}
+
+	rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **)&smb);
+	if (rc)
+		goto async_writev_out;
+
+	/* 1 iov per page + 1 for header */
+	iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS);
+	if (iov == NULL) {
+		rc = -ENOMEM;
+		goto async_writev_out;
+	}
+
+	smb->hdr.Pid = cpu_to_le16((__u16)wdata->cfile->pid);
+	smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->cfile->pid >> 16));
+
+	smb->AndXCommand = 0xFF;	/* none */
+	smb->Fid = wdata->cfile->netfid;
+	smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF);
+	if (wct == 14)
+		smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32);
+	smb->Reserved = 0xFFFFFFFF;
+	smb->WriteMode = 0;
+	smb->Remaining = 0;
+
+	smb->DataOffset =
+	    cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
+
+	/* 4 for RFC1001 length + 1 for BCC */
+	iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
+	iov[0].iov_base = smb;
+
+	/* marshal up the pages into iov array */
+	wdata->bytes = 0;
+	for (i = 0; i < wdata->nr_pages; i++) {
+		iov[i + 1].iov_len = min(inode->i_size -
+				      page_offset(wdata->pages[i]),
+					(loff_t)PAGE_CACHE_SIZE);
+		iov[i + 1].iov_base = kmap(wdata->pages[i]);
+		wdata->bytes += iov[i + 1].iov_len;
+	}
+
+	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
+
+	smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF);
+	smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16);
+
+	if (wct == 14) {
+		inc_rfc1001_len(&smb->hdr, wdata->bytes + 1);
+		put_bcc(wdata->bytes + 1, &smb->hdr);
+	} else {
+		/* wct == 12 */
+		struct smb_com_writex_req *smbw =
+				(struct smb_com_writex_req *)smb;
+		inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5);
+		put_bcc(wdata->bytes + 5, &smbw->hdr);
+		iov[0].iov_len += 4; /* pad bigger by four bytes */
+	}
+
+	kref_get(&wdata->refcount);
+	rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1,
+			     cifs_writev_callback, wdata, false);
+
+	if (rc == 0)
+		cifs_stats_inc(&tcon->num_writes);
+	else
+		kref_put(&wdata->refcount, cifs_writedata_release);
+
+	/* send is done, unmap pages */
+	for (i = 0; i < wdata->nr_pages; i++)
+		kunmap(wdata->pages[i]);
+
+async_writev_out:
+	cifs_small_buf_release(smb);
+	kfree(iov);
+	return rc;
+}
+
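The block above lands the whole asynchronous writepages path in one piece: a refcounted `cifs_writedata` owns the pages in flight, `cifs_writev_callback()` runs in the demultiplex thread and only classifies the result, and page cleanup is deferred to `cifs_writev_complete()` on a workqueue. The intended lifecycle, pieced together from these functions (the real caller lives in fs/cifs/file.c and is not part of this hunk; `open_file`, `wbc`, `nr_pages` and `rc` below are assumed caller-side variables):

	/* Hedged usage sketch, not the actual writepages code */
	struct cifs_writedata *wdata = cifs_writedata_alloc(nr_pages);

	if (wdata) {
		/* caller fills wdata->pages[], offset, etc. (elided) */
		cifsFileInfo_get(open_file);	/* ref dropped in release */
		wdata->cfile = open_file;
		wdata->sync_mode = wbc->sync_mode;
		wdata->nr_pages = nr_pages;
		rc = cifs_async_writev(wdata);	/* takes its own reference */
		kref_put(&wdata->refcount, cifs_writedata_release);
	}

Note the reference counting: `cifs_async_writev()` grabs a reference for the callback before submitting and drops it itself on submission failure, so the caller can unconditionally put its own reference afterwards.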
 int
-CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
-	     const int netfid, const unsigned int count,
-	     const __u64 offset, unsigned int *nbytes, struct kvec *iov,
-	     int n_vec, const int long_op)
+CIFSSMBWrite2(const int xid, struct cifs_io_parms *io_parms,
+	     unsigned int *nbytes, struct kvec *iov, int n_vec,
+	     const int long_op)
 {
 	int rc = -EACCES;
 	WRITE_REQ *pSMB = NULL;
 	int wct;
 	int smb_hdr_len;
 	int resp_buf_type = 0;
+	__u32 pid = io_parms->pid;
+	__u16 netfid = io_parms->netfid;
+	__u64 offset = io_parms->offset;
+	struct cifs_tcon *tcon = io_parms->tcon;
+	unsigned int count = io_parms->length;
 
 	*nbytes = 0;
 
@@ -1626,6 +1885,10 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 	rc = small_smb_init(SMB_COM_WRITE_ANDX, wct, tcon, (void **) &pSMB);
 	if (rc)
 		return rc;
+
+	pSMB->hdr.Pid = cpu_to_le16((__u16)pid);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid >> 16));
+
 	/* tcon and ses pointer are checked in smb_init */
 	if (tcon->ses->server == NULL)
 		return -ECONNABORTED;
@@ -1644,11 +1907,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 
 	pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
 	pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
-	smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */
+	/* header + 1 byte pad */
+	smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
 	if (wct == 14)
-		pSMB->hdr.smb_buf_length += count+1;
+		inc_rfc1001_len(pSMB, count + 1);
 	else /* wct == 12 */
-		pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */
+		inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
 	if (wct == 14)
 		pSMB->ByteCount = cpu_to_le16(count + 1);
 	else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1700,7 +1964,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 
 
 int
-CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBLock(const int xid, struct cifs_tcon *tcon,
 	    const __u16 smb_file_id, const __u64 len,
 	    const __u64 offset, const __u32 numUnlock,
 	    const __u32 numLock, const __u8 lockType,
@@ -1748,7 +2012,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 		/* oplock break */
 		count = 0;
 	}
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	if (waitFlag) {
@@ -1770,7 +2034,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 }
 
 int
-CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBPosixLock(const int xid, struct cifs_tcon *tcon,
 		const __u16 smb_file_id, const int get_flag, const __u64 len,
 		struct file_lock *pLockData, const __u16 lock_type,
 		const bool waitFlag)
@@ -1839,14 +2103,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 	pSMB->Fid = smb_file_id;
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	if (waitFlag) {
 		rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
 			(struct smb_hdr *) pSMBr, &bytes_returned);
 	} else {
 		iov[0].iov_base = (char *)pSMB;
-		iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+		iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
 		rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
 				&resp_buf_type, timeout);
 		pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +2126,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 		__u16 data_count;
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) {
+		if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
 			rc = -EIO;	/* bad smb */
 			goto plk_err_exit;
 		}
@@ -1908,7 +2172,7 @@ plk_err_exit:
 
 
 int
-CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
+CIFSSMBClose(const int xid, struct cifs_tcon *tcon, int smb_file_id)
 {
 	int rc = 0;
 	CLOSE_REQ *pSMB = NULL;
@@ -1941,7 +2205,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
 }
 
 int
-CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
+CIFSSMBFlush(const int xid, struct cifs_tcon *tcon, int smb_file_id)
 {
 	int rc = 0;
 	FLUSH_REQ *pSMB = NULL;
@@ -1962,7 +2226,7 @@ CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
 }
 
 int
-CIFSSMBRename(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBRename(const int xid, struct cifs_tcon *tcon,
 	      const char *fromName, const char *toName,
 	      const struct nls_table *nls_codepage, int remap)
 {
@@ -2012,7 +2276,7 @@ renameRetry:
 	}
 
 	count = 1 /* 1st signature byte */  + name_len + name_len2;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2029,7 +2293,7 @@ renameRetry:
 	return rc;
 }
 
-int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
+int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon,
 		int netfid, const char *target_name,
 		const struct nls_table *nls_codepage, int remap)
 {
@@ -2092,7 +2356,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
 	pSMB->InformationLevel =
 		cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2109,7 +2373,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
 }
 
 int
-CIFSSMBCopy(const int xid, struct cifsTconInfo *tcon, const char *fromName,
+CIFSSMBCopy(const int xid, struct cifs_tcon *tcon, const char *fromName,
 	    const __u16 target_tid, const char *toName, const int flags,
 	    const struct nls_table *nls_codepage, int remap)
 {
@@ -2159,7 +2423,7 @@ copyRetry:
 	}
 
 	count = 1 /* 1st signature byte */  + name_len + name_len2;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2177,7 +2441,7 @@ copyRetry:
 }
 
 int
-CIFSUnixCreateSymLink(const int xid, struct cifsTconInfo *tcon,
+CIFSUnixCreateSymLink(const int xid, struct cifs_tcon *tcon,
 		      const char *fromName, const char *toName,
 		      const struct nls_table *nls_codepage)
 {
@@ -2249,7 +2513,7 @@ createSymLinkRetry:
 	pSMB->DataOffset = cpu_to_le16(offset);
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2266,7 +2530,7 @@ createSymLinkRetry:
 }
 
 int
-CIFSUnixCreateHardLink(const int xid, struct cifsTconInfo *tcon,
+CIFSUnixCreateHardLink(const int xid, struct cifs_tcon *tcon,
 		       const char *fromName, const char *toName,
 		       const struct nls_table *nls_codepage, int remap)
 {
@@ -2335,7 +2599,7 @@ createHardLinkRetry:
 	pSMB->DataOffset = cpu_to_le16(offset);
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2351,7 +2615,7 @@ createHardLinkRetry:
 }
 
 int
-CIFSCreateHardLink(const int xid, struct cifsTconInfo *tcon,
+CIFSCreateHardLink(const int xid, struct cifs_tcon *tcon,
 		   const char *fromName, const char *toName,
 		   const struct nls_table *nls_codepage, int remap)
 {
@@ -2406,7 +2670,7 @@ winCreateHardLinkRetry:
 	}
 
 	count = 1 /* string type byte */  + name_len + name_len2;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2423,7 +2687,7 @@ winCreateHardLinkRetry:
 }
 
 int
-CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBUnixQuerySymLink(const int xid, struct cifs_tcon *tcon,
 			const unsigned char *searchName, char **symlinkinfo,
 			const struct nls_table *nls_codepage)
 {
@@ -2477,7 +2741,7 @@ querySymLinkRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2753,7 @@ querySymLinkRetry:
 
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 		/* BB also check enough total bytes returned */
-		if (rc || (pSMBr->ByteCount < 2))
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
 			rc = -EIO;
 		else {
 			bool is_unicode;
@@ -2516,9 +2780,19 @@ querySymLinkRetry:
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
+/*
+ * Recent Windows versions now create symlinks more frequently
+ * and they use the "reparse point" mechanism below. We can of course
+ * do symlinks nicely to Samba and other servers which support the
+ * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
+ * "MF" symlinks optionally, but for recent Windows we really need to
+ * reenable the code below and fix the cifs_symlink callers to handle this.
+ * In the interim this code has been moved to its own config option so
+ * it is not compiled in by default until callers fixed up and more tested.
+ */
 int
-CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBQueryReparseLinkInfo(const int xid, struct cifs_tcon *tcon,
 			const unsigned char *searchName,
 			char *symlinkinfo, const int buflen, __u16 fid,
 			const struct nls_table *nls_codepage)
@@ -2561,14 +2835,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 	} else {		/* decode response */
 		__u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
 		__u32 data_count = le32_to_cpu(pSMBr->DataCount);
-		if ((pSMBr->ByteCount < 2) || (data_offset > 512)) {
+		if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
			/* BB also check enough total bytes returned */
			rc = -EIO;	/* bad smb */
			goto qreparse_out;
		}
		if (data_count && (data_count < 2048)) {
			char *end_of_smb = 2 /* sizeof byte count */ +
-				pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
+				get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
 
			struct reparse_data *reparse_buf =
						(struct reparse_data *)
@@ -2618,7 +2892,7 @@ qreparse_out:
 
 	return rc;
 }
-#endif /* CIFS_EXPERIMENTAL */
+#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
 
 #ifdef CONFIG_CIFS_POSIX
 
@@ -2756,7 +3030,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
 }
 
 int
-CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBGetPosixACL(const int xid, struct cifs_tcon *tcon,
		   const unsigned char *searchName,
		   char *acl_inf, const int buflen, const int acl_type,
		   const struct nls_table *nls_codepage, int remap)
@@ -2814,7 +3088,7 @@ queryAclRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +3100,8 @@ queryAclRetry:
 		/* decode response */
 
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc || (pSMBr->ByteCount < 2))
 		/* BB also check enough total bytes returned */
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
			rc = -EIO;	/* bad smb */
		else {
			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2844,7 +3118,7 @@ queryAclRetry:
 }
 
 int
-CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBSetPosixACL(const int xid, struct cifs_tcon *tcon,
		   const unsigned char *fileName,
		   const char *local_acl, const int buflen,
		   const int acl_type,
@@ -2908,7 +3182,7 @@ setAclRetry:
 	pSMB->ParameterCount = cpu_to_le16(params);
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2924,7 +3198,7 @@ setACLerrorExit:
 
 /* BB fix tabs in this function FIXME BB */
 int
-CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
+CIFSGetExtAttr(const int xid, struct cifs_tcon *tcon,
	       const int netfid, __u64 *pExtAttrBits, __u64 *pMask)
 {
	int rc = 0;
@@ -2966,7 +3240,7 @@ GetExtAttrRetry:
	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
	pSMB->Pad = 0;
	pSMB->Fid = netfid;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
	pSMB->t2.ByteCount = cpu_to_le16(byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +3250,8 @@ GetExtAttrRetry:
	} else {
		/* decode response */
		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc || (pSMBr->ByteCount < 2))
		/* BB also check enough total bytes returned */
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
			/* If rc should we check for EOPNOSUPP and
			   disable the srvino flag? or in caller? */
			rc = -EIO; /* bad smb */
@@ -3017,7 +3291,7 @@ GetExtAttrOut:
  */
 static int
 smb_init_nttransact(const __u16 sub_command, const int setup_count,
-		   const int parm_len, struct cifsTconInfo *tcon,
+		   const int parm_len, struct cifs_tcon *tcon,
		   void **ret_buf)
 {
	int rc;
@@ -3052,6 +3326,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
	char *end_of_smb;
	__u32 data_count, data_offset, parm_count, parm_offset;
	struct smb_com_ntransact_rsp *pSMBr;
+	u16 bcc;

	*pdatalen = 0;
	*pparmlen = 0;
@@ -3061,8 +3336,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,

	pSMBr = (struct smb_com_ntransact_rsp *)buf;

-	/* ByteCount was converted from little endian in SendReceive */
-	end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+	bcc = get_bcc(&pSMBr->hdr);
+	end_of_smb = 2 /* sizeof byte count */ + bcc +
			(char *)&pSMBr->ByteCount;

	data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3363,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
		       *ppdata, data_count, (data_count + *ppdata),
		       end_of_smb, pSMBr);
		return -EINVAL;
-	} else if (parm_count + data_count > pSMBr->ByteCount) {
+	} else if (parm_count + data_count > bcc) {
		cFYI(1, "parm count and data count larger than SMB");
		return -EINVAL;
	}
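For reference, the bound enforced in `validate_ntransact()` above: a response ends 2 bytes (the ByteCount field itself) plus `bcc` data bytes past the address of `ByteCount`, and the parameter and data regions the server advertises must both land inside that window. Restated as standalone arithmetic (illustration only, mirroring the code above):

	/* where the SMB ends, relative to the ByteCount field */
	char *end_of_smb = (char *)&pSMBr->ByteCount + 2 + bcc;

	/* both regions live inside the bcc area, so beyond the per-region
	 * end_of_smb checks, their combined size can never exceed bcc */
	if (parm_count + data_count > bcc)
		return -EINVAL;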
@@ -3099,7 +3374,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
 
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
-CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
+CIFSSMBGetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid,
		  struct cifs_ntsd **acl_inf, __u32 *pbuflen)
 {
	int rc = 0;
@@ -3124,9 +3399,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
	pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
				     CIFS_ACL_DACL);
	pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
-	pSMB->hdr.smb_buf_length += 11;
+	inc_rfc1001_len(pSMB, 11);
	iov[0].iov_base = (char *)pSMB;
-	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;

	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
			 0);
@@ -3191,7 +3466,7 @@ qsec_out:
 }
 
 int
-CIFSSMBSetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
+CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid,
			struct cifs_ntsd *pntsd, __u32 acllen)
 {
	__u16 byte_count, param_count, data_count, param_offset, data_offset;
@@ -3235,10 +3510,9 @@ setCifsAclRetry:
	memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
		(char *) pntsd,
		acllen);
-	pSMB->hdr.smb_buf_length += (byte_count + data_count);
-
+	inc_rfc1001_len(pSMB, byte_count + data_count);
	} else
-		pSMB->hdr.smb_buf_length += byte_count;
+		inc_rfc1001_len(pSMB, byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3258,7 +3532,7 @@ setCifsAclRetry:
 
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
-int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
+int SMBQueryInformation(const int xid, struct cifs_tcon *tcon,
			const unsigned char *searchName,
			FILE_ALL_INFO *pFinfo,
			const struct nls_table *nls_codepage, int remap)
@@ -3289,7 +3563,7 @@ QInfRetry:
	}
	pSMB->BufferFormat = 0x04;
	name_len++; /* account for buffer type byte */
-	pSMB->hdr.smb_buf_length += (__u16) name_len;
+	inc_rfc1001_len(pSMB, (__u16)name_len);
	pSMB->ByteCount = cpu_to_le16(name_len);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3326,7 +3600,7 @@ QInfRetry:
 }
 
 int
-CIFSSMBQFileInfo(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBQFileInfo(const int xid, struct cifs_tcon *tcon,
		 u16 netfid, FILE_ALL_INFO *pFindData)
 {
	struct smb_t2_qfi_req *pSMB = NULL;
@@ -3364,7 +3638,7 @@ QFileInfoRetry:
	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
	pSMB->Pad = 0;
	pSMB->Fid = netfid;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3649,7 @@ QFileInfoRetry:

	if (rc) /* BB add auto retry on EOPNOTSUPP? */
		rc = -EIO;
-	else if (pSMBr->ByteCount < 40)
+	else if (get_bcc(&pSMBr->hdr) < 40)
		rc = -EIO;	/* bad smb */
	else if (pFindData) {
		__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3393,7 +3667,7 @@ QFileInfoRetry:
 }
 
 int
-CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBQPathInfo(const int xid, struct cifs_tcon *tcon,
		 const unsigned char *searchName,
		 FILE_ALL_INFO *pFindData,
		 int legacy /* old style infolevel */,
@@ -3451,7 +3725,7 @@ QPathInfoRetry:
	else
		pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
	pSMB->ByteCount = cpu_to_le16(byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3737,9 @@ QPathInfoRetry:

	if (rc) /* BB add auto retry on EOPNOTSUPP? */
		rc = -EIO;
-	else if (!legacy && (pSMBr->ByteCount < 40))
+	else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
		rc = -EIO;	/* bad smb */
-	else if (legacy && (pSMBr->ByteCount < 24))
+	else if (legacy && get_bcc(&pSMBr->hdr) < 24)
		rc = -EIO;  /* 24 or 26 expected but we do not read
				last field */
	else if (pFindData) {
@@ -3494,7 +3768,7 @@ QPathInfoRetry:
 }
 
 int
-CIFSSMBUnixQFileInfo(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBUnixQFileInfo(const int xid, struct cifs_tcon *tcon,
		     u16 netfid, FILE_UNIX_BASIC_INFO *pFindData)
 {
	struct smb_t2_qfi_req *pSMB = NULL;
@@ -3532,7 +3806,7 @@ UnixQFileInfoRetry:
	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
	pSMB->Pad = 0;
	pSMB->Fid = netfid;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3815,7 @@ UnixQFileInfoRetry:
	} else {		/* decode response */
		rc = validate_t2((struct smb_t2_rsp *)pSMBr);

-		if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+		if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
			cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
				   "Unix Extensions can be disabled on mount "
				   "by specifying the nosfu mount option.");
@@ -3563,7 +3837,7 @@ UnixQFileInfoRetry:
 }
 
 int
-CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBUnixQPathInfo(const int xid, struct cifs_tcon *tcon,
		     const unsigned char *searchName,
		     FILE_UNIX_BASIC_INFO *pFindData,
		     const struct nls_table *nls_codepage, int remap)
@@ -3617,7 +3891,7 @@ UnixQPathInfoRetry:
	pSMB->ParameterCount = pSMB->TotalParameterCount;
	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
	pSMB->ByteCount = cpu_to_le16(byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3901,7 @@ UnixQPathInfoRetry:
	} else {		/* decode response */
		rc = validate_t2((struct smb_t2_rsp *)pSMBr);

-		if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+		if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
			cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
				   "Unix Extensions can be disabled on mount "
				   "by specifying the nosfu mount option.");
@@ -3649,7 +3923,7 @@ UnixQPathInfoRetry:
 
 /* xid, tcon, searchName and codepage are input parms, rest are returned */
 int
-CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
+CIFSFindFirst(const int xid, struct cifs_tcon *tcon,
	      const char *searchName,
	      const struct nls_table *nls_codepage,
	      __u16 *pnetfid,
@@ -3731,7 +4005,7 @@ findFirstRetry:

	/* BB what should we set StorageType to? Does it matter? BB */
	pSMB->SearchStorageType = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
	pSMB->ByteCount = cpu_to_le16(byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3797,7 +4071,7 @@ findFirstRetry:
	return rc;
 }
 
-int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
+int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
		 __u16 searchHandle, struct cifs_search_info *psrch_inf)
 {
	TRANSACTION2_FNEXT_REQ *pSMB = NULL;
@@ -3860,7 +4134,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
	byte_count = params + 1 /* pad */ ;
	pSMB->TotalParameterCount = cpu_to_le16(params);
	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
	pSMB->ByteCount = cpu_to_le16(byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3935,7 +4209,7 @@ FNext2_err_exit:
 }
 
 int
-CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
+CIFSFindClose(const int xid, struct cifs_tcon *tcon,
	      const __u16 searchHandle)
 {
	int rc = 0;
@@ -3967,7 +4241,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
 }
 
 int
-CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
+CIFSGetSrvInodeNumber(const int xid, struct cifs_tcon *tcon,
		      const unsigned char *searchName,
		      __u64 *inode_number,
		      const struct nls_table *nls_codepage, int remap)
@@ -4022,7 +4296,7 @@ GetInodeNumberRetry:
	pSMB->ParameterCount = pSMB->TotalParameterCount;
	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
	pSMB->ByteCount = cpu_to_le16(byte_count);

	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4306,8 @@ GetInodeNumberRetry:
	} else {
		/* decode response */
		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc || (pSMBr->ByteCount < 2))
		/* BB also check enough total bytes returned */
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
			/* If rc should we check for EOPNOSUPP and
			   disable the srvino flag? or in caller? */
4039 rc = -EIO; /* bad smb */ 4313 rc = -EIO; /* bad smb */
@@ -4169,7 +4443,7 @@ parse_DFS_referrals_exit:
4169} 4443}
4170 4444
4171int 4445int
4172CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, 4446CIFSGetDFSRefer(const int xid, struct cifs_ses *ses,
4173 const unsigned char *searchName, 4447 const unsigned char *searchName,
4174 struct dfs_info3_param **target_nodes, 4448 struct dfs_info3_param **target_nodes,
4175 unsigned int *num_of_nodes, 4449 unsigned int *num_of_nodes,
@@ -4218,7 +4492,7 @@ getDFSRetry:
4218 } 4492 }
4219 4493
4220 if (ses->server) { 4494 if (ses->server) {
4221 if (ses->server->secMode & 4495 if (ses->server->sec_mode &
4222 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 4496 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
4223 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 4497 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
4224 } 4498 }
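
Note that secMode becoming sec_mode here is purely a rename, of a piece
with the cifsSesInfo -> cifs_ses and cifsTconInfo -> cifs_tcon renames
visible in every function signature above; the signing logic itself
(set SMBFLG2_SECURITY_SIGNATURE whenever the server enables or requires
signing) is unchanged.
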
@@ -4246,7 +4520,7 @@ getDFSRetry:
4246 pSMB->ParameterCount = cpu_to_le16(params); 4520 pSMB->ParameterCount = cpu_to_le16(params);
4247 pSMB->TotalParameterCount = pSMB->ParameterCount; 4521 pSMB->TotalParameterCount = pSMB->ParameterCount;
4248 pSMB->MaxReferralLevel = cpu_to_le16(3); 4522 pSMB->MaxReferralLevel = cpu_to_le16(3);
4249 pSMB->hdr.smb_buf_length += byte_count; 4523 inc_rfc1001_len(pSMB, byte_count);
4250 pSMB->ByteCount = cpu_to_le16(byte_count); 4524 pSMB->ByteCount = cpu_to_le16(byte_count);
4251 4525
4252 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4526 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4532,13 @@ getDFSRetry:
4258 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4532 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4259 4533
4260 /* BB Also check if enough total bytes returned? */ 4534 /* BB Also check if enough total bytes returned? */
4261 if (rc || (pSMBr->ByteCount < 17)) { 4535 if (rc || get_bcc(&pSMBr->hdr) < 17) {
4262 rc = -EIO; /* bad smb */ 4536 rc = -EIO; /* bad smb */
4263 goto GetDFSRefExit; 4537 goto GetDFSRefExit;
4264 } 4538 }
4265 4539
4266 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d", 4540 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4267 pSMBr->ByteCount, 4541 get_bcc(&pSMBr->hdr),
4268 le16_to_cpu(pSMBr->t2.DataOffset)); 4542 le16_to_cpu(pSMBr->t2.DataOffset));
4269 4543
4270 /* parse returned result into more usable form */ 4544 /* parse returned result into more usable form */
@@ -4283,7 +4557,7 @@ GetDFSRefExit:
4283 4557
4284/* Query File System Info such as free space to old servers such as Win 9x */ 4558/* Query File System Info such as free space to old servers such as Win 9x */
4285int 4559int
4286SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData) 4560SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData)
4287{ 4561{
4288/* level 0x01 SMB_QUERY_FILE_SYSTEM_INFO */ 4562/* level 0x01 SMB_QUERY_FILE_SYSTEM_INFO */
4289 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4563 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4320,7 +4594,7 @@ oldQFSInfoRetry:
4320 pSMB->Reserved3 = 0; 4594 pSMB->Reserved3 = 0;
4321 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4595 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4322 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION); 4596 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
4323 pSMB->hdr.smb_buf_length += byte_count; 4597 inc_rfc1001_len(pSMB, byte_count);
4324 pSMB->ByteCount = cpu_to_le16(byte_count); 4598 pSMB->ByteCount = cpu_to_le16(byte_count);
4325 4599
4326 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4600 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4604,12 @@ oldQFSInfoRetry:
4330 } else { /* decode response */ 4604 } else { /* decode response */
4331 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4605 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4332 4606
4333 if (rc || (pSMBr->ByteCount < 18)) 4607 if (rc || get_bcc(&pSMBr->hdr) < 18)
4334 rc = -EIO; /* bad smb */ 4608 rc = -EIO; /* bad smb */
4335 else { 4609 else {
4336 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4610 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4337 cFYI(1, "qfsinf resp BCC: %d Offset %d", 4611 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4338 pSMBr->ByteCount, data_offset); 4612 get_bcc(&pSMBr->hdr), data_offset);
4339 4613
4340 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4614 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4341 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4615 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4362,7 +4636,7 @@ oldQFSInfoRetry:
4362} 4636}
4363 4637
4364int 4638int
4365CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData) 4639CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData)
4366{ 4640{
4367/* level 0x103 SMB_QUERY_FILE_SYSTEM_INFO */ 4641/* level 0x103 SMB_QUERY_FILE_SYSTEM_INFO */
4368 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4642 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4399,7 +4673,7 @@ QFSInfoRetry:
4399 pSMB->Reserved3 = 0; 4673 pSMB->Reserved3 = 0;
4400 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4674 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4401 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO); 4675 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
4402 pSMB->hdr.smb_buf_length += byte_count; 4676 inc_rfc1001_len(pSMB, byte_count);
4403 pSMB->ByteCount = cpu_to_le16(byte_count); 4677 pSMB->ByteCount = cpu_to_le16(byte_count);
4404 4678
4405 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4679 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4683,7 @@ QFSInfoRetry:
4409 } else { /* decode response */ 4683 } else { /* decode response */
4410 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4684 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4411 4685
4412 if (rc || (pSMBr->ByteCount < 24)) 4686 if (rc || get_bcc(&pSMBr->hdr) < 24)
4413 rc = -EIO; /* bad smb */ 4687 rc = -EIO; /* bad smb */
4414 else { 4688 else {
4415 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4689 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4441,7 +4715,7 @@ QFSInfoRetry:
4441} 4715}
4442 4716
4443int 4717int
4444CIFSSMBQFSAttributeInfo(const int xid, struct cifsTconInfo *tcon) 4718CIFSSMBQFSAttributeInfo(const int xid, struct cifs_tcon *tcon)
4445{ 4719{
4446/* level 0x105 SMB_QUERY_FILE_SYSTEM_INFO */ 4720/* level 0x105 SMB_QUERY_FILE_SYSTEM_INFO */
4447 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4721 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4479,7 +4753,7 @@ QFSAttributeRetry:
4479 pSMB->Reserved3 = 0; 4753 pSMB->Reserved3 = 0;
4480 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4754 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4481 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO); 4755 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
4482 pSMB->hdr.smb_buf_length += byte_count; 4756 inc_rfc1001_len(pSMB, byte_count);
4483 pSMB->ByteCount = cpu_to_le16(byte_count); 4757 pSMB->ByteCount = cpu_to_le16(byte_count);
4484 4758
4485 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4759 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4763,7 @@ QFSAttributeRetry:
4489 } else { /* decode response */ 4763 } else { /* decode response */
4490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4764 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4491 4765
4492 if (rc || (pSMBr->ByteCount < 13)) { 4766 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4493 /* BB also check if enough bytes returned */ 4767 /* BB also check if enough bytes returned */
4494 rc = -EIO; /* bad smb */ 4768 rc = -EIO; /* bad smb */
4495 } else { 4769 } else {
@@ -4511,7 +4785,7 @@ QFSAttributeRetry:
4511} 4785}
4512 4786
4513int 4787int
4514CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon) 4788CIFSSMBQFSDeviceInfo(const int xid, struct cifs_tcon *tcon)
4515{ 4789{
4516/* level 0x104 SMB_QUERY_FILE_SYSTEM_INFO */ 4790/* level 0x104 SMB_QUERY_FILE_SYSTEM_INFO */
4517 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4791 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4550,7 +4824,7 @@ QFSDeviceRetry:
4550 pSMB->Reserved3 = 0; 4824 pSMB->Reserved3 = 0;
4551 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4825 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4552 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO); 4826 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
4553 pSMB->hdr.smb_buf_length += byte_count; 4827 inc_rfc1001_len(pSMB, byte_count);
4554 pSMB->ByteCount = cpu_to_le16(byte_count); 4828 pSMB->ByteCount = cpu_to_le16(byte_count);
4555 4829
4556 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4830 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4834,8 @@ QFSDeviceRetry:
4560 } else { /* decode response */ 4834 } else { /* decode response */
4561 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4835 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4562 4836
4563 if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO))) 4837 if (rc || get_bcc(&pSMBr->hdr) <
4838 sizeof(FILE_SYSTEM_DEVICE_INFO))
4564 rc = -EIO; /* bad smb */ 4839 rc = -EIO; /* bad smb */
4565 else { 4840 else {
4566 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4841 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4581,7 +4856,7 @@ QFSDeviceRetry:
4581} 4856}
4582 4857
4583int 4858int
4584CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon) 4859CIFSSMBQFSUnixInfo(const int xid, struct cifs_tcon *tcon)
4585{ 4860{
4586/* level 0x200 SMB_QUERY_CIFS_UNIX_INFO */ 4861/* level 0x200 SMB_QUERY_CIFS_UNIX_INFO */
4587 TRANSACTION2_QFSI_REQ *pSMB = NULL; 4862 TRANSACTION2_QFSI_REQ *pSMB = NULL;
@@ -4619,7 +4894,7 @@ QFSUnixRetry:
4619 pSMB->Reserved3 = 0; 4894 pSMB->Reserved3 = 0;
4620 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4895 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4621 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO); 4896 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
4622 pSMB->hdr.smb_buf_length += byte_count; 4897 inc_rfc1001_len(pSMB, byte_count);
4623 pSMB->ByteCount = cpu_to_le16(byte_count); 4898 pSMB->ByteCount = cpu_to_le16(byte_count);
4624 4899
4625 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4900 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4904,7 @@ QFSUnixRetry:
4629 } else { /* decode response */ 4904 } else { /* decode response */
4630 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4905 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4631 4906
4632 if (rc || (pSMBr->ByteCount < 13)) { 4907 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4633 rc = -EIO; /* bad smb */ 4908 rc = -EIO; /* bad smb */
4634 } else { 4909 } else {
4635 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4910 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4651,7 +4926,7 @@ QFSUnixRetry:
4651} 4926}
4652 4927
4653int 4928int
4654CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap) 4929CIFSSMBSetFSUnixInfo(const int xid, struct cifs_tcon *tcon, __u64 cap)
4655{ 4930{
4656/* level 0x200 SMB_SET_CIFS_UNIX_INFO */ 4931/* level 0x200 SMB_SET_CIFS_UNIX_INFO */
4657 TRANSACTION2_SETFSI_REQ *pSMB = NULL; 4932 TRANSACTION2_SETFSI_REQ *pSMB = NULL;
@@ -4702,7 +4977,7 @@ SETFSUnixRetry:
4702 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION); 4977 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
4703 pSMB->ClientUnixCap = cpu_to_le64(cap); 4978 pSMB->ClientUnixCap = cpu_to_le64(cap);
4704 4979
4705 pSMB->hdr.smb_buf_length += byte_count; 4980 inc_rfc1001_len(pSMB, byte_count);
4706 pSMB->ByteCount = cpu_to_le16(byte_count); 4981 pSMB->ByteCount = cpu_to_le16(byte_count);
4707 4982
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4983 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4725,7 +5000,7 @@ SETFSUnixRetry:
4725 5000
4726 5001
4727int 5002int
4728CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 5003CIFSSMBQFSPosixInfo(const int xid, struct cifs_tcon *tcon,
4729 struct kstatfs *FSData) 5004 struct kstatfs *FSData)
4730{ 5005{
4731/* level 0x201 SMB_QUERY_CIFS_POSIX_INFO */ 5006/* level 0x201 SMB_QUERY_CIFS_POSIX_INFO */
@@ -4764,7 +5039,7 @@ QFSPosixRetry:
4764 pSMB->Reserved3 = 0; 5039 pSMB->Reserved3 = 0;
4765 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 5040 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4766 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO); 5041 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
4767 pSMB->hdr.smb_buf_length += byte_count; 5042 inc_rfc1001_len(pSMB, byte_count);
4768 pSMB->ByteCount = cpu_to_le16(byte_count); 5043 pSMB->ByteCount = cpu_to_le16(byte_count);
4769 5044
4770 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5045 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +5049,7 @@ QFSPosixRetry:
4774 } else { /* decode response */ 5049 } else { /* decode response */
4775 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 5050 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4776 5051
4777 if (rc || (pSMBr->ByteCount < 13)) { 5052 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4778 rc = -EIO; /* bad smb */ 5053 rc = -EIO; /* bad smb */
4779 } else { 5054 } else {
4780 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 5055 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4818,7 +5093,7 @@ QFSPosixRetry:
4818 in Samba which this routine can run into */ 5093 in Samba which this routine can run into */
4819 5094
4820int 5095int
4821CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, 5096CIFSSMBSetEOF(const int xid, struct cifs_tcon *tcon, const char *fileName,
4822 __u64 size, bool SetAllocation, 5097 __u64 size, bool SetAllocation,
4823 const struct nls_table *nls_codepage, int remap) 5098 const struct nls_table *nls_codepage, int remap)
4824{ 5099{
@@ -4890,7 +5165,7 @@ SetEOFRetry:
4890 pSMB->ParameterCount = cpu_to_le16(params); 5165 pSMB->ParameterCount = cpu_to_le16(params);
4891 pSMB->TotalParameterCount = pSMB->ParameterCount; 5166 pSMB->TotalParameterCount = pSMB->ParameterCount;
4892 pSMB->Reserved4 = 0; 5167 pSMB->Reserved4 = 0;
4893 pSMB->hdr.smb_buf_length += byte_count; 5168 inc_rfc1001_len(pSMB, byte_count);
4894 parm_data->FileSize = cpu_to_le64(size); 5169 parm_data->FileSize = cpu_to_le64(size);
4895 pSMB->ByteCount = cpu_to_le16(byte_count); 5170 pSMB->ByteCount = cpu_to_le16(byte_count);
4896 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5171 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4907,7 +5182,7 @@ SetEOFRetry:
4907} 5182}
4908 5183
4909int 5184int
4910CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, 5185CIFSSMBSetFileSize(const int xid, struct cifs_tcon *tcon, __u64 size,
4911 __u16 fid, __u32 pid_of_opener, bool SetAllocation) 5186 __u16 fid, __u32 pid_of_opener, bool SetAllocation)
4912{ 5187{
4913 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5188 struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -4969,7 +5244,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4969 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); 5244 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
4970 } 5245 }
4971 pSMB->Reserved4 = 0; 5246 pSMB->Reserved4 = 0;
4972 pSMB->hdr.smb_buf_length += byte_count; 5247 inc_rfc1001_len(pSMB, byte_count);
4973 pSMB->ByteCount = cpu_to_le16(byte_count); 5248 pSMB->ByteCount = cpu_to_le16(byte_count);
4974 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5249 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4975 if (rc) { 5250 if (rc) {
@@ -4989,7 +5264,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4989 time and resort to the original setpathinfo level which takes the ancient 5264 time and resort to the original setpathinfo level which takes the ancient
4990 DOS time format with 2 second granularity */ 5265 DOS time format with 2 second granularity */
4991int 5266int
4992CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, 5267CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon,
4993 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener) 5268 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
4994{ 5269{
4995 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5270 struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -5037,7 +5312,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5037 else 5312 else
5038 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5313 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5039 pSMB->Reserved4 = 0; 5314 pSMB->Reserved4 = 0;
5040 pSMB->hdr.smb_buf_length += byte_count; 5315 inc_rfc1001_len(pSMB, byte_count);
5041 pSMB->ByteCount = cpu_to_le16(byte_count); 5316 pSMB->ByteCount = cpu_to_le16(byte_count);
5042 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5317 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5043 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5318 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5051,7 +5326,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5051} 5326}
5052 5327
5053int 5328int
5054CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, 5329CIFSSMBSetFileDisposition(const int xid, struct cifs_tcon *tcon,
5055 bool delete_file, __u16 fid, __u32 pid_of_opener) 5330 bool delete_file, __u16 fid, __u32 pid_of_opener)
5056{ 5331{
5057 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5332 struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -5096,7 +5371,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5096 pSMB->Fid = fid; 5371 pSMB->Fid = fid;
5097 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); 5372 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
5098 pSMB->Reserved4 = 0; 5373 pSMB->Reserved4 = 0;
5099 pSMB->hdr.smb_buf_length += byte_count; 5374 inc_rfc1001_len(pSMB, byte_count);
5100 pSMB->ByteCount = cpu_to_le16(byte_count); 5375 pSMB->ByteCount = cpu_to_le16(byte_count);
5101 *data_offset = delete_file ? 1 : 0; 5376 *data_offset = delete_file ? 1 : 0;
5102 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5377 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5107,7 +5382,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5107} 5382}
5108 5383
5109int 5384int
5110CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, 5385CIFSSMBSetPathInfo(const int xid, struct cifs_tcon *tcon,
5111 const char *fileName, const FILE_BASIC_INFO *data, 5386 const char *fileName, const FILE_BASIC_INFO *data,
5112 const struct nls_table *nls_codepage, int remap) 5387 const struct nls_table *nls_codepage, int remap)
5113{ 5388{
@@ -5169,7 +5444,7 @@ SetTimesRetry:
5169 else 5444 else
5170 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5445 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5171 pSMB->Reserved4 = 0; 5446 pSMB->Reserved4 = 0;
5172 pSMB->hdr.smb_buf_length += byte_count; 5447 inc_rfc1001_len(pSMB, byte_count);
5173 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5448 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5174 pSMB->ByteCount = cpu_to_le16(byte_count); 5449 pSMB->ByteCount = cpu_to_le16(byte_count);
5175 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5450 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5191,7 +5466,7 @@ SetTimesRetry:
5191 handling it anyway and NT4 was what we thought it would be needed for 5466 handling it anyway and NT4 was what we thought it would be needed for
5192 Do not delete it until we prove whether needed for Win9x though */ 5467 Do not delete it until we prove whether needed for Win9x though */
5193int 5468int
5194CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, char *fileName, 5469CIFSSMBSetAttrLegacy(int xid, struct cifs_tcon *tcon, char *fileName,
5195 __u16 dos_attrs, const struct nls_table *nls_codepage) 5470 __u16 dos_attrs, const struct nls_table *nls_codepage)
5196{ 5471{
5197 SETATTR_REQ *pSMB = NULL; 5472 SETATTR_REQ *pSMB = NULL;
@@ -5221,7 +5496,7 @@ SetAttrLgcyRetry:
5221 } 5496 }
5222 pSMB->attr = cpu_to_le16(dos_attrs); 5497 pSMB->attr = cpu_to_le16(dos_attrs);
5223 pSMB->BufferFormat = 0x04; 5498 pSMB->BufferFormat = 0x04;
5224 pSMB->hdr.smb_buf_length += name_len + 1; 5499 inc_rfc1001_len(pSMB, name_len + 1);
5225 pSMB->ByteCount = cpu_to_le16(name_len + 1); 5500 pSMB->ByteCount = cpu_to_le16(name_len + 1);
5226 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5501 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5227 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5502 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5279,7 +5554,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5279} 5554}
5280 5555
5281int 5556int
5282CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, 5557CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon,
5283 const struct cifs_unix_set_info_args *args, 5558 const struct cifs_unix_set_info_args *args,
5284 u16 fid, u32 pid_of_opener) 5559 u16 fid, u32 pid_of_opener)
5285{ 5560{
@@ -5326,7 +5601,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5326 pSMB->Fid = fid; 5601 pSMB->Fid = fid;
5327 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5602 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5328 pSMB->Reserved4 = 0; 5603 pSMB->Reserved4 = 0;
5329 pSMB->hdr.smb_buf_length += byte_count; 5604 inc_rfc1001_len(pSMB, byte_count);
5330 pSMB->ByteCount = cpu_to_le16(byte_count); 5605 pSMB->ByteCount = cpu_to_le16(byte_count);
5331 5606
5332 cifs_fill_unix_set_info(data_offset, args); 5607 cifs_fill_unix_set_info(data_offset, args);
@@ -5342,7 +5617,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5342} 5617}
5343 5618
5344int 5619int
5345CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, 5620CIFSSMBUnixSetPathInfo(const int xid, struct cifs_tcon *tcon, char *fileName,
5346 const struct cifs_unix_set_info_args *args, 5621 const struct cifs_unix_set_info_args *args,
5347 const struct nls_table *nls_codepage, int remap) 5622 const struct nls_table *nls_codepage, int remap)
5348{ 5623{
@@ -5402,7 +5677,7 @@ setPermsRetry:
5402 pSMB->TotalDataCount = pSMB->DataCount; 5677 pSMB->TotalDataCount = pSMB->DataCount;
5403 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5678 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5404 pSMB->Reserved4 = 0; 5679 pSMB->Reserved4 = 0;
5405 pSMB->hdr.smb_buf_length += byte_count; 5680 inc_rfc1001_len(pSMB, byte_count);
5406 5681
5407 cifs_fill_unix_set_info(data_offset, args); 5682 cifs_fill_unix_set_info(data_offset, args);
5408 5683
@@ -5418,79 +5693,6 @@ setPermsRetry:
5418 return rc; 5693 return rc;
5419} 5694}
5420 5695
5421int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5422 const int notify_subdirs, const __u16 netfid,
5423 __u32 filter, struct file *pfile, int multishot,
5424 const struct nls_table *nls_codepage)
5425{
5426 int rc = 0;
5427 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5428 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5429 struct dir_notify_req *dnotify_req;
5430 int bytes_returned;
5431
5432 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5433 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5434 (void **) &pSMBr);
5435 if (rc)
5436 return rc;
5437
5438 pSMB->TotalParameterCount = 0 ;
5439 pSMB->TotalDataCount = 0;
5440 pSMB->MaxParameterCount = cpu_to_le32(2);
5441 /* BB find exact data count max from sess structure BB */
5442 pSMB->MaxDataCount = 0; /* same in little endian or be */
5443/* BB VERIFY verify which is correct for above BB */
5444 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5445 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5446
5447 pSMB->MaxSetupCount = 4;
5448 pSMB->Reserved = 0;
5449 pSMB->ParameterOffset = 0;
5450 pSMB->DataCount = 0;
5451 pSMB->DataOffset = 0;
5452 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5453 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5454 pSMB->ParameterCount = pSMB->TotalParameterCount;
5455 if (notify_subdirs)
5456 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5457 pSMB->Reserved2 = 0;
5458 pSMB->CompletionFilter = cpu_to_le32(filter);
5459 pSMB->Fid = netfid; /* file handle always le */
5460 pSMB->ByteCount = 0;
5461
5462 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5463 (struct smb_hdr *)pSMBr, &bytes_returned,
5464 CIFS_ASYNC_OP);
5465 if (rc) {
5466 cFYI(1, "Error in Notify = %d", rc);
5467 } else {
5468 /* Add file to outstanding requests */
5469 /* BB change to kmem cache alloc */
5470 dnotify_req = kmalloc(
5471 sizeof(struct dir_notify_req),
5472 GFP_KERNEL);
5473 if (dnotify_req) {
5474 dnotify_req->Pid = pSMB->hdr.Pid;
5475 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5476 dnotify_req->Mid = pSMB->hdr.Mid;
5477 dnotify_req->Tid = pSMB->hdr.Tid;
5478 dnotify_req->Uid = pSMB->hdr.Uid;
5479 dnotify_req->netfid = netfid;
5480 dnotify_req->pfile = pfile;
5481 dnotify_req->filter = filter;
5482 dnotify_req->multishot = multishot;
5483 spin_lock(&GlobalMid_Lock);
5484 list_add_tail(&dnotify_req->lhead,
5485 &GlobalDnotifyReqList);
5486 spin_unlock(&GlobalMid_Lock);
5487 } else
5488 rc = -ENOMEM;
5489 }
5490 cifs_buf_release(pSMB);
5491 return rc;
5492}
5493
5494#ifdef CONFIG_CIFS_XATTR 5696#ifdef CONFIG_CIFS_XATTR
5495/* 5697/*
5496 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common 5698 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5502,7 +5704,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5502 * the data isn't copied to it, but the length is returned. 5704 * the data isn't copied to it, but the length is returned.
5503 */ 5705 */
5504ssize_t 5706ssize_t
5505CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 5707CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon,
5506 const unsigned char *searchName, const unsigned char *ea_name, 5708 const unsigned char *searchName, const unsigned char *ea_name,
5507 char *EAData, size_t buf_size, 5709 char *EAData, size_t buf_size,
5508 const struct nls_table *nls_codepage, int remap) 5710 const struct nls_table *nls_codepage, int remap)
@@ -5560,7 +5762,7 @@ QAllEAsRetry:
5560 pSMB->ParameterCount = pSMB->TotalParameterCount; 5762 pSMB->ParameterCount = pSMB->TotalParameterCount;
5561 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS); 5763 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
5562 pSMB->Reserved4 = 0; 5764 pSMB->Reserved4 = 0;
5563 pSMB->hdr.smb_buf_length += byte_count; 5765 inc_rfc1001_len(pSMB, byte_count);
5564 pSMB->ByteCount = cpu_to_le16(byte_count); 5766 pSMB->ByteCount = cpu_to_le16(byte_count);
5565 5767
5566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5768 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5778,7 @@ QAllEAsRetry:
5576 of these trans2 responses */ 5778 of these trans2 responses */
5577 5779
5578 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 5780 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
5579 if (rc || (pSMBr->ByteCount < 4)) { 5781 if (rc || get_bcc(&pSMBr->hdr) < 4) {
5580 rc = -EIO; /* bad smb */ 5782 rc = -EIO; /* bad smb */
5581 goto QAllEAsOut; 5783 goto QAllEAsOut;
5582 } 5784 }
@@ -5683,7 +5885,7 @@ QAllEAsOut:
5683} 5885}
5684 5886
5685int 5887int
5686CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, const char *fileName, 5888CIFSSMBSetEA(const int xid, struct cifs_tcon *tcon, const char *fileName,
5687 const char *ea_name, const void *ea_value, 5889 const char *ea_name, const void *ea_value,
5688 const __u16 ea_value_len, const struct nls_table *nls_codepage, 5890 const __u16 ea_value_len, const struct nls_table *nls_codepage,
5689 int remap) 5891 int remap)
@@ -5773,7 +5975,7 @@ SetEARetry:
5773 pSMB->ParameterCount = cpu_to_le16(params); 5975 pSMB->ParameterCount = cpu_to_le16(params);
5774 pSMB->TotalParameterCount = pSMB->ParameterCount; 5976 pSMB->TotalParameterCount = pSMB->ParameterCount;
5775 pSMB->Reserved4 = 0; 5977 pSMB->Reserved4 = 0;
5776 pSMB->hdr.smb_buf_length += byte_count; 5978 inc_rfc1001_len(pSMB, byte_count);
5777 pSMB->ByteCount = cpu_to_le16(byte_count); 5979 pSMB->ByteCount = cpu_to_le16(byte_count);
5778 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5980 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5779 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5981 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5989,99 @@ SetEARetry:
5787 5989
5788 return rc; 5990 return rc;
5789} 5991}
5790
5791#endif 5992#endif
5993
5994#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
5995/*
5996 * Years ago the kernel added a "dnotify" function for Samba server,
5997 * to allow network clients (such as Windows) to display updated
5998 * lists of files in directory listings automatically when
5999 * files are added by one user when another user has the
6000 * same directory open on their desktop. The Linux cifs kernel
6001 * client hooked into the kernel side of this interface for
6002 * the same reason, but ironically when the VFS moved from
6003 * "dnotify" to "inotify" it became harder to plug in Linux
6004 * network file system clients (the most obvious use case
6005 * for notify interfaces is when multiple users can update
6006 * the contents of the same directory - exactly what network
6007 * file systems can do) although the server (Samba) could
6008 * still use it. For the short term we leave the worker
6009 * function ifdeffed out (below) until inotify is fixed
6010 * in the VFS to make it easier to plug in network file
6011 * system clients. If inotify turns out to be permanently
6012 * incompatible for network fs clients, we could instead simply
6013 * expose this config flag by adding a future cifs (and smb2) notify ioctl.
6014 */
6015int CIFSSMBNotify(const int xid, struct cifs_tcon *tcon,
6016 const int notify_subdirs, const __u16 netfid,
6017 __u32 filter, struct file *pfile, int multishot,
6018 const struct nls_table *nls_codepage)
6019{
6020 int rc = 0;
6021 struct smb_com_transaction_change_notify_req *pSMB = NULL;
6022 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
6023 struct dir_notify_req *dnotify_req;
6024 int bytes_returned;
6025
6026 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
6027 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
6028 (void **) &pSMBr);
6029 if (rc)
6030 return rc;
6031
6032 pSMB->TotalParameterCount = 0 ;
6033 pSMB->TotalDataCount = 0;
6034 pSMB->MaxParameterCount = cpu_to_le32(2);
6035 /* BB find exact data count max from sess structure BB */
6036 pSMB->MaxDataCount = 0; /* same in little endian or be */
6037/* BB VERIFY verify which is correct for above BB */
6038 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
6039 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
6040
6041 pSMB->MaxSetupCount = 4;
6042 pSMB->Reserved = 0;
6043 pSMB->ParameterOffset = 0;
6044 pSMB->DataCount = 0;
6045 pSMB->DataOffset = 0;
6046 pSMB->SetupCount = 4; /* single byte does not need le conversion */
6047 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
6048 pSMB->ParameterCount = pSMB->TotalParameterCount;
6049 if (notify_subdirs)
6050 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
6051 pSMB->Reserved2 = 0;
6052 pSMB->CompletionFilter = cpu_to_le32(filter);
6053 pSMB->Fid = netfid; /* file handle always le */
6054 pSMB->ByteCount = 0;
6055
6056 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
6057 (struct smb_hdr *)pSMBr, &bytes_returned,
6058 CIFS_ASYNC_OP);
6059 if (rc) {
6060 cFYI(1, "Error in Notify = %d", rc);
6061 } else {
6062 /* Add file to outstanding requests */
6063 /* BB change to kmem cache alloc */
6064 dnotify_req = kmalloc(
6065 sizeof(struct dir_notify_req),
6066 GFP_KERNEL);
6067 if (dnotify_req) {
6068 dnotify_req->Pid = pSMB->hdr.Pid;
6069 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
6070 dnotify_req->Mid = pSMB->hdr.Mid;
6071 dnotify_req->Tid = pSMB->hdr.Tid;
6072 dnotify_req->Uid = pSMB->hdr.Uid;
6073 dnotify_req->netfid = netfid;
6074 dnotify_req->pfile = pfile;
6075 dnotify_req->filter = filter;
6076 dnotify_req->multishot = multishot;
6077 spin_lock(&GlobalMid_Lock);
6078 list_add_tail(&dnotify_req->lhead,
6079 &GlobalDnotifyReqList);
6080 spin_unlock(&GlobalMid_Lock);
6081 } else
6082 rc = -ENOMEM;
6083 }
6084 cifs_buf_release(pSMB);
6085 return rc;
6086}
6087#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
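
CIFSSMBNotify is not deleted by this patch: the earlier hunk removing
it from the middle of cifssmb.c pairs with this one re-adding it at the
end of the file, now compiled only under CONFIG_CIFS_DNOTIFY_EXPERIMENTAL
and preceded by the comment explaining the dnotify/inotify history.
Apart from the cifsTconInfo -> cifs_tcon rename, the function body is
byte-for-byte the same.
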
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 277262a8e82f..6d88b82537c3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -57,61 +57,6 @@
57 57
58extern mempool_t *cifs_req_poolp; 58extern mempool_t *cifs_req_poolp;
59 59
60struct smb_vol {
61 char *username;
62 char *password;
63 char *domainname;
64 char *UNC;
65 char *UNCip;
66 char *iocharset; /* local code page for mapping to and from Unicode */
67 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
68 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
69 uid_t cred_uid;
70 uid_t linux_uid;
71 gid_t linux_gid;
72 mode_t file_mode;
73 mode_t dir_mode;
74 unsigned secFlg;
75 bool retry:1;
76 bool intr:1;
77 bool setuids:1;
78 bool override_uid:1;
79 bool override_gid:1;
80 bool dynperm:1;
81 bool noperm:1;
82 bool no_psx_acl:1; /* set if posix acl support should be disabled */
83 bool cifs_acl:1;
84 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
85 bool server_ino:1; /* use inode numbers from server ie UniqueId */
86 bool direct_io:1;
87 bool strict_io:1; /* strict cache behavior */
88 bool remap:1; /* set to remap seven reserved chars in filenames */
89 bool posix_paths:1; /* unset to not ask for posix pathnames. */
90 bool no_linux_ext:1;
91 bool sfu_emul:1;
92 bool nullauth:1; /* attempt to authenticate with null user */
93 bool nocase:1; /* request case insensitive filenames */
94 bool nobrl:1; /* disable sending byte range locks to srv */
95 bool mand_lock:1; /* send mandatory not posix byte range lock reqs */
96 bool seal:1; /* request transport encryption on share */
97 bool nodfs:1; /* Do not request DFS, even if available */
98 bool local_lease:1; /* check leases only on local system, not remote */
99 bool noblocksnd:1;
100 bool noautotune:1;
101 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
102 bool fsc:1; /* enable fscache */
103 bool mfsymlinks:1; /* use Minshall+French Symlinks */
104 bool multiuser:1;
105 unsigned int rsize;
106 unsigned int wsize;
107 bool sockopt_tcp_nodelay:1;
108 unsigned short int port;
109 unsigned long actimeo; /* attribute cache timeout (jiffies) */
110 char *prepath;
111 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
112 struct nls_table *local_nls;
113};
114
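
The smb_vol definition leaves connect.c entirely. Later hunks in this
file set fields that the removed copy does not have (rwpidforward,
below), so the structure has presumably been moved to a shared header,
most likely cifsglob.h, and extended there; that part of the series is
outside this diff.
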
115/* FIXME: should these be tunable? */ 60/* FIXME: should these be tunable? */
116#define TLINK_ERROR_EXPIRE (1 * HZ) 61#define TLINK_ERROR_EXPIRE (1 * HZ)
117#define TLINK_IDLE_EXPIRE (600 * HZ) 62#define TLINK_IDLE_EXPIRE (600 * HZ)
@@ -134,9 +79,10 @@ cifs_reconnect(struct TCP_Server_Info *server)
134{ 79{
135 int rc = 0; 80 int rc = 0;
136 struct list_head *tmp, *tmp2; 81 struct list_head *tmp, *tmp2;
137 struct cifsSesInfo *ses; 82 struct cifs_ses *ses;
138 struct cifsTconInfo *tcon; 83 struct cifs_tcon *tcon;
139 struct mid_q_entry *mid_entry; 84 struct mid_q_entry *mid_entry;
85 struct list_head retry_list;
140 86
141 spin_lock(&GlobalMid_Lock); 87 spin_lock(&GlobalMid_Lock);
142 if (server->tcpStatus == CifsExiting) { 88 if (server->tcpStatus == CifsExiting) {
@@ -156,11 +102,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
156 cFYI(1, "%s: marking sessions and tcons for reconnect", __func__); 102 cFYI(1, "%s: marking sessions and tcons for reconnect", __func__);
157 spin_lock(&cifs_tcp_ses_lock); 103 spin_lock(&cifs_tcp_ses_lock);
158 list_for_each(tmp, &server->smb_ses_list) { 104 list_for_each(tmp, &server->smb_ses_list) {
159 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); 105 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
160 ses->need_reconnect = true; 106 ses->need_reconnect = true;
161 ses->ipc_tid = 0; 107 ses->ipc_tid = 0;
162 list_for_each(tmp2, &ses->tcon_list) { 108 list_for_each(tmp2, &ses->tcon_list) {
163 tcon = list_entry(tmp2, struct cifsTconInfo, tcon_list); 109 tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
164 tcon->need_reconnect = true; 110 tcon->need_reconnect = true;
165 } 111 }
166 } 112 }
@@ -188,16 +134,23 @@ cifs_reconnect(struct TCP_Server_Info *server)
188 mutex_unlock(&server->srv_mutex); 134 mutex_unlock(&server->srv_mutex);
189 135
190 /* mark submitted MIDs for retry and issue callback */ 136 /* mark submitted MIDs for retry and issue callback */
191 cFYI(1, "%s: issuing mid callbacks", __func__); 137 INIT_LIST_HEAD(&retry_list);
138 cFYI(1, "%s: moving mids to private list", __func__);
192 spin_lock(&GlobalMid_Lock); 139 spin_lock(&GlobalMid_Lock);
193 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { 140 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
194 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 141 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
195 if (mid_entry->midState == MID_REQUEST_SUBMITTED) 142 if (mid_entry->midState == MID_REQUEST_SUBMITTED)
196 mid_entry->midState = MID_RETRY_NEEDED; 143 mid_entry->midState = MID_RETRY_NEEDED;
144 list_move(&mid_entry->qhead, &retry_list);
145 }
146 spin_unlock(&GlobalMid_Lock);
147
148 cFYI(1, "%s: issuing mid callbacks", __func__);
149 list_for_each_safe(tmp, tmp2, &retry_list) {
150 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
197 list_del_init(&mid_entry->qhead); 151 list_del_init(&mid_entry->qhead);
198 mid_entry->callback(mid_entry); 152 mid_entry->callback(mid_entry);
199 } 153 }
200 spin_unlock(&GlobalMid_Lock);
201 154
202 while (server->tcpStatus == CifsNeedReconnect) { 155 while (server->tcpStatus == CifsNeedReconnect) {
203 try_to_freeze(); 156 try_to_freeze();
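
The cifs_reconnect() hunk above changes when mid callbacks run:
previously each callback was invoked while GlobalMid_Lock was still
held, but a callback may sleep or retake that lock. The fix detaches
the pending mids onto a function-local retry_list under the lock and
only then walks the private list issuing callbacks. The demultiplex
thread's exit path later in this file gets the same treatment with a
dispose_list. The generic shape of the pattern, with illustrative types
(the patch itself moves entries one at a time with list_move() because
it also updates midState under the lock):

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct work_item {
            struct list_head head;
            void (*callback)(struct work_item *);
    };

    static void drain_pending(struct list_head *pending, spinlock_t *lock)
    {
            struct work_item *item, *tmp;
            LIST_HEAD(private);

            spin_lock(lock);
            list_splice_init(pending, &private);    /* O(1) detach */
            spin_unlock(lock);

            list_for_each_entry_safe(item, tmp, &private, head) {
                    list_del_init(&item->head);
                    item->callback(item);           /* lock no longer held */
            }
    }
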
@@ -316,19 +269,19 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
316 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); 269 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
317 270
318 /* fix up the BCC */ 271 /* fix up the BCC */
319 byte_count = get_bcc_le(pTargetSMB); 272 byte_count = get_bcc(pTargetSMB);
320 byte_count += total_in_buf2; 273 byte_count += total_in_buf2;
321 /* is the result too big for the field? */ 274 /* is the result too big for the field? */
322 if (byte_count > USHRT_MAX) 275 if (byte_count > USHRT_MAX)
323 return -EPROTO; 276 return -EPROTO;
324 put_bcc_le(byte_count, pTargetSMB); 277 put_bcc(byte_count, pTargetSMB);
325 278
326 byte_count = pTargetSMB->smb_buf_length; 279 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
327 byte_count += total_in_buf2; 280 byte_count += total_in_buf2;
328 /* don't allow buffer to overflow */ 281 /* don't allow buffer to overflow */
329 if (byte_count > CIFSMaxBufSize) 282 if (byte_count > CIFSMaxBufSize)
330 return -ENOBUFS; 283 return -ENOBUFS;
331 pTargetSMB->smb_buf_length = byte_count; 284 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
332 285
333 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); 286 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
334 287
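
Here the meaning of smb_buf_length changes: the field now stays
big-endian (its on-wire RFC1001 form) at all times and is converted
with be32_to_cpu()/cpu_to_be32() only where a host-order value is
needed, which is exactly what makes the inc_rfc1001_len() helper in
cifssmb.c necessary. The BCC accessors likewise drop their _le variants
in favor of host-order ones; the setter is presumably the mirror image
of the get_bcc() sketch above:

    static inline void
    put_bcc(__u16 count, struct smb_hdr *hdr)
    {
            __le16 *bc_ptr = (__le16 *)((unsigned char *)hdr +
                                        sizeof(struct smb_hdr) +
                                        (2 * hdr->WordCount));

            *bc_ptr = cpu_to_le16(count);
    }

The following hunk drops the old in-place byte swap in the reader
thread for the same reason: with the field kept big-endian there is
nothing left to write back after be32_to_cpu().
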
@@ -495,8 +448,7 @@ incomplete_rcv:
495 /* Note that FC 1001 length is big endian on the wire, 448 /* Note that FC 1001 length is big endian on the wire,
496 but we convert it here so it is always manipulated 449 but we convert it here so it is always manipulated
497 as host byte order */ 450 as host byte order */
498 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 451 pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
499 smb_buffer->smb_buf_length = pdu_length;
500 452
501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4); 453 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
502 454
@@ -672,12 +624,12 @@ multi_t2_fnd:
672 mid_entry->when_received = jiffies; 624 mid_entry->when_received = jiffies;
673#endif 625#endif
674 list_del_init(&mid_entry->qhead); 626 list_del_init(&mid_entry->qhead);
675 mid_entry->callback(mid_entry);
676 break; 627 break;
677 } 628 }
678 spin_unlock(&GlobalMid_Lock); 629 spin_unlock(&GlobalMid_Lock);
679 630
680 if (mid_entry != NULL) { 631 if (mid_entry != NULL) {
632 mid_entry->callback(mid_entry);
681 /* Was previous buf put in mpx struct for multi-rsp? */ 633 /* Was previous buf put in mpx struct for multi-rsp? */
682 if (!isMultiRsp) { 634 if (!isMultiRsp) {
683 /* smb buffer will be freed by user thread */ 635 /* smb buffer will be freed by user thread */
@@ -735,21 +687,31 @@ multi_t2_fnd:
735 sock_release(csocket); 687 sock_release(csocket);
736 server->ssocket = NULL; 688 server->ssocket = NULL;
737 } 689 }
738 /* buffer usuallly freed in free_mid - need to free it here on exit */ 690 /* buffer usually freed in free_mid - need to free it here on exit */
739 cifs_buf_release(bigbuf); 691 cifs_buf_release(bigbuf);
740 if (smallbuf) /* no sense logging a debug message if NULL */ 692 if (smallbuf) /* no sense logging a debug message if NULL */
741 cifs_small_buf_release(smallbuf); 693 cifs_small_buf_release(smallbuf);
742 694
743 if (!list_empty(&server->pending_mid_q)) { 695 if (!list_empty(&server->pending_mid_q)) {
696 struct list_head dispose_list;
697
698 INIT_LIST_HEAD(&dispose_list);
744 spin_lock(&GlobalMid_Lock); 699 spin_lock(&GlobalMid_Lock);
745 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { 700 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
746 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 701 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
747 cFYI(1, "Clearing Mid 0x%x - issuing callback", 702 cFYI(1, "Clearing mid 0x%x", mid_entry->mid);
748 mid_entry->mid); 703 mid_entry->midState = MID_SHUTDOWN;
704 list_move(&mid_entry->qhead, &dispose_list);
705 }
706 spin_unlock(&GlobalMid_Lock);
707
708 /* now walk dispose list and issue callbacks */
709 list_for_each_safe(tmp, tmp2, &dispose_list) {
710 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
711 cFYI(1, "Callback mid 0x%x", mid_entry->mid);
749 list_del_init(&mid_entry->qhead); 712 list_del_init(&mid_entry->qhead);
750 mid_entry->callback(mid_entry); 713 mid_entry->callback(mid_entry);
751 } 714 }
752 spin_unlock(&GlobalMid_Lock);
753 /* 1/8th of sec is more than enough time for them to exit */ 715 /* 1/8th of sec is more than enough time for them to exit */
754 msleep(125); 716 msleep(125);
755 } 717 }
@@ -818,10 +780,11 @@ extract_hostname(const char *unc)
818} 780}
819 781
820static int 782static int
821cifs_parse_mount_options(char *options, const char *devname, 783cifs_parse_mount_options(const char *mountdata, const char *devname,
822 struct smb_vol *vol) 784 struct smb_vol *vol)
823{ 785{
824 char *value, *data, *end; 786 char *value, *data, *end;
787 char *mountdata_copy, *options;
825 unsigned int temp_len, i, j; 788 unsigned int temp_len, i, j;
826 char separator[2]; 789 char separator[2];
827 short int override_uid = -1; 790 short int override_uid = -1;
@@ -861,9 +824,14 @@ cifs_parse_mount_options(char *options, const char *devname,
861 824
862 vol->actimeo = CIFS_DEF_ACTIMEO; 825 vol->actimeo = CIFS_DEF_ACTIMEO;
863 826
864 if (!options) 827 if (!mountdata)
865 return 1; 828 goto cifs_parse_mount_err;
829
830 mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
831 if (!mountdata_copy)
832 goto cifs_parse_mount_err;
866 833
834 options = mountdata_copy;
867 end = options + strlen(options); 835 end = options + strlen(options);
868 if (strncmp(options, "sep=", 4) == 0) { 836 if (strncmp(options, "sep=", 4) == 0) {
869 if (options[4] != 0) { 837 if (options[4] != 0) {
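
From this point on, every "return 1" in the option parser becomes
"goto cifs_parse_mount_err". The trigger is the new mountdata_copy:
the parser now tokenizes a kstrndup()'d private copy instead of
scribbling on the caller's mount data, so any value it keeps must be
duplicated (the kstrdup() calls below for username, UNCip, domainname
and iocharset) and every exit path must release whatever was allocated
so far. A minimal, hypothetical illustration of the idiom (names here
are made up, not the kernel's):

    #include <linux/mm.h>           /* PAGE_SIZE */
    #include <linux/slab.h>
    #include <linux/string.h>

    static int example_parse(const char *mountdata, char **name_out)
    {
            char *copy = NULL, *name = NULL;

            if (!mountdata)
                    goto parse_err;

            copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
            if (!copy)
                    goto parse_err;

            /* stand-in for extracting and keeping one option value */
            name = kstrdup(copy, GFP_KERNEL);
            if (!name)
                    goto parse_err;

            *name_out = name;
            kfree(copy);            /* safe: kept values were duplicated */
            return 0;

    parse_err:
            kfree(name);            /* kfree(NULL) is a no-op */
            kfree(copy);
            return 1;
    }
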
@@ -889,17 +857,22 @@ cifs_parse_mount_options(char *options, const char *devname,
889 if (!value) { 857 if (!value) {
890 printk(KERN_WARNING 858 printk(KERN_WARNING
891 "CIFS: invalid or missing username\n"); 859 "CIFS: invalid or missing username\n");
892 return 1; /* needs_arg; */ 860 goto cifs_parse_mount_err;
893 } else if (!*value) { 861 } else if (!*value) {
894 /* null user, ie anonymous, authentication */ 862 /* null user, ie anonymous, authentication */
895 vol->nullauth = 1; 863 vol->nullauth = 1;
896 } 864 }
897 if (strnlen(value, MAX_USERNAME_SIZE) < 865 if (strnlen(value, MAX_USERNAME_SIZE) <
898 MAX_USERNAME_SIZE) { 866 MAX_USERNAME_SIZE) {
899 vol->username = value; 867 vol->username = kstrdup(value, GFP_KERNEL);
868 if (!vol->username) {
869 printk(KERN_WARNING "CIFS: no memory "
870 "for username\n");
871 goto cifs_parse_mount_err;
872 }
900 } else { 873 } else {
901 printk(KERN_WARNING "CIFS: username too long\n"); 874 printk(KERN_WARNING "CIFS: username too long\n");
902 return 1; 875 goto cifs_parse_mount_err;
903 } 876 }
904 } else if (strnicmp(data, "pass", 4) == 0) { 877 } else if (strnicmp(data, "pass", 4) == 0) {
905 if (!value) { 878 if (!value) {
@@ -963,7 +936,7 @@ cifs_parse_mount_options(char *options, const char *devname,
963 if (vol->password == NULL) { 936 if (vol->password == NULL) {
964 printk(KERN_WARNING "CIFS: no memory " 937 printk(KERN_WARNING "CIFS: no memory "
965 "for password\n"); 938 "for password\n");
966 return 1; 939 goto cifs_parse_mount_err;
967 } 940 }
968 for (i = 0, j = 0; i < temp_len; i++, j++) { 941 for (i = 0, j = 0; i < temp_len; i++, j++) {
969 vol->password[j] = value[i]; 942 vol->password[j] = value[i];
@@ -979,7 +952,7 @@ cifs_parse_mount_options(char *options, const char *devname,
979 if (vol->password == NULL) { 952 if (vol->password == NULL) {
980 printk(KERN_WARNING "CIFS: no memory " 953 printk(KERN_WARNING "CIFS: no memory "
981 "for password\n"); 954 "for password\n");
982 return 1; 955 goto cifs_parse_mount_err;
983 } 956 }
984 strcpy(vol->password, value); 957 strcpy(vol->password, value);
985 } 958 }
@@ -989,11 +962,16 @@ cifs_parse_mount_options(char *options, const char *devname,
989 vol->UNCip = NULL; 962 vol->UNCip = NULL;
990 } else if (strnlen(value, INET6_ADDRSTRLEN) < 963 } else if (strnlen(value, INET6_ADDRSTRLEN) <
991 INET6_ADDRSTRLEN) { 964 INET6_ADDRSTRLEN) {
992 vol->UNCip = value; 965 vol->UNCip = kstrdup(value, GFP_KERNEL);
966 if (!vol->UNCip) {
967 printk(KERN_WARNING "CIFS: no memory "
968 "for UNC IP\n");
969 goto cifs_parse_mount_err;
970 }
993 } else { 971 } else {
994 printk(KERN_WARNING "CIFS: ip address " 972 printk(KERN_WARNING "CIFS: ip address "
995 "too long\n"); 973 "too long\n");
996 return 1; 974 goto cifs_parse_mount_err;
997 } 975 }
998 } else if (strnicmp(data, "sec", 3) == 0) { 976 } else if (strnicmp(data, "sec", 3) == 0) {
999 if (!value || !*value) { 977 if (!value || !*value) {
@@ -1006,7 +984,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1006 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 984 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
1007 CIFSSEC_MAY_KRB5; */ 985 CIFSSEC_MAY_KRB5; */
1008 cERROR(1, "Krb5 cifs privacy not supported"); 986 cERROR(1, "Krb5 cifs privacy not supported");
1009 return 1; 987 goto cifs_parse_mount_err;
1010 } else if (strnicmp(value, "krb5", 4) == 0) { 988 } else if (strnicmp(value, "krb5", 4) == 0) {
1011 vol->secFlg |= CIFSSEC_MAY_KRB5; 989 vol->secFlg |= CIFSSEC_MAY_KRB5;
1012 } else if (strnicmp(value, "ntlmsspi", 8) == 0) { 990 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1036,7 +1014,16 @@ cifs_parse_mount_options(char *options, const char *devname,
1036 vol->nullauth = 1; 1014 vol->nullauth = 1;
1037 } else { 1015 } else {
1038 cERROR(1, "bad security option: %s", value); 1016 cERROR(1, "bad security option: %s", value);
1039 return 1; 1017 goto cifs_parse_mount_err;
1018 }
1019 } else if (strnicmp(data, "vers", 3) == 0) {
1020 if (!value || !*value) {
1021 cERROR(1, "no protocol version specified"
1022 " after vers= mount option");
1023 } else if ((strnicmp(value, "cifs", 4) == 0) ||
1024 (strnicmp(value, "1", 1) == 0)) {
1025 /* this is the default */
1026 continue;
1040 } 1027 }
1041 } else if ((strnicmp(data, "unc", 3) == 0) 1028 } else if ((strnicmp(data, "unc", 3) == 0)
1042 || (strnicmp(data, "target", 6) == 0) 1029 || (strnicmp(data, "target", 6) == 0)
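
The vers= option introduced here accepts only the current protocol:
"vers=cifs" and "vers=1" fall through as the default, and in the lines
shown no other value draws even a warning, so this looks like
groundwork for a future SMB2 dialect selector rather than a functional
switch. As of this patch a mount such as

    mount -t cifs //server/share /mnt -o vers=1,username=guest

should behave identically to the same mount without vers= (the share
and options above are illustrative, not from the patch).
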
@@ -1044,12 +1031,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1044 if (!value || !*value) { 1031 if (!value || !*value) {
1045 printk(KERN_WARNING "CIFS: invalid path to " 1032 printk(KERN_WARNING "CIFS: invalid path to "
1046 "network resource\n"); 1033 "network resource\n");
1047 return 1; /* needs_arg; */ 1034 goto cifs_parse_mount_err;
1048 } 1035 }
1049 if ((temp_len = strnlen(value, 300)) < 300) { 1036 if ((temp_len = strnlen(value, 300)) < 300) {
1050 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1037 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1051 if (vol->UNC == NULL) 1038 if (vol->UNC == NULL)
1052 return 1; 1039 goto cifs_parse_mount_err;
1053 strcpy(vol->UNC, value); 1040 strcpy(vol->UNC, value);
1054 if (strncmp(vol->UNC, "//", 2) == 0) { 1041 if (strncmp(vol->UNC, "//", 2) == 0) {
1055 vol->UNC[0] = '\\'; 1042 vol->UNC[0] = '\\';
@@ -1058,27 +1045,32 @@ cifs_parse_mount_options(char *options, const char *devname,
1058 printk(KERN_WARNING 1045 printk(KERN_WARNING
1059 "CIFS: UNC Path does not begin " 1046 "CIFS: UNC Path does not begin "
1060 "with // or \\\\ \n"); 1047 "with // or \\\\ \n");
1061 return 1; 1048 goto cifs_parse_mount_err;
1062 } 1049 }
1063 } else { 1050 } else {
1064 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1051 printk(KERN_WARNING "CIFS: UNC name too long\n");
1065 return 1; 1052 goto cifs_parse_mount_err;
1066 } 1053 }
1067 } else if ((strnicmp(data, "domain", 3) == 0) 1054 } else if ((strnicmp(data, "domain", 3) == 0)
1068 || (strnicmp(data, "workgroup", 5) == 0)) { 1055 || (strnicmp(data, "workgroup", 5) == 0)) {
1069 if (!value || !*value) { 1056 if (!value || !*value) {
1070 printk(KERN_WARNING "CIFS: invalid domain name\n"); 1057 printk(KERN_WARNING "CIFS: invalid domain name\n");
1071 return 1; /* needs_arg; */ 1058 goto cifs_parse_mount_err;
1072 } 1059 }
1073 /* BB are there cases in which a comma can be valid in 1060 /* BB are there cases in which a comma can be valid in
1074 a domain name and need special handling? */ 1061 a domain name and need special handling? */
1075 if (strnlen(value, 256) < 256) { 1062 if (strnlen(value, 256) < 256) {
1076 vol->domainname = value; 1063 vol->domainname = kstrdup(value, GFP_KERNEL);
1064 if (!vol->domainname) {
1065 printk(KERN_WARNING "CIFS: no memory "
1066 "for domainname\n");
1067 goto cifs_parse_mount_err;
1068 }
1077 cFYI(1, "Domain name set"); 1069 cFYI(1, "Domain name set");
1078 } else { 1070 } else {
1079 printk(KERN_WARNING "CIFS: domain name too " 1071 printk(KERN_WARNING "CIFS: domain name too "
1080 "long\n"); 1072 "long\n");
1081 return 1; 1073 goto cifs_parse_mount_err;
1082 } 1074 }
1083 } else if (strnicmp(data, "srcaddr", 7) == 0) { 1075 } else if (strnicmp(data, "srcaddr", 7) == 0) {
1084 vol->srcaddr.ss_family = AF_UNSPEC; 1076 vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1086,7 +1078,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1086 if (!value || !*value) { 1078 if (!value || !*value) {
1087 printk(KERN_WARNING "CIFS: srcaddr value" 1079 printk(KERN_WARNING "CIFS: srcaddr value"
1088 " not specified.\n"); 1080 " not specified.\n");
1089 return 1; /* needs_arg; */ 1081 goto cifs_parse_mount_err;
1090 } 1082 }
1091 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1083 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1092 value, strlen(value)); 1084 value, strlen(value));
@@ -1094,20 +1086,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1094 printk(KERN_WARNING "CIFS: Could not parse" 1086 printk(KERN_WARNING "CIFS: Could not parse"
1095 " srcaddr: %s\n", 1087 " srcaddr: %s\n",
1096 value); 1088 value);
1097 return 1; 1089 goto cifs_parse_mount_err;
1098 } 1090 }
1099 } else if (strnicmp(data, "prefixpath", 10) == 0) { 1091 } else if (strnicmp(data, "prefixpath", 10) == 0) {
1100 if (!value || !*value) { 1092 if (!value || !*value) {
1101 printk(KERN_WARNING 1093 printk(KERN_WARNING
1102 "CIFS: invalid path prefix\n"); 1094 "CIFS: invalid path prefix\n");
1103 return 1; /* needs_argument */ 1095 goto cifs_parse_mount_err;
1104 } 1096 }
1105 if ((temp_len = strnlen(value, 1024)) < 1024) { 1097 if ((temp_len = strnlen(value, 1024)) < 1024) {
1106 if (value[0] != '/') 1098 if (value[0] != '/')
1107 temp_len++; /* missing leading slash */ 1099 temp_len++; /* missing leading slash */
1108 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); 1100 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
1109 if (vol->prepath == NULL) 1101 if (vol->prepath == NULL)
1110 return 1; 1102 goto cifs_parse_mount_err;
1111 if (value[0] != '/') { 1103 if (value[0] != '/') {
1112 vol->prepath[0] = '/'; 1104 vol->prepath[0] = '/';
1113 strcpy(vol->prepath+1, value); 1105 strcpy(vol->prepath+1, value);
@@ -1116,24 +1108,33 @@ cifs_parse_mount_options(char *options, const char *devname,
1116 cFYI(1, "prefix path %s", vol->prepath); 1108 cFYI(1, "prefix path %s", vol->prepath);
1117 } else { 1109 } else {
1118 printk(KERN_WARNING "CIFS: prefix too long\n"); 1110 printk(KERN_WARNING "CIFS: prefix too long\n");
1119 return 1; 1111 goto cifs_parse_mount_err;
1120 } 1112 }
1121 } else if (strnicmp(data, "iocharset", 9) == 0) { 1113 } else if (strnicmp(data, "iocharset", 9) == 0) {
1122 if (!value || !*value) { 1114 if (!value || !*value) {
1123 printk(KERN_WARNING "CIFS: invalid iocharset " 1115 printk(KERN_WARNING "CIFS: invalid iocharset "
1124 "specified\n"); 1116 "specified\n");
1125 return 1; /* needs_arg; */ 1117 goto cifs_parse_mount_err;
1126 } 1118 }
1127 if (strnlen(value, 65) < 65) { 1119 if (strnlen(value, 65) < 65) {
1128 if (strnicmp(value, "default", 7)) 1120 if (strnicmp(value, "default", 7)) {
1129 vol->iocharset = value; 1121 vol->iocharset = kstrdup(value,
1122 GFP_KERNEL);
1123
1124 if (!vol->iocharset) {
1125 printk(KERN_WARNING "CIFS: no "
1126 "memory for"
1127 "charset\n");
1128 goto cifs_parse_mount_err;
1129 }
1130 }
1130 /* if iocharset not set then load_nls_default 1131 /* if iocharset not set then load_nls_default
1131 is used by caller */ 1132 is used by caller */
1132 cFYI(1, "iocharset set to %s", value); 1133 cFYI(1, "iocharset set to %s", value);
1133 } else { 1134 } else {
1134 printk(KERN_WARNING "CIFS: iocharset name " 1135 printk(KERN_WARNING "CIFS: iocharset name "
1135 "too long.\n"); 1136 "too long.\n");
1136 return 1; 1137 goto cifs_parse_mount_err;
1137 } 1138 }
1138 } else if (!strnicmp(data, "uid", 3) && value && *value) { 1139 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1139 vol->linux_uid = simple_strtoul(value, &value, 0); 1140 vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1246,7 +1247,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1246 if (vol->actimeo > CIFS_MAX_ACTIMEO) { 1247 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
 1247 cERROR(1, "CIFS: attribute cache " 1248 cERROR(1, "CIFS: attribute cache "
1248 "timeout too large"); 1249 "timeout too large");
1249 return 1; 1250 goto cifs_parse_mount_err;
1250 } 1251 }
1251 } 1252 }
1252 } else if (strnicmp(data, "credentials", 4) == 0) { 1253 } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1358,6 +1359,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1358 vol->server_ino = 1; 1359 vol->server_ino = 1;
1359 } else if (strnicmp(data, "noserverino", 9) == 0) { 1360 } else if (strnicmp(data, "noserverino", 9) == 0) {
1360 vol->server_ino = 0; 1361 vol->server_ino = 0;
1362 } else if (strnicmp(data, "rwpidforward", 4) == 0) {
1363 vol->rwpidforward = 1;
1361 } else if (strnicmp(data, "cifsacl", 7) == 0) { 1364 } else if (strnicmp(data, "cifsacl", 7) == 0) {
1362 vol->cifs_acl = 1; 1365 vol->cifs_acl = 1;
1363 } else if (strnicmp(data, "nocifsacl", 9) == 0) { 1366 } else if (strnicmp(data, "nocifsacl", 9) == 0) {
@@ -1390,7 +1393,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1390#ifndef CONFIG_CIFS_FSCACHE 1393#ifndef CONFIG_CIFS_FSCACHE
1391 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
1392 "kernel config option set"); 1395 "kernel config option set");
1393 return 1; 1396 goto cifs_parse_mount_err;
1394#endif 1397#endif
1395 vol->fsc = true; 1398 vol->fsc = true;
1396 } else if (strnicmp(data, "mfsymlinks", 10) == 0) { 1399 } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1405,12 +1408,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1405 if (devname == NULL) { 1408 if (devname == NULL) {
1406 printk(KERN_WARNING "CIFS: Missing UNC name for mount " 1409 printk(KERN_WARNING "CIFS: Missing UNC name for mount "
1407 "target\n"); 1410 "target\n");
1408 return 1; 1411 goto cifs_parse_mount_err;
1409 } 1412 }
1410 if ((temp_len = strnlen(devname, 300)) < 300) { 1413 if ((temp_len = strnlen(devname, 300)) < 300) {
1411 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1414 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1412 if (vol->UNC == NULL) 1415 if (vol->UNC == NULL)
1413 return 1; 1416 goto cifs_parse_mount_err;
1414 strcpy(vol->UNC, devname); 1417 strcpy(vol->UNC, devname);
1415 if (strncmp(vol->UNC, "//", 2) == 0) { 1418 if (strncmp(vol->UNC, "//", 2) == 0) {
1416 vol->UNC[0] = '\\'; 1419 vol->UNC[0] = '\\';
@@ -1418,21 +1421,21 @@ cifs_parse_mount_options(char *options, const char *devname,
1418 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { 1421 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
1419 printk(KERN_WARNING "CIFS: UNC Path does not " 1422 printk(KERN_WARNING "CIFS: UNC Path does not "
1420 "begin with // or \\\\ \n"); 1423 "begin with // or \\\\ \n");
1421 return 1; 1424 goto cifs_parse_mount_err;
1422 } 1425 }
1423 value = strpbrk(vol->UNC+2, "/\\"); 1426 value = strpbrk(vol->UNC+2, "/\\");
1424 if (value) 1427 if (value)
1425 *value = '\\'; 1428 *value = '\\';
1426 } else { 1429 } else {
1427 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1430 printk(KERN_WARNING "CIFS: UNC name too long\n");
1428 return 1; 1431 goto cifs_parse_mount_err;
1429 } 1432 }
1430 } 1433 }
1431 1434
1432 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { 1435 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
1433 cERROR(1, "Multiuser mounts currently require krb5 " 1436 cERROR(1, "Multiuser mounts currently require krb5 "
1434 "authentication!"); 1437 "authentication!");
1435 return 1; 1438 goto cifs_parse_mount_err;
1436 } 1439 }
1437 1440
1438 if (vol->UNCip == NULL) 1441 if (vol->UNCip == NULL)
@@ -1450,7 +1453,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1450 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1453 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1451 "specified with no gid= option.\n"); 1454 "specified with no gid= option.\n");
1452 1455
1456 kfree(mountdata_copy);
1453 return 0; 1457 return 0;
1458
1459cifs_parse_mount_err:
1460 kfree(mountdata_copy);
1461 return 1;
1454} 1462}
1455 1463
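
Taken together, the hunks above convert every early "return 1" in cifs_parse_mount_options() into a jump to the single cifs_parse_mount_err label, so the mountdata_copy buffer is freed on every exit path. A minimal sketch of the same single-exit cleanup idiom, with hypothetical names and a hypothetical validation step:

    #include <stdlib.h>
    #include <string.h>

    /* sketch only: every failure path funnels through one label
     * that releases the working copy */
    int parse_options(const char *options)
    {
        char *copy = strdup(options);

        if (copy == NULL)
            return 1;

        if (strnlen(copy, 4096) == 4096)   /* hypothetical length check */
            goto parse_err;

        /* ... further option checks, each jumping to parse_err ... */

        free(copy);
        return 0;

    parse_err:
        free(copy);
        return 1;
    }
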
1456/** Returns true if srcaddr isn't specified and rhs isn't 1464/** Returns true if srcaddr isn't specified and rhs isn't
@@ -1589,16 +1597,35 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
1589 1597
1590 /* now check if signing mode is acceptable */ 1598 /* now check if signing mode is acceptable */
1591 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && 1599 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
1592 (server->secMode & SECMODE_SIGN_REQUIRED)) 1600 (server->sec_mode & SECMODE_SIGN_REQUIRED))
1593 return false; 1601 return false;
1594 else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) && 1602 else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
1595 (server->secMode & 1603 (server->sec_mode &
1596 (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0) 1604 (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
1597 return false; 1605 return false;
1598 1606
1599 return true; 1607 return true;
1600} 1608}
1601 1609
1610static int match_server(struct TCP_Server_Info *server, struct sockaddr *addr,
1611 struct smb_vol *vol)
1612{
1613 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
1614 return 0;
1615
1616 if (!match_address(server, addr,
1617 (struct sockaddr *)&vol->srcaddr))
1618 return 0;
1619
1620 if (!match_port(server, addr))
1621 return 0;
1622
1623 if (!match_security(server, vol))
1624 return 0;
1625
1626 return 1;
1627}
1628
1602static struct TCP_Server_Info * 1629static struct TCP_Server_Info *
1603cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) 1630cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1604{ 1631{
@@ -1606,17 +1633,7 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1606 1633
1607 spin_lock(&cifs_tcp_ses_lock); 1634 spin_lock(&cifs_tcp_ses_lock);
1608 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { 1635 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
1609 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) 1636 if (!match_server(server, addr, vol))
1610 continue;
1611
1612 if (!match_address(server, addr,
1613 (struct sockaddr *)&vol->srcaddr))
1614 continue;
1615
1616 if (!match_port(server, addr))
1617 continue;
1618
1619 if (!match_security(server, vol))
1620 continue; 1637 continue;
1621 1638
1622 ++server->srv_count; 1639 ++server->srv_count;
@@ -1810,32 +1827,39 @@ out_err:
1810 return ERR_PTR(rc); 1827 return ERR_PTR(rc);
1811} 1828}
1812 1829
1813static struct cifsSesInfo * 1830static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
1831{
1832 switch (ses->server->secType) {
1833 case Kerberos:
1834 if (vol->cred_uid != ses->cred_uid)
1835 return 0;
1836 break;
1837 default:
1838 /* anything else takes username/password */
1839 if (ses->user_name == NULL)
1840 return 0;
1841 if (strncmp(ses->user_name, vol->username,
1842 MAX_USERNAME_SIZE))
1843 return 0;
1844 if (strlen(vol->username) != 0 &&
1845 ses->password != NULL &&
1846 strncmp(ses->password,
1847 vol->password ? vol->password : "",
1848 MAX_PASSWORD_SIZE))
1849 return 0;
1850 }
1851 return 1;
1852}
1853
1854static struct cifs_ses *
1814cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) 1855cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1815{ 1856{
1816 struct cifsSesInfo *ses; 1857 struct cifs_ses *ses;
1817 1858
1818 spin_lock(&cifs_tcp_ses_lock); 1859 spin_lock(&cifs_tcp_ses_lock);
1819 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { 1860 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
1820 switch (server->secType) { 1861 if (!match_session(ses, vol))
1821 case Kerberos: 1862 continue;
1822 if (vol->cred_uid != ses->cred_uid)
1823 continue;
1824 break;
1825 default:
1826 /* anything else takes username/password */
1827 if (ses->user_name == NULL)
1828 continue;
1829 if (strncmp(ses->user_name, vol->username,
1830 MAX_USERNAME_SIZE))
1831 continue;
1832 if (strlen(vol->username) != 0 &&
1833 ses->password != NULL &&
1834 strncmp(ses->password,
1835 vol->password ? vol->password : "",
1836 MAX_PASSWORD_SIZE))
1837 continue;
1838 }
1839 ++ses->ses_count; 1863 ++ses->ses_count;
1840 spin_unlock(&cifs_tcp_ses_lock); 1864 spin_unlock(&cifs_tcp_ses_lock);
1841 return ses; 1865 return ses;
@@ -1845,7 +1869,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1845} 1869}
1846 1870
1847static void 1871static void
1848cifs_put_smb_ses(struct cifsSesInfo *ses) 1872cifs_put_smb_ses(struct cifs_ses *ses)
1849{ 1873{
1850 int xid; 1874 int xid;
1851 struct TCP_Server_Info *server = ses->server; 1875 struct TCP_Server_Info *server = ses->server;
@@ -1871,11 +1895,11 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1871 1895
1872static bool warned_on_ntlm; /* globals init to false automatically */ 1896static bool warned_on_ntlm; /* globals init to false automatically */
1873 1897
1874static struct cifsSesInfo * 1898static struct cifs_ses *
1875cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) 1899cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1876{ 1900{
1877 int rc = -ENOMEM, xid; 1901 int rc = -ENOMEM, xid;
1878 struct cifsSesInfo *ses; 1902 struct cifs_ses *ses;
1879 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; 1903 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
1880 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; 1904 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
1881 1905
@@ -1978,20 +2002,26 @@ get_ses_fail:
1978 return ERR_PTR(rc); 2002 return ERR_PTR(rc);
1979} 2003}
1980 2004
1981static struct cifsTconInfo * 2005static int match_tcon(struct cifs_tcon *tcon, const char *unc)
1982cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) 2006{
2007 if (tcon->tidStatus == CifsExiting)
2008 return 0;
2009 if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE))
2010 return 0;
2011 return 1;
2012}
2013
2014static struct cifs_tcon *
2015cifs_find_tcon(struct cifs_ses *ses, const char *unc)
1983{ 2016{
1984 struct list_head *tmp; 2017 struct list_head *tmp;
1985 struct cifsTconInfo *tcon; 2018 struct cifs_tcon *tcon;
1986 2019
1987 spin_lock(&cifs_tcp_ses_lock); 2020 spin_lock(&cifs_tcp_ses_lock);
1988 list_for_each(tmp, &ses->tcon_list) { 2021 list_for_each(tmp, &ses->tcon_list) {
1989 tcon = list_entry(tmp, struct cifsTconInfo, tcon_list); 2022 tcon = list_entry(tmp, struct cifs_tcon, tcon_list);
1990 if (tcon->tidStatus == CifsExiting) 2023 if (!match_tcon(tcon, unc))
1991 continue;
1992 if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE))
1993 continue; 2024 continue;
1994
1995 ++tcon->tc_count; 2025 ++tcon->tc_count;
1996 spin_unlock(&cifs_tcp_ses_lock); 2026 spin_unlock(&cifs_tcp_ses_lock);
1997 return tcon; 2027 return tcon;
@@ -2001,10 +2031,10 @@ cifs_find_tcon(struct cifsSesInfo *ses, const char *unc)
2001} 2031}
2002 2032
2003static void 2033static void
2004cifs_put_tcon(struct cifsTconInfo *tcon) 2034cifs_put_tcon(struct cifs_tcon *tcon)
2005{ 2035{
2006 int xid; 2036 int xid;
2007 struct cifsSesInfo *ses = tcon->ses; 2037 struct cifs_ses *ses = tcon->ses;
2008 2038
2009 cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count); 2039 cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count);
2010 spin_lock(&cifs_tcp_ses_lock); 2040 spin_lock(&cifs_tcp_ses_lock);
@@ -2025,11 +2055,11 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
2025 cifs_put_smb_ses(ses); 2055 cifs_put_smb_ses(ses);
2026} 2056}
2027 2057
2028static struct cifsTconInfo * 2058static struct cifs_tcon *
2029cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info) 2059cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
2030{ 2060{
2031 int rc, xid; 2061 int rc, xid;
2032 struct cifsTconInfo *tcon; 2062 struct cifs_tcon *tcon;
2033 2063
2034 tcon = cifs_find_tcon(ses, volume_info->UNC); 2064 tcon = cifs_find_tcon(ses, volume_info->UNC);
2035 if (tcon) { 2065 if (tcon) {
@@ -2118,8 +2148,102 @@ cifs_put_tlink(struct tcon_link *tlink)
2118 return; 2148 return;
2119} 2149}
2120 2150
2151static inline struct tcon_link *
2152cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb);
2153
2154static int
2155compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
2156{
2157 struct cifs_sb_info *old = CIFS_SB(sb);
2158 struct cifs_sb_info *new = mnt_data->cifs_sb;
2159
2160 if ((sb->s_flags & CIFS_MS_MASK) != (mnt_data->flags & CIFS_MS_MASK))
2161 return 0;
2162
2163 if ((old->mnt_cifs_flags & CIFS_MOUNT_MASK) !=
2164 (new->mnt_cifs_flags & CIFS_MOUNT_MASK))
2165 return 0;
2166
2167 if (old->rsize != new->rsize)
2168 return 0;
2169
2170 /*
 2171 * We want to share the sb only if wsize is unspecified, or the specified
 2172 * wsize is greater than or equal to the existing one.
2173 */
2174 if (new->wsize && new->wsize < old->wsize)
2175 return 0;
2176
2177 if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid)
2178 return 0;
2179
2180 if (old->mnt_file_mode != new->mnt_file_mode ||
2181 old->mnt_dir_mode != new->mnt_dir_mode)
2182 return 0;
2183
2184 if (strcmp(old->local_nls->charset, new->local_nls->charset))
2185 return 0;
2186
2187 if (old->actimeo != new->actimeo)
2188 return 0;
2189
2190 return 1;
2191}
2192
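
The wsize rule above is deliberately one-sided: a new mount that asks for less than the existing superblock's wsize must not share it (sharing would hand it a larger write size than it requested), while a mount asking for the same or more simply inherits the existing value, which is safe because wsize is only an upper bound. The predicate, reduced to a standalone sketch:

    /* sketch: nonzero when an existing sb's wsize satisfies a new request;
     * a new_wsize of 0 means the new mount expressed no preference */
    static int wsize_compatible(unsigned int new_wsize, unsigned int old_wsize)
    {
        return !(new_wsize && new_wsize < old_wsize);
    }
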
2121int 2193int
2122get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, 2194cifs_match_super(struct super_block *sb, void *data)
2195{
2196 struct cifs_mnt_data *mnt_data = (struct cifs_mnt_data *)data;
2197 struct smb_vol *volume_info;
2198 struct cifs_sb_info *cifs_sb;
2199 struct TCP_Server_Info *tcp_srv;
2200 struct cifs_ses *ses;
2201 struct cifs_tcon *tcon;
2202 struct tcon_link *tlink;
2203 struct sockaddr_storage addr;
2204 int rc = 0;
2205
2206 memset(&addr, 0, sizeof(struct sockaddr_storage));
2207
2208 spin_lock(&cifs_tcp_ses_lock);
2209 cifs_sb = CIFS_SB(sb);
2210 tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
2211 if (IS_ERR(tlink)) {
2212 spin_unlock(&cifs_tcp_ses_lock);
2213 return rc;
2214 }
2215 tcon = tlink_tcon(tlink);
2216 ses = tcon->ses;
2217 tcp_srv = ses->server;
2218
2219 volume_info = mnt_data->vol;
2220
2221 if (!volume_info->UNCip || !volume_info->UNC)
2222 goto out;
2223
2224 rc = cifs_fill_sockaddr((struct sockaddr *)&addr,
2225 volume_info->UNCip,
2226 strlen(volume_info->UNCip),
2227 volume_info->port);
2228 if (!rc)
2229 goto out;
2230
2231 if (!match_server(tcp_srv, (struct sockaddr *)&addr, volume_info) ||
2232 !match_session(ses, volume_info) ||
2233 !match_tcon(tcon, volume_info->UNC)) {
2234 rc = 0;
2235 goto out;
2236 }
2237
2238 rc = compare_mount_options(sb, mnt_data);
2239out:
2240 cifs_put_tlink(tlink);
2241 spin_unlock(&cifs_tcp_ses_lock);
2242 return rc;
2243}
2244
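
cifs_match_super() is where the extracted match_server()/match_session()/match_tcon() predicates pay off: the same tests that locate reusable connections now decide whether an entire superblock can be shared. Elsewhere in this series it is expected to be wired up as the sget() test callback; a hedged sketch of that call site, assuming the four-argument sget() prototype of this kernel generation and that cifs_mnt_data carries the vol, cifs_sb and mount flags used above:

    /* sketch of the assumed caller; not a verbatim copy of cifs_do_mount() */
    struct cifs_mnt_data mnt_data = {
        .vol = volume_info,
        .cifs_sb = cifs_sb,
        .flags = flags,
    };

    sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data);
    if (IS_ERR(sb))
        rc = PTR_ERR(sb);
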
2245int
2246get_dfs_path(int xid, struct cifs_ses *pSesInfo, const char *old_path,
2123 const struct nls_table *nls_codepage, unsigned int *pnum_referrals, 2247 const struct nls_table *nls_codepage, unsigned int *pnum_referrals,
2124 struct dfs_info3_param **preferrals, int remap) 2248 struct dfs_info3_param **preferrals, int remap)
2125{ 2249{
@@ -2280,7 +2404,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2280 smb_buf = (struct smb_hdr *)ses_init_buf; 2404 smb_buf = (struct smb_hdr *)ses_init_buf;
2281 2405
2282 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 2406 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2283 smb_buf->smb_buf_length = 0x81000044; 2407 smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
2284 rc = smb_send(server, smb_buf, 0x44); 2408 rc = smb_send(server, smb_buf, 0x44);
2285 kfree(ses_init_buf); 2409 kfree(ses_init_buf);
2286 /* 2410 /*
@@ -2418,7 +2542,7 @@ ip_connect(struct TCP_Server_Info *server)
2418 return generic_ip_connect(server); 2542 return generic_ip_connect(server);
2419} 2543}
2420 2544
2421void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, 2545void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
2422 struct super_block *sb, struct smb_vol *vol_info) 2546 struct super_block *sb, struct smb_vol *vol_info)
2423{ 2547{
2424 /* if we are reconnecting then should we check to see if 2548 /* if we are reconnecting then should we check to see if
@@ -2447,7 +2571,7 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2447 2571
2448 if (!CIFSSMBQFSUnixInfo(xid, tcon)) { 2572 if (!CIFSSMBQFSUnixInfo(xid, tcon)) {
2449 __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); 2573 __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
2450 2574 cFYI(1, "unix caps which server supports %lld", cap);
2451 /* check for reconnect case in which we do not 2575 /* check for reconnect case in which we do not
2452 want to change the mount behavior if we can avoid it */ 2576 want to change the mount behavior if we can avoid it */
2453 if (vol_info == NULL) { 2577 if (vol_info == NULL) {
@@ -2465,6 +2589,9 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2465 } 2589 }
2466 } 2590 }
2467 2591
2592 if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
2593 cERROR(1, "per-share encryption not supported yet");
2594
2468 cap &= CIFS_UNIX_CAP_MASK; 2595 cap &= CIFS_UNIX_CAP_MASK;
2469 if (vol_info && vol_info->no_psx_acl) 2596 if (vol_info && vol_info->no_psx_acl)
2470 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2597 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
@@ -2483,12 +2610,6 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2483 CIFS_MOUNT_POSIX_PATHS; 2610 CIFS_MOUNT_POSIX_PATHS;
2484 } 2611 }
2485 2612
2486 /* We might be setting the path sep back to a different
2487 form if we are reconnecting and the server switched its
2488 posix path capability for this share */
2489 if (sb && (CIFS_SB(sb)->prepathlen > 0))
2490 CIFS_SB(sb)->prepath[0] = CIFS_DIR_SEP(CIFS_SB(sb));
2491
2492 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { 2613 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
2493 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { 2614 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
2494 CIFS_SB(sb)->rsize = 127 * 1024; 2615 CIFS_SB(sb)->rsize = 127 * 1024;
@@ -2513,6 +2634,10 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2513 cFYI(1, "very large read cap"); 2634 cFYI(1, "very large read cap");
2514 if (cap & CIFS_UNIX_LARGE_WRITE_CAP) 2635 if (cap & CIFS_UNIX_LARGE_WRITE_CAP)
2515 cFYI(1, "very large write cap"); 2636 cFYI(1, "very large write cap");
2637 if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP)
2638 cFYI(1, "transport encryption cap");
2639 if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)
2640 cFYI(1, "mandatory transport encryption cap");
2516#endif /* CIFS_DEBUG2 */ 2641#endif /* CIFS_DEBUG2 */
2517 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { 2642 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
2518 if (vol_info == NULL) { 2643 if (vol_info == NULL) {
@@ -2529,28 +2654,8 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2529 } 2654 }
2530} 2655}
2531 2656
2532static void 2657void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
2533convert_delimiter(char *path, char delim) 2658 struct cifs_sb_info *cifs_sb)
2534{
2535 int i;
2536 char old_delim;
2537
2538 if (path == NULL)
2539 return;
2540
2541 if (delim == '/')
2542 old_delim = '\\';
2543 else
2544 old_delim = '/';
2545
2546 for (i = 0; path[i] != '\0'; i++) {
2547 if (path[i] == old_delim)
2548 path[i] = delim;
2549 }
2550}
2551
2552static void setup_cifs_sb(struct smb_vol *pvolume_info,
2553 struct cifs_sb_info *cifs_sb)
2554{ 2659{
2555 INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); 2660 INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
2556 2661
@@ -2564,40 +2669,19 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2564 else /* default */ 2669 else /* default */
2565 cifs_sb->rsize = CIFSMaxBufSize; 2670 cifs_sb->rsize = CIFSMaxBufSize;
2566 2671
2567 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
2568 cERROR(1, "wsize %d too large, using 4096 instead",
2569 pvolume_info->wsize);
2570 cifs_sb->wsize = 4096;
2571 } else if (pvolume_info->wsize)
2572 cifs_sb->wsize = pvolume_info->wsize;
2573 else
2574 cifs_sb->wsize = min_t(const int,
2575 PAGEVEC_SIZE * PAGE_CACHE_SIZE,
2576 127*1024);
2577 /* old default of CIFSMaxBufSize was too small now
2578 that SMB Write2 can send multiple pages in kvec.
2579 RFC1001 does not describe what happens when frame
2580 bigger than 128K is sent so use that as max in
2581 conjunction with 52K kvec constraint on arch with 4K
2582 page size */
2583
2584 if (cifs_sb->rsize < 2048) { 2672 if (cifs_sb->rsize < 2048) {
2585 cifs_sb->rsize = 2048; 2673 cifs_sb->rsize = 2048;
2586 /* Windows ME may prefer this */ 2674 /* Windows ME may prefer this */
2587 cFYI(1, "readsize set to minimum: 2048"); 2675 cFYI(1, "readsize set to minimum: 2048");
2588 } 2676 }
2589 /* calculate prepath */ 2677
2590 cifs_sb->prepath = pvolume_info->prepath; 2678 /*
2591 if (cifs_sb->prepath) { 2679 * Temporarily set wsize for matching superblock. If we end up using
2592 cifs_sb->prepathlen = strlen(cifs_sb->prepath); 2680 * new sb then cifs_negotiate_wsize will later negotiate it downward
2593 /* we can not convert the / to \ in the path 2681 * if needed.
2594 separators in the prefixpath yet because we do not 2682 */
2595 know (until reset_cifs_unix_caps is called later) 2683 cifs_sb->wsize = pvolume_info->wsize;
2596 whether POSIX PATH CAP is available. We normalize 2684
2597 the / to \ after reset_cifs_unix_caps is called */
2598 pvolume_info->prepath = NULL;
2599 } else
2600 cifs_sb->prepathlen = 0;
2601 cifs_sb->mnt_uid = pvolume_info->linux_uid; 2685 cifs_sb->mnt_uid = pvolume_info->linux_uid;
2602 cifs_sb->mnt_gid = pvolume_info->linux_gid; 2686 cifs_sb->mnt_gid = pvolume_info->linux_gid;
2603 cifs_sb->mnt_file_mode = pvolume_info->file_mode; 2687 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
@@ -2606,6 +2690,7 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2606 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); 2690 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2607 2691
2608 cifs_sb->actimeo = pvolume_info->actimeo; 2692 cifs_sb->actimeo = pvolume_info->actimeo;
2693 cifs_sb->local_nls = pvolume_info->local_nls;
2609 2694
2610 if (pvolume_info->noperm) 2695 if (pvolume_info->noperm)
2611 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 2696 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
@@ -2625,6 +2710,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2625 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC; 2710 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
2626 if (pvolume_info->mand_lock) 2711 if (pvolume_info->mand_lock)
2627 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; 2712 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
2713 if (pvolume_info->rwpidforward)
2714 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
2628 if (pvolume_info->cifs_acl) 2715 if (pvolume_info->cifs_acl)
2629 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; 2716 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
2630 if (pvolume_info->override_uid) 2717 if (pvolume_info->override_uid)
@@ -2658,8 +2745,55 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2658 "mount option supported"); 2745 "mount option supported");
2659} 2746}
2660 2747
2748/*
2749 * When the server supports very large writes via POSIX extensions, we can
2750 * allow up to 2^24 - PAGE_CACHE_SIZE.
2751 *
2752 * Note that this might make for "interesting" allocation problems during
2753 * writeback however (as we have to allocate an array of pointers for the
 2754 * pages). A 16M write means a ~32KB page array with PAGE_CACHE_SIZE == 4096.
2755 */
2756#define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE)
2757
2758/*
2759 * When the server doesn't allow large posix writes, default to a wsize of
2760 * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size
2761 * described in RFC1001. This allows space for the header without going over
2762 * that by default.
2763 */
2764#define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE)
2765
2766/*
2767 * The default wsize is 1M. find_get_pages seems to return a maximum of 256
2768 * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
2769 * a single wsize request with a single call.
2770 */
2771#define CIFS_DEFAULT_WSIZE (1024 * 1024)
2772
2773static unsigned int
2774cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
2775{
2776 __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
2777 struct TCP_Server_Info *server = tcon->ses->server;
2778 unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize :
2779 CIFS_DEFAULT_WSIZE;
2780
2781 /* can server support 24-bit write sizes? (via UNIX extensions) */
2782 if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
2783 wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE);
2784
2785 /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */
2786 if (!(server->capabilities & CAP_LARGE_WRITE_X))
2787 wsize = min_t(unsigned int, wsize, USHRT_MAX);
2788
2789 /* hard limit of CIFS_MAX_WSIZE */
2790 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
2791
2792 return wsize;
2793}
2794
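
For concreteness, with PAGE_CACHE_SIZE == 4096 the three limits above come to 16773120, 126976 and 65535 bytes. A small user-space check of the same arithmetic (constants copied from the macros; min_u stands in for min_t):

    #include <stdio.h>

    #define PAGE_CACHE_SIZE 4096u
    #define CIFS_MAX_WSIZE ((1u << 24) - PAGE_CACHE_SIZE)
    #define CIFS_MAX_RFC1001_WSIZE (128u * 1024 - PAGE_CACHE_SIZE)
    #define CIFS_DEFAULT_WSIZE (1024u * 1024)

    static unsigned int min_u(unsigned int a, unsigned int b)
    {
        return a < b ? a : b;
    }

    int main(void)
    {
        unsigned int wsize = CIFS_DEFAULT_WSIZE;

        /* no large POSIX writes: one page under the 128K RFC1001 frame */
        printf("%u\n", min_u(wsize, CIFS_MAX_RFC1001_WSIZE)); /* 126976 */
        /* additionally no CAP_LARGE_WRITE_X: clamp to 16 bits */
        printf("%u\n", min_u(wsize, 65535u));                 /* 65535 */
        /* large POSIX writes available: only the hard cap applies */
        printf("%u\n", min_u(wsize, CIFS_MAX_WSIZE));         /* 1048576 */
        return 0;
    }
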
2661static int 2795static int
2662is_path_accessible(int xid, struct cifsTconInfo *tcon, 2796is_path_accessible(int xid, struct cifs_tcon *tcon,
2663 struct cifs_sb_info *cifs_sb, const char *full_path) 2797 struct cifs_sb_info *cifs_sb, const char *full_path)
2664{ 2798{
2665 int rc; 2799 int rc;
@@ -2682,8 +2816,8 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
2682 return rc; 2816 return rc;
2683} 2817}
2684 2818
2685static void 2819void
2686cleanup_volume_info(struct smb_vol **pvolume_info) 2820cifs_cleanup_volume_info(struct smb_vol **pvolume_info)
2687{ 2821{
2688 struct smb_vol *volume_info; 2822 struct smb_vol *volume_info;
2689 2823
@@ -2691,8 +2825,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2691 return; 2825 return;
2692 2826
2693 volume_info = *pvolume_info; 2827 volume_info = *pvolume_info;
2828 kfree(volume_info->username);
2694 kzfree(volume_info->password); 2829 kzfree(volume_info->password);
2695 kfree(volume_info->UNC); 2830 kfree(volume_info->UNC);
2831 kfree(volume_info->UNCip);
2832 kfree(volume_info->domainname);
2833 kfree(volume_info->iocharset);
2696 kfree(volume_info->prepath); 2834 kfree(volume_info->prepath);
2697 kfree(volume_info); 2835 kfree(volume_info);
2698 *pvolume_info = NULL; 2836 *pvolume_info = NULL;
@@ -2709,55 +2847,78 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2709 char *full_path; 2847 char *full_path;
2710 2848
2711 int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1); 2849 int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1);
2712 full_path = kmalloc(unc_len + cifs_sb->prepathlen + 1, GFP_KERNEL); 2850 full_path = kmalloc(unc_len + 1, GFP_KERNEL);
2713 if (full_path == NULL) 2851 if (full_path == NULL)
2714 return ERR_PTR(-ENOMEM); 2852 return ERR_PTR(-ENOMEM);
2715 2853
2716 strncpy(full_path, volume_info->UNC, unc_len); 2854 strncpy(full_path, volume_info->UNC, unc_len);
2717 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { 2855 full_path[unc_len] = 0; /* add trailing null */
2718 int i; 2856 convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
2719 for (i = 0; i < unc_len; i++) {
2720 if (full_path[i] == '\\')
2721 full_path[i] = '/';
2722 }
2723 }
2724
2725 if (cifs_sb->prepathlen)
2726 strncpy(full_path + unc_len, cifs_sb->prepath,
2727 cifs_sb->prepathlen);
2728
2729 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2730 return full_path; 2857 return full_path;
2731} 2858}
2732#endif
2733 2859
2734int 2860/*
2735cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2861 * Perform a dfs referral query for a share and (optionally) prefix
2736 char *mount_data_global, const char *devname) 2862 *
2863 * If a referral is found, cifs_sb->mountdata will be (re-)allocated
2864 * to a string containing updated options for the submount. Otherwise it
2865 * will be left untouched.
2866 *
2867 * Returns the rc from get_dfs_path to the caller, which can be used to
2868 * determine whether there were referrals.
2869 */
2870static int
2871expand_dfs_referral(int xid, struct cifs_ses *pSesInfo,
2872 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
2873 int check_prefix)
2737{ 2874{
2738 int rc; 2875 int rc;
2739 int xid;
2740 struct smb_vol *volume_info;
2741 struct cifsSesInfo *pSesInfo;
2742 struct cifsTconInfo *tcon;
2743 struct TCP_Server_Info *srvTcp;
2744 char *full_path;
2745 char *mount_data = mount_data_global;
2746 struct tcon_link *tlink;
2747#ifdef CONFIG_CIFS_DFS_UPCALL
2748 struct dfs_info3_param *referrals = NULL;
2749 unsigned int num_referrals = 0; 2876 unsigned int num_referrals = 0;
2750 int referral_walks_count = 0; 2877 struct dfs_info3_param *referrals = NULL;
2751try_mount_again: 2878 char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
2879
2880 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2881 if (IS_ERR(full_path))
2882 return PTR_ERR(full_path);
2883
2884 /* For DFS paths, skip the first '\' of the UNC */
2885 ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
2886
 2887 rc = get_dfs_path(xid, pSesInfo, ref_path, cifs_sb->local_nls,
2888 &num_referrals, &referrals,
2889 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2890
2891 if (!rc && num_referrals > 0) {
2892 char *fake_devname = NULL;
2893
2894 mdata = cifs_compose_mount_options(cifs_sb->mountdata,
2895 full_path + 1, referrals,
2896 &fake_devname);
2897
2898 free_dfs_info_array(referrals, num_referrals);
2899 kfree(fake_devname);
2900
2901 if (cifs_sb->mountdata != NULL)
2902 kfree(cifs_sb->mountdata);
2903
2904 if (IS_ERR(mdata)) {
2905 rc = PTR_ERR(mdata);
2906 mdata = NULL;
2907 }
2908 cifs_sb->mountdata = mdata;
2909 }
2910 kfree(full_path);
2911 return rc;
2912}
2752#endif 2913#endif
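
To make the mountdata rewrite concrete: when the queried path turns out to be a DFS link, cifs_compose_mount_options() swaps the device and address options so that the retried mount targets the referral. An illustrative before/after with entirely hypothetical share names and addresses:

    before:  ip=192.0.2.1,unc=\\dfsroot\pub,prefixpath=tools,...
    after:   ip=192.0.2.7,unc=\\fileserver\pub2,prefixpath=tools,...
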
2753 rc = 0;
2754 tcon = NULL;
2755 pSesInfo = NULL;
2756 srvTcp = NULL;
2757 full_path = NULL;
2758 tlink = NULL;
2759 2914
2760 xid = GetXid(); 2915int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data,
2916 const char *devname)
2917{
2918 struct smb_vol *volume_info;
2919 int rc = 0;
2920
2921 *pvolume_info = NULL;
2761 2922
2762 volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); 2923 volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL);
2763 if (!volume_info) { 2924 if (!volume_info) {
@@ -2765,7 +2926,8 @@ try_mount_again:
2765 goto out; 2926 goto out;
2766 } 2927 }
2767 2928
2768 if (cifs_parse_mount_options(mount_data, devname, volume_info)) { 2929 if (cifs_parse_mount_options(mount_data, devname,
2930 volume_info)) {
2769 rc = -EINVAL; 2931 rc = -EINVAL;
2770 goto out; 2932 goto out;
2771 } 2933 }
@@ -2797,7 +2959,46 @@ try_mount_again:
2797 goto out; 2959 goto out;
2798 } 2960 }
2799 } 2961 }
2800 cifs_sb->local_nls = volume_info->local_nls; 2962
2963 *pvolume_info = volume_info;
2964 return rc;
2965out:
2966 cifs_cleanup_volume_info(&volume_info);
2967 return rc;
2968}
2969
2970int
2971cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2972 struct smb_vol *volume_info, const char *devname)
2973{
2974 int rc = 0;
2975 int xid;
2976 struct cifs_ses *pSesInfo;
2977 struct cifs_tcon *tcon;
2978 struct TCP_Server_Info *srvTcp;
2979 char *full_path;
2980 struct tcon_link *tlink;
2981#ifdef CONFIG_CIFS_DFS_UPCALL
2982 int referral_walks_count = 0;
2983try_mount_again:
2984 /* cleanup activities if we're chasing a referral */
2985 if (referral_walks_count) {
2986 if (tcon)
2987 cifs_put_tcon(tcon);
2988 else if (pSesInfo)
2989 cifs_put_smb_ses(pSesInfo);
2990
2991 cifs_cleanup_volume_info(&volume_info);
2992 FreeXid(xid);
2993 }
2994#endif
2995 tcon = NULL;
2996 pSesInfo = NULL;
2997 srvTcp = NULL;
2998 full_path = NULL;
2999 tlink = NULL;
3000
3001 xid = GetXid();
2801 3002
2802 /* get a reference to a tcp session */ 3003 /* get a reference to a tcp session */
2803 srvTcp = cifs_get_tcp_session(volume_info); 3004 srvTcp = cifs_get_tcp_session(volume_info);
@@ -2814,7 +3015,6 @@ try_mount_again:
2814 goto mount_fail_check; 3015 goto mount_fail_check;
2815 } 3016 }
2816 3017
2817 setup_cifs_sb(volume_info, cifs_sb);
2818 if (pSesInfo->capabilities & CAP_LARGE_FILES) 3018 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2819 sb->s_maxbytes = MAX_LFS_FILESIZE; 3019 sb->s_maxbytes = MAX_LFS_FILESIZE;
2820 else 3020 else
@@ -2831,40 +3031,59 @@ try_mount_again:
2831 goto remote_path_check; 3031 goto remote_path_check;
2832 } 3032 }
2833 3033
2834 /* do not care if following two calls succeed - informational */
2835 if (!tcon->ipc) {
2836 CIFSSMBQFSDeviceInfo(xid, tcon);
2837 CIFSSMBQFSAttributeInfo(xid, tcon);
2838 }
2839
2840 /* tell server which Unix caps we support */ 3034 /* tell server which Unix caps we support */
2841 if (tcon->ses->capabilities & CAP_UNIX) 3035 if (tcon->ses->capabilities & CAP_UNIX) {
2842 /* reset of caps checks mount to see if unix extensions 3036 /* reset of caps checks mount to see if unix extensions
2843 disabled for just this mount */ 3037 disabled for just this mount */
2844 reset_cifs_unix_caps(xid, tcon, sb, volume_info); 3038 reset_cifs_unix_caps(xid, tcon, sb, volume_info);
2845 else 3039 if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
3040 (le64_to_cpu(tcon->fsUnixInfo.Capability) &
3041 CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
3042 rc = -EACCES;
3043 goto mount_fail_check;
3044 }
3045 } else
2846 tcon->unix_ext = 0; /* server does not support them */ 3046 tcon->unix_ext = 0; /* server does not support them */
2847 3047
2848 /* convert forward to back slashes in prepath here if needed */ 3048 /* do not care if following two calls succeed - informational */
2849 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 3049 if (!tcon->ipc) {
2850 convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); 3050 CIFSSMBQFSDeviceInfo(xid, tcon);
3051 CIFSSMBQFSAttributeInfo(xid, tcon);
3052 }
2851 3053
2852 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { 3054 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
2853 cifs_sb->rsize = 1024 * 127; 3055 cifs_sb->rsize = 1024 * 127;
2854 cFYI(DBG2, "no very large read support, rsize now 127K"); 3056 cFYI(DBG2, "no very large read support, rsize now 127K");
2855 } 3057 }
2856 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
2857 cifs_sb->wsize = min(cifs_sb->wsize,
2858 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2859 if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) 3058 if (!(tcon->ses->capabilities & CAP_LARGE_READ_X))
2860 cifs_sb->rsize = min(cifs_sb->rsize, 3059 cifs_sb->rsize = min(cifs_sb->rsize,
2861 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 3060 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2862 3061
3062 cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info);
3063
2863remote_path_check: 3064remote_path_check:
2864 /* check if a whole path (including prepath) is not remote */ 3065#ifdef CONFIG_CIFS_DFS_UPCALL
3066 /*
3067 * Perform an unconditional check for whether there are DFS
3068 * referrals for this path without prefix, to provide support
3069 * for DFS referrals from w2k8 servers which don't seem to respond
3070 * with PATH_NOT_COVERED to requests that include the prefix.
3071 * Chase the referral if found, otherwise continue normally.
3072 */
3073 if (referral_walks_count == 0) {
3074 int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
3075 cifs_sb, false);
3076 if (!refrc) {
3077 referral_walks_count++;
3078 goto try_mount_again;
3079 }
3080 }
3081#endif
3082
3083 /* check if a whole path is not remote */
2865 if (!rc && tcon) { 3084 if (!rc && tcon) {
2866 /* build_path_to_root works only when we have a valid tcon */ 3085 /* build_path_to_root works only when we have a valid tcon */
2867 full_path = cifs_build_path_to_root(cifs_sb, tcon); 3086 full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon);
2868 if (full_path == NULL) { 3087 if (full_path == NULL) {
2869 rc = -ENOMEM; 3088 rc = -ENOMEM;
2870 goto mount_fail_check; 3089 goto mount_fail_check;
@@ -2890,50 +3109,15 @@ remote_path_check:
2890 rc = -ELOOP; 3109 rc = -ELOOP;
2891 goto mount_fail_check; 3110 goto mount_fail_check;
2892 } 3111 }
2893 /* convert forward to back slashes in prepath here if needed */
2894 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2895 convert_delimiter(cifs_sb->prepath,
2896 CIFS_DIR_SEP(cifs_sb));
2897 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2898 if (IS_ERR(full_path)) {
2899 rc = PTR_ERR(full_path);
2900 goto mount_fail_check;
2901 }
2902
2903 cFYI(1, "Getting referral for: %s", full_path);
2904 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2905 cifs_sb->local_nls, &num_referrals, &referrals,
2906 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2907 if (!rc && num_referrals > 0) {
2908 char *fake_devname = NULL;
2909 3112
2910 if (mount_data != mount_data_global) 3113 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
2911 kfree(mount_data); 3114 true);
2912 3115
2913 mount_data = cifs_compose_mount_options( 3116 if (!rc) {
2914 cifs_sb->mountdata, full_path + 1,
2915 referrals, &fake_devname);
2916
2917 free_dfs_info_array(referrals, num_referrals);
2918 kfree(fake_devname);
2919 kfree(full_path);
2920
2921 if (IS_ERR(mount_data)) {
2922 rc = PTR_ERR(mount_data);
2923 mount_data = NULL;
2924 goto mount_fail_check;
2925 }
2926
2927 if (tcon)
2928 cifs_put_tcon(tcon);
2929 else if (pSesInfo)
2930 cifs_put_smb_ses(pSesInfo);
2931
2932 cleanup_volume_info(&volume_info);
2933 referral_walks_count++; 3117 referral_walks_count++;
2934 FreeXid(xid);
2935 goto try_mount_again; 3118 goto try_mount_again;
2936 } 3119 }
3120 goto mount_fail_check;
2937#else /* No DFS support, return error on mount */ 3121#else /* No DFS support, return error on mount */
2938 rc = -EOPNOTSUPP; 3122 rc = -EOPNOTSUPP;
2939#endif 3123#endif
@@ -2966,8 +3150,6 @@ remote_path_check:
2966mount_fail_check: 3150mount_fail_check:
2967 /* on error free sesinfo and tcon struct if needed */ 3151 /* on error free sesinfo and tcon struct if needed */
2968 if (rc) { 3152 if (rc) {
2969 if (mount_data != mount_data_global)
2970 kfree(mount_data);
2971 /* If find_unc succeeded then rc == 0 so we can not end */ 3153 /* If find_unc succeeded then rc == 0 so we can not end */
2972 /* up accidentally freeing someone elses tcon struct */ 3154 /* up accidentally freeing someone elses tcon struct */
2973 if (tcon) 3155 if (tcon)
@@ -2985,14 +3167,13 @@ mount_fail_check:
2985 password will be freed at unmount time) */ 3167 password will be freed at unmount time) */
2986out: 3168out:
2987 /* zero out password before freeing */ 3169 /* zero out password before freeing */
2988 cleanup_volume_info(&volume_info);
2989 FreeXid(xid); 3170 FreeXid(xid);
2990 return rc; 3171 return rc;
2991} 3172}
2992 3173
2993int 3174int
2994CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, 3175CIFSTCon(unsigned int xid, struct cifs_ses *ses,
2995 const char *tree, struct cifsTconInfo *tcon, 3176 const char *tree, struct cifs_tcon *tcon,
2996 const struct nls_table *nls_codepage) 3177 const struct nls_table *nls_codepage)
2997{ 3178{
2998 struct smb_hdr *smb_buffer; 3179 struct smb_hdr *smb_buffer;
@@ -3024,7 +3205,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3024 pSMB->AndXCommand = 0xFF; 3205 pSMB->AndXCommand = 0xFF;
3025 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); 3206 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
3026 bcc_ptr = &pSMB->Password[0]; 3207 bcc_ptr = &pSMB->Password[0];
3027 if ((ses->server->secMode) & SECMODE_USER) { 3208 if ((ses->server->sec_mode) & SECMODE_USER) {
3028 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ 3209 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3029 *bcc_ptr = 0; /* password is null byte */ 3210 *bcc_ptr = 0; /* password is null byte */
3030 bcc_ptr++; /* skip password */ 3211 bcc_ptr++; /* skip password */
@@ -3041,7 +3222,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3041 if ((global_secflags & CIFSSEC_MAY_LANMAN) && 3222 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
3042 (ses->server->secType == LANMAN)) 3223 (ses->server->secType == LANMAN))
3043 calc_lanman_hash(tcon->password, ses->server->cryptkey, 3224 calc_lanman_hash(tcon->password, ses->server->cryptkey,
3044 ses->server->secMode & 3225 ses->server->sec_mode &
3045 SECMODE_PW_ENCRYPT ? true : false, 3226 SECMODE_PW_ENCRYPT ? true : false,
3046 bcc_ptr); 3227 bcc_ptr);
3047 else 3228 else
@@ -3057,7 +3238,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3057 } 3238 }
3058 } 3239 }
3059 3240
3060 if (ses->server->secMode & 3241 if (ses->server->sec_mode &
3061 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 3242 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
3062 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 3243 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
3063 3244
@@ -3083,7 +3264,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3083 bcc_ptr += strlen("?????"); 3264 bcc_ptr += strlen("?????");
3084 bcc_ptr += 1; 3265 bcc_ptr += 1;
3085 count = bcc_ptr - &pSMB->Password[0]; 3266 count = bcc_ptr - &pSMB->Password[0];
3086 pSMB->hdr.smb_buf_length += count; 3267 pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
3268 pSMB->hdr.smb_buf_length) + count);
3087 pSMB->ByteCount = cpu_to_le16(count); 3269 pSMB->ByteCount = cpu_to_le16(count);
3088 3270
3089 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 3271 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
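
The smb_buf_length changes here (and in the RFC1001 session request earlier in this patch) make the in-memory field hold the on-the-wire big-endian value, so any arithmetic on it must round-trip through be32_to_cpu()/cpu_to_be32(). A user-space illustration of the byte order, with htonl() standing in for cpu_to_be32():

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* 0x81 = RFC1002 session request, 0x44 = 68-byte payload, no scope */
        uint32_t hdr = htonl(0x81000044u);
        const unsigned char *p = (const unsigned char *)&hdr;

        /* the message type must be the first byte on the wire */
        printf("%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]);
        return 0;   /* prints: 81 00 00 44 */
    }
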
@@ -3152,7 +3334,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3152 struct rb_root *root = &cifs_sb->tlink_tree; 3334 struct rb_root *root = &cifs_sb->tlink_tree;
3153 struct rb_node *node; 3335 struct rb_node *node;
3154 struct tcon_link *tlink; 3336 struct tcon_link *tlink;
3155 char *tmp;
3156 3337
3157 cancel_delayed_work_sync(&cifs_sb->prune_tlinks); 3338 cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
3158 3339
@@ -3169,15 +3350,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3169 } 3350 }
3170 spin_unlock(&cifs_sb->tlink_tree_lock); 3351 spin_unlock(&cifs_sb->tlink_tree_lock);
3171 3352
3172 tmp = cifs_sb->prepath;
3173 cifs_sb->prepathlen = 0;
3174 cifs_sb->prepath = NULL;
3175 kfree(tmp);
3176
3177 return 0; 3353 return 0;
3178} 3354}
3179 3355
3180int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses) 3356int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
3181{ 3357{
3182 int rc = 0; 3358 int rc = 0;
3183 struct TCP_Server_Info *server = ses->server; 3359 struct TCP_Server_Info *server = ses->server;
@@ -3207,7 +3383,7 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses)
3207} 3383}
3208 3384
3209 3385
3210int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses, 3386int cifs_setup_session(unsigned int xid, struct cifs_ses *ses,
3211 struct nls_table *nls_info) 3387 struct nls_table *nls_info)
3212{ 3388{
3213 int rc = 0; 3389 int rc = 0;
@@ -3219,7 +3395,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3219 ses->capabilities &= (~CAP_UNIX); 3395 ses->capabilities &= (~CAP_UNIX);
3220 3396
3221 cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 3397 cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3222 server->secMode, server->capabilities, server->timeAdj); 3398 server->sec_mode, server->capabilities, server->timeAdj);
3223 3399
3224 rc = CIFS_SessSetup(xid, ses, nls_info); 3400 rc = CIFS_SessSetup(xid, ses, nls_info);
3225 if (rc) { 3401 if (rc) {
@@ -3251,14 +3427,16 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
3251 return rc; 3427 return rc;
3252} 3428}
3253 3429
3254static struct cifsTconInfo * 3430static struct cifs_tcon *
3255cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) 3431cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3256{ 3432{
3257 struct cifsTconInfo *master_tcon = cifs_sb_master_tcon(cifs_sb); 3433 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
3258 struct cifsSesInfo *ses; 3434 struct cifs_ses *ses;
3259 struct cifsTconInfo *tcon = NULL; 3435 struct cifs_tcon *tcon = NULL;
3260 struct smb_vol *vol_info; 3436 struct smb_vol *vol_info;
3261 char username[MAX_USERNAME_SIZE + 1]; 3437 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
 3438 /* We used to size this as MAX_USERNAME_SIZE, which is */
 3439 /* way too big now (256 instead of 32) */
3262 3440
3263 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); 3441 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
3264 if (vol_info == NULL) { 3442 if (vol_info == NULL) {
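
The sizing comment above checks out: "krb50x" is 6 characters and ULONG_MAX prints as at most 16 hex digits, so 6 + 16 + 1 = 23 fits in 28. The same arithmetic as a compile-time check (user-space C11, assuming a 64-bit unsigned long; it also holds for 32-bit):

    #include <assert.h>

    /* "krb50x" (6 chars) + 2 hex digits per byte of unsigned long + NUL */
    static_assert(sizeof("krb50x") - 1 + 2 * sizeof(unsigned long) + 1 <= 28,
                  "username buffer too small");
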
@@ -3287,7 +3465,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3287 3465
3288 ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); 3466 ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
3289 if (IS_ERR(ses)) { 3467 if (IS_ERR(ses)) {
3290 tcon = (struct cifsTconInfo *)ses; 3468 tcon = (struct cifs_tcon *)ses;
3291 cifs_put_tcp_session(master_tcon->ses->server); 3469 cifs_put_tcp_session(master_tcon->ses->server);
3292 goto out; 3470 goto out;
3293 } 3471 }
@@ -3312,7 +3490,7 @@ cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
3312 return cifs_sb->master_tlink; 3490 return cifs_sb->master_tlink;
3313} 3491}
3314 3492
3315struct cifsTconInfo * 3493struct cifs_tcon *
3316cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) 3494cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
3317{ 3495{
3318 return tlink_tcon(cifs_sb_master_tlink(cifs_sb)); 3496 return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 9ea65cf36714..81914df47ef1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -50,12 +50,11 @@ build_path_from_dentry(struct dentry *direntry)
50{ 50{
51 struct dentry *temp; 51 struct dentry *temp;
52 int namelen; 52 int namelen;
53 int pplen;
54 int dfsplen; 53 int dfsplen;
55 char *full_path; 54 char *full_path;
56 char dirsep; 55 char dirsep;
57 struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); 56 struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
58 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 57 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
59 58
60 if (direntry == NULL) 59 if (direntry == NULL)
61 return NULL; /* not much we can do if dentry is freed and 60 return NULL; /* not much we can do if dentry is freed and
@@ -63,13 +62,12 @@ build_path_from_dentry(struct dentry *direntry)
63 when the server crashed */ 62 when the server crashed */
64 63
65 dirsep = CIFS_DIR_SEP(cifs_sb); 64 dirsep = CIFS_DIR_SEP(cifs_sb);
66 pplen = cifs_sb->prepathlen;
67 if (tcon->Flags & SMB_SHARE_IS_IN_DFS) 65 if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
68 dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); 66 dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
69 else 67 else
70 dfsplen = 0; 68 dfsplen = 0;
71cifs_bp_rename_retry: 69cifs_bp_rename_retry:
72 namelen = pplen + dfsplen; 70 namelen = dfsplen;
73 for (temp = direntry; !IS_ROOT(temp);) { 71 for (temp = direntry; !IS_ROOT(temp);) {
74 namelen += (1 + temp->d_name.len); 72 namelen += (1 + temp->d_name.len);
75 temp = temp->d_parent; 73 temp = temp->d_parent;
@@ -100,7 +98,7 @@ cifs_bp_rename_retry:
100 return NULL; 98 return NULL;
101 } 99 }
102 } 100 }
103 if (namelen != pplen + dfsplen) { 101 if (namelen != dfsplen) {
104 cERROR(1, "did not end path lookup where expected namelen is %d", 102 cERROR(1, "did not end path lookup where expected namelen is %d",
105 namelen); 103 namelen);
106 /* presumably this is only possible if racing with a rename 104 /* presumably this is only possible if racing with a rename
@@ -126,7 +124,6 @@ cifs_bp_rename_retry:
126 } 124 }
127 } 125 }
128 } 126 }
129 strncpy(full_path + dfsplen, CIFS_SB(direntry->d_sb)->prepath, pplen);
130 return full_path; 127 return full_path;
131} 128}
132 129
@@ -152,7 +149,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
152 __u16 fileHandle; 149 __u16 fileHandle;
153 struct cifs_sb_info *cifs_sb; 150 struct cifs_sb_info *cifs_sb;
154 struct tcon_link *tlink; 151 struct tcon_link *tlink;
155 struct cifsTconInfo *tcon; 152 struct cifs_tcon *tcon;
156 char *full_path = NULL; 153 char *full_path = NULL;
157 FILE_ALL_INFO *buf = NULL; 154 FILE_ALL_INFO *buf = NULL;
158 struct inode *newinode = NULL; 155 struct inode *newinode = NULL;
@@ -356,7 +353,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
356 int xid; 353 int xid;
357 struct cifs_sb_info *cifs_sb; 354 struct cifs_sb_info *cifs_sb;
358 struct tcon_link *tlink; 355 struct tcon_link *tlink;
359 struct cifsTconInfo *pTcon; 356 struct cifs_tcon *pTcon;
357 struct cifs_io_parms io_parms;
360 char *full_path = NULL; 358 char *full_path = NULL;
361 struct inode *newinode = NULL; 359 struct inode *newinode = NULL;
362 int oplock = 0; 360 int oplock = 0;
@@ -439,16 +437,19 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
439 * timestamps in, but we can reuse it safely */ 437 * timestamps in, but we can reuse it safely */
440 438
441 pdev = (struct win_dev *)buf; 439 pdev = (struct win_dev *)buf;
440 io_parms.netfid = fileHandle;
441 io_parms.pid = current->tgid;
442 io_parms.tcon = pTcon;
443 io_parms.offset = 0;
444 io_parms.length = sizeof(struct win_dev);
442 if (S_ISCHR(mode)) { 445 if (S_ISCHR(mode)) {
443 memcpy(pdev->type, "IntxCHR", 8); 446 memcpy(pdev->type, "IntxCHR", 8);
444 pdev->major = 447 pdev->major =
445 cpu_to_le64(MAJOR(device_number)); 448 cpu_to_le64(MAJOR(device_number));
446 pdev->minor = 449 pdev->minor =
447 cpu_to_le64(MINOR(device_number)); 450 cpu_to_le64(MINOR(device_number));
448 rc = CIFSSMBWrite(xid, pTcon, 451 rc = CIFSSMBWrite(xid, &io_parms,
449 fileHandle, 452 &bytes_written, (char *)pdev,
450 sizeof(struct win_dev),
451 0, &bytes_written, (char *)pdev,
452 NULL, 0); 453 NULL, 0);
453 } else if (S_ISBLK(mode)) { 454 } else if (S_ISBLK(mode)) {
454 memcpy(pdev->type, "IntxBLK", 8); 455 memcpy(pdev->type, "IntxBLK", 8);
@@ -456,10 +457,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
456 cpu_to_le64(MAJOR(device_number)); 457 cpu_to_le64(MAJOR(device_number));
457 pdev->minor = 458 pdev->minor =
458 cpu_to_le64(MINOR(device_number)); 459 cpu_to_le64(MINOR(device_number));
459 rc = CIFSSMBWrite(xid, pTcon, 460 rc = CIFSSMBWrite(xid, &io_parms,
460 fileHandle, 461 &bytes_written, (char *)pdev,
461 sizeof(struct win_dev),
462 0, &bytes_written, (char *)pdev,
463 NULL, 0); 462 NULL, 0);
464 } /* else if (S_ISFIFO) */ 463 } /* else if (S_ISFIFO) */
465 CIFSSMBClose(xid, pTcon, fileHandle); 464 CIFSSMBClose(xid, pTcon, fileHandle);
@@ -486,7 +485,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
486 bool posix_open = false; 485 bool posix_open = false;
487 struct cifs_sb_info *cifs_sb; 486 struct cifs_sb_info *cifs_sb;
488 struct tcon_link *tlink; 487 struct tcon_link *tlink;
489 struct cifsTconInfo *pTcon; 488 struct cifs_tcon *pTcon;
490 struct cifsFileInfo *cfile; 489 struct cifsFileInfo *cfile;
491 struct inode *newInode = NULL; 490 struct inode *newInode = NULL;
492 char *full_path = NULL; 491 char *full_path = NULL;
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
45#include "cifs_debug.h" 45#include "cifs_debug.h"
46#include "cifsfs.h" 46#include "cifsfs.h"
47 47
48#ifdef CONFIG_CIFS_EXPERIMENTAL 48#ifdef CIFS_NFSD_EXPORT
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
63 .encode_fs = */ 63 .encode_fs = */
64}; 64};
65 65
66#endif /* EXPERIMENTAL */ 66#endif /* CIFS_NFSD_EXPORT */
67 67
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index faf59529e847..bb71471a4d9d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -114,7 +114,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 114 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
115 struct cifs_fattr fattr; 115 struct cifs_fattr fattr;
116 struct tcon_link *tlink; 116 struct tcon_link *tlink;
117 struct cifsTconInfo *tcon; 117 struct cifs_tcon *tcon;
118 118
119 cFYI(1, "posix open %s", full_path); 119 cFYI(1, "posix open %s", full_path);
120 120
@@ -168,7 +168,7 @@ posix_open_ret:
168 168
169static int 169static int
170cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 170cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
171 struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock, 171 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
172 __u16 *pnetfid, int xid) 172 __u16 *pnetfid, int xid)
173{ 173{
174 int rc; 174 int rc;
@@ -285,7 +285,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
285void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 285void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
286{ 286{
287 struct inode *inode = cifs_file->dentry->d_inode; 287 struct inode *inode = cifs_file->dentry->d_inode;
288 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); 288 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
289 struct cifsInodeInfo *cifsi = CIFS_I(inode); 289 struct cifsInodeInfo *cifsi = CIFS_I(inode);
290 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 290 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
291 struct cifsLockInfo *li, *tmp; 291 struct cifsLockInfo *li, *tmp;
@@ -343,7 +343,7 @@ int cifs_open(struct inode *inode, struct file *file)
343 int xid; 343 int xid;
344 __u32 oplock; 344 __u32 oplock;
345 struct cifs_sb_info *cifs_sb; 345 struct cifs_sb_info *cifs_sb;
346 struct cifsTconInfo *tcon; 346 struct cifs_tcon *tcon;
347 struct tcon_link *tlink; 347 struct tcon_link *tlink;
348 struct cifsFileInfo *pCifsFile = NULL; 348 struct cifsFileInfo *pCifsFile = NULL;
349 char *full_path = NULL; 349 char *full_path = NULL;
@@ -457,7 +457,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
457 int xid; 457 int xid;
458 __u32 oplock; 458 __u32 oplock;
459 struct cifs_sb_info *cifs_sb; 459 struct cifs_sb_info *cifs_sb;
460 struct cifsTconInfo *tcon; 460 struct cifs_tcon *tcon;
461 struct cifsInodeInfo *pCifsInode; 461 struct cifsInodeInfo *pCifsInode;
462 struct inode *inode; 462 struct inode *inode;
463 char *full_path = NULL; 463 char *full_path = NULL;
@@ -596,7 +596,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
         xid = GetXid();
 
         if (pCFileStruct) {
-                struct cifsTconInfo *pTcon = tlink_tcon(pCFileStruct->tlink);
+                struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
 
                 cFYI(1, "Freeing private data in close dir");
                 spin_lock(&cifs_file_list_lock);
@@ -653,7 +653,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
         __u64 length;
         bool wait_flag = false;
         struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
         __u16 netfid;
         __u8 lockType = LOCKING_ANDX_LARGE_FILES;
         bool posix_locking = 0;
@@ -725,8 +725,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                 else
                         posix_lock_type = CIFS_WRLCK;
                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
-                                length, pfLock,
-                                posix_lock_type, wait_flag);
+                                length, pfLock, posix_lock_type,
+                                wait_flag);
                 FreeXid(xid);
                 return rc;
         }
@@ -797,8 +797,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                         posix_lock_type = CIFS_UNLCK;
 
                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
-                                      length, pfLock,
-                                      posix_lock_type, wait_flag);
+                                      length, pfLock, posix_lock_type,
+                                      wait_flag);
         } else {
                 struct cifsFileInfo *fid = file->private_data;
 
@@ -857,96 +857,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
         cifsi->server_eof = end_of_write;
 }
 
-ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-        size_t write_size, loff_t *poffset)
-{
-        struct inode *inode = file->f_path.dentry->d_inode;
-        int rc = 0;
-        unsigned int bytes_written = 0;
-        unsigned int total_written;
-        struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
-        int xid;
-        struct cifsFileInfo *open_file;
-        struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
-        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-
-        /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
-           *poffset, file->f_path.dentry->d_name.name); */
-
-        if (file->private_data == NULL)
-                return -EBADF;
-
-        open_file = file->private_data;
-        pTcon = tlink_tcon(open_file->tlink);
-
-        rc = generic_write_checks(file, poffset, &write_size, 0);
-        if (rc)
-                return rc;
-
-        xid = GetXid();
-
-        for (total_written = 0; write_size > total_written;
-             total_written += bytes_written) {
-                rc = -EAGAIN;
-                while (rc == -EAGAIN) {
-                        if (file->private_data == NULL) {
-                                /* file has been closed on us */
-                                FreeXid(xid);
-                        /* if we have gotten here we have written some data
-                           and blocked, and the file has been freed on us while
-                           we blocked so return what we managed to write */
-                                return total_written;
-                        }
-                        if (open_file->invalidHandle) {
-                                /* we could deadlock if we called
-                                   filemap_fdatawait from here so tell
-                                   reopen_file not to flush data to server
-                                   now */
-                                rc = cifs_reopen_file(open_file, false);
-                                if (rc != 0)
-                                        break;
-                        }
-
-                        rc = CIFSSMBWrite(xid, pTcon,
-                                open_file->netfid,
-                                min_t(const int, cifs_sb->wsize,
-                                      write_size - total_written),
-                                *poffset, &bytes_written,
-                                NULL, write_data + total_written, 0);
-                }
-                if (rc || (bytes_written == 0)) {
-                        if (total_written)
-                                break;
-                        else {
-                                FreeXid(xid);
-                                return rc;
-                        }
-                } else {
-                        cifs_update_eof(cifsi, *poffset, bytes_written);
-                        *poffset += bytes_written;
-                }
-        }
-
-        cifs_stats_bytes_written(pTcon, total_written);
-
-/* Do not update local mtime - server will set its actual value on write
- * inode->i_ctime = inode->i_mtime =
- *              current_fs_time(inode->i_sb);*/
-        if (total_written > 0) {
-                spin_lock(&inode->i_lock);
-                if (*poffset > inode->i_size)
-                        i_size_write(inode, *poffset);
-                spin_unlock(&inode->i_lock);
-        }
-        mark_inode_dirty_sync(inode);
-
-        FreeXid(xid);
-        return total_written;
-}
-
-static ssize_t cifs_write(struct cifsFileInfo *open_file,
+static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
                           const char *write_data, size_t write_size,
                           loff_t *poffset)
 {
@@ -954,10 +865,11 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
         unsigned int bytes_written = 0;
         unsigned int total_written;
         struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         int xid;
         struct dentry *dentry = open_file->dentry;
         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
+        struct cifs_io_parms io_parms;
 
         cifs_sb = CIFS_SB(dentry->d_sb);
 
@@ -990,8 +902,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
                         /* iov[0] is reserved for smb header */
                         iov[1].iov_base = (char *)write_data + total_written;
                         iov[1].iov_len = len;
-                        rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len,
-                                           *poffset, &bytes_written, iov, 1, 0);
+                        io_parms.netfid = open_file->netfid;
+                        io_parms.pid = pid;
+                        io_parms.tcon = pTcon;
+                        io_parms.offset = *poffset;
+                        io_parms.length = len;
+                        rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
+                                           1, 0);
                 }
                 if (rc || (bytes_written == 0)) {
                         if (total_written)
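The hunk above replaces CIFSSMBWrite2's long positional argument list with a cifs_io_parms bundle, which also carries the pid to be stamped on the SMB request. A minimal sketch of the new calling convention, using only the field names visible in this diff:

/* Sketch: gather the parameters once, then pass one pointer. */
struct cifs_io_parms io_parms = {
        .netfid = open_file->netfid,    /* handle from the open */
        .pid    = pid,                  /* pid sent on the wire */
        .tcon   = pTcon,                /* tree connection */
        .offset = *poffset,
        .length = len,
};
rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov, 1, 0);
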
@@ -1160,8 +1077,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 
         open_file = find_writable_file(CIFS_I(mapping->host), false);
         if (open_file) {
-                bytes_written = cifs_write(open_file, write_data,
-                                           to - from, &offset);
+                bytes_written = cifs_write(open_file, open_file->pid,
+                                           write_data, to - from, &offset);
                 cifsFileInfo_put(open_file);
                 /* Does mm or vfs already set times? */
                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
@@ -1181,58 +1098,20 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 static int cifs_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
-        unsigned int bytes_to_write;
-        unsigned int bytes_written;
-        struct cifs_sb_info *cifs_sb;
-        int done = 0;
-        pgoff_t end;
-        pgoff_t index;
-        int range_whole = 0;
-        struct kvec *iov;
-        int len;
-        int n_iov = 0;
-        pgoff_t next;
-        int nr_pages;
-        __u64 offset = 0;
-        struct cifsFileInfo *open_file;
-        struct cifsTconInfo *tcon;
-        struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
+        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
+        bool done = false, scanned = false, range_whole = false;
+        pgoff_t end, index;
+        struct cifs_writedata *wdata;
         struct page *page;
-        struct pagevec pvec;
         int rc = 0;
-        int scanned = 0;
-        int xid;
-
-        cifs_sb = CIFS_SB(mapping->host->i_sb);
 
         /*
-         * If wsize is smaller that the page cache size, default to writing
+         * If wsize is smaller than the page cache size, default to writing
          * one page at a time via cifs_writepage
          */
         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
                 return generic_writepages(mapping, wbc);
 
-        iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
-        if (iov == NULL)
-                return generic_writepages(mapping, wbc);
-
-        /*
-         * if there's no open file, then this is likely to fail too,
-         * but it'll at least handle the return. Maybe it should be
-         * a BUG() instead?
-         */
-        open_file = find_writable_file(CIFS_I(mapping->host), false);
-        if (!open_file) {
-                kfree(iov);
-                return generic_writepages(mapping, wbc);
-        }
-
-        tcon = tlink_tcon(open_file->tlink);
-        cifsFileInfo_put(open_file);
-
-        xid = GetXid();
-
-        pagevec_init(&pvec, 0);
         if (wbc->range_cyclic) {
                 index = mapping->writeback_index; /* Start from prev offset */
                 end = -1;
@@ -1240,24 +1119,49 @@ static int cifs_writepages(struct address_space *mapping,
                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                        range_whole = 1;
-                scanned = 1;
+                        range_whole = true;
+                scanned = true;
         }
 retry:
-        while (!done && (index <= end) &&
-               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                        PAGECACHE_TAG_DIRTY,
-                        min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
-                int first;
-                unsigned int i;
-
-                first = -1;
-                next = 0;
-                n_iov = 0;
-                bytes_to_write = 0;
-
-                for (i = 0; i < nr_pages; i++) {
-                        page = pvec.pages[i];
+        while (!done && index <= end) {
+                unsigned int i, nr_pages, found_pages;
+                pgoff_t next = 0, tofind;
+                struct page **pages;
+
+                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
+                                end - index) + 1;
+
+                wdata = cifs_writedata_alloc((unsigned int)tofind);
+                if (!wdata) {
+                        rc = -ENOMEM;
+                        break;
+                }
+
+                /*
+                 * find_get_pages_tag seems to return a max of 256 on each
+                 * iteration, so we must call it several times in order to
+                 * fill the array or the wsize is effectively limited to
+                 * 256 * PAGE_CACHE_SIZE.
+                 */
+                found_pages = 0;
+                pages = wdata->pages;
+                do {
+                        nr_pages = find_get_pages_tag(mapping, &index,
+                                                        PAGECACHE_TAG_DIRTY,
+                                                        tofind, pages);
+                        found_pages += nr_pages;
+                        tofind -= nr_pages;
+                        pages += nr_pages;
+                } while (nr_pages && tofind && index <= end);
+
+                if (found_pages == 0) {
+                        kref_put(&wdata->refcount, cifs_writedata_release);
+                        break;
+                }
+
+                nr_pages = 0;
+                for (i = 0; i < found_pages; i++) {
+                        page = wdata->pages[i];
                         /*
                          * At this point we hold neither mapping->tree_lock nor
                          * lock on the page itself: the page may be truncated or
@@ -1266,7 +1170,7 @@ retry:
                          * mapping
                          */
 
-                        if (first < 0)
+                        if (nr_pages == 0)
                                 lock_page(page);
                         else if (!trylock_page(page))
                                 break;
@@ -1277,7 +1181,7 @@ retry:
                         }
 
                         if (!wbc->range_cyclic && page->index > end) {
-                                done = 1;
+                                done = true;
                                 unlock_page(page);
                                 break;
                         }
@@ -1304,125 +1208,96 @@ retry:
                         set_page_writeback(page);
 
                         if (page_offset(page) >= mapping->host->i_size) {
-                                done = 1;
+                                done = true;
                                 unlock_page(page);
                                 end_page_writeback(page);
                                 break;
                         }
 
-                        /*
-                         * BB can we get rid of this? pages are held by pvec
-                         */
-                        page_cache_get(page);
-
-                        len = min(mapping->host->i_size - page_offset(page),
-                                  (loff_t)PAGE_CACHE_SIZE);
-
-                        /* reserve iov[0] for the smb header */
-                        n_iov++;
-                        iov[n_iov].iov_base = kmap(page);
-                        iov[n_iov].iov_len = len;
-                        bytes_to_write += len;
-
-                        if (first < 0) {
-                                first = i;
-                                offset = page_offset(page);
-                        }
-                        next = page->index + 1;
-                        if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
-                                break;
-                }
-                if (n_iov) {
-retry_write:
-                        open_file = find_writable_file(CIFS_I(mapping->host),
-                                                       false);
-                        if (!open_file) {
-                                cERROR(1, "No writable handles for inode");
-                                rc = -EBADF;
-                        } else {
-                                rc = CIFSSMBWrite2(xid, tcon, open_file->netfid,
-                                                   bytes_to_write, offset,
-                                                   &bytes_written, iov, n_iov,
-                                                   0);
-                                cifsFileInfo_put(open_file);
-                        }
-
-                        cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written);
-
-                        /*
-                         * For now, treat a short write as if nothing got
-                         * written. A zero length write however indicates
-                         * ENOSPC or EFBIG. We have no way to know which
-                         * though, so call it ENOSPC for now. EFBIG would
-                         * get translated to AS_EIO anyway.
-                         *
-                         * FIXME: make it take into account the data that did
-                         * get written
-                         */
-                        if (rc == 0) {
-                                if (bytes_written == 0)
-                                        rc = -ENOSPC;
-                                else if (bytes_written < bytes_to_write)
-                                        rc = -EAGAIN;
-                        }
-
-                        /* retry on data-integrity flush */
-                        if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
-                                goto retry_write;
-
-                        /* fix the stats and EOF */
-                        if (bytes_written > 0) {
-                                cifs_stats_bytes_written(tcon, bytes_written);
-                                cifs_update_eof(cifsi, offset, bytes_written);
-                        }
-
-                        for (i = 0; i < n_iov; i++) {
-                                page = pvec.pages[first + i];
-                                /* on retryable write error, redirty page */
+                        wdata->pages[i] = page;
+                        next = page->index + 1;
+                        ++nr_pages;
+                }
+
+                /* reset index to refind any pages skipped */
+                if (nr_pages == 0)
+                        index = wdata->pages[0]->index + 1;
+
+                /* put any pages we aren't going to use */
+                for (i = nr_pages; i < found_pages; i++) {
+                        page_cache_release(wdata->pages[i]);
+                        wdata->pages[i] = NULL;
+                }
+
+                /* nothing to write? */
+                if (nr_pages == 0) {
+                        kref_put(&wdata->refcount, cifs_writedata_release);
+                        continue;
+                }
+
+                wdata->sync_mode = wbc->sync_mode;
+                wdata->nr_pages = nr_pages;
+                wdata->offset = page_offset(wdata->pages[0]);
+
+                do {
+                        if (wdata->cfile != NULL)
+                                cifsFileInfo_put(wdata->cfile);
+                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
+                                                          false);
+                        if (!wdata->cfile) {
+                                cERROR(1, "No writable handles for inode");
+                                rc = -EBADF;
+                                break;
+                        }
+                        rc = cifs_async_writev(wdata);
+                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
+
+                for (i = 0; i < nr_pages; ++i)
+                        unlock_page(wdata->pages[i]);
+
+                /* send failure -- clean up the mess */
+                if (rc != 0) {
+                        for (i = 0; i < nr_pages; ++i) {
                                 if (rc == -EAGAIN)
-                                        redirty_page_for_writepage(wbc, page);
-                                else if (rc != 0)
-                                        SetPageError(page);
-                                kunmap(page);
-                                unlock_page(page);
-                                end_page_writeback(page);
-                                page_cache_release(page);
+                                        redirty_page_for_writepage(wbc,
+                                                           wdata->pages[i]);
+                                else
+                                        SetPageError(wdata->pages[i]);
+                                end_page_writeback(wdata->pages[i]);
+                                page_cache_release(wdata->pages[i]);
                         }
-
                         if (rc != -EAGAIN)
                                 mapping_set_error(mapping, rc);
-                        else
-                                rc = 0;
+                }
+                kref_put(&wdata->refcount, cifs_writedata_release);
 
-                        if ((wbc->nr_to_write -= n_iov) <= 0)
-                                done = 1;
-                        index = next;
-                } else
-                        /* Need to re-find the pages we skipped */
-                        index = pvec.pages[0]->index + 1;
+                wbc->nr_to_write -= nr_pages;
+                if (wbc->nr_to_write <= 0)
+                        done = true;
 
-                pagevec_release(&pvec);
+                index = next;
         }
 
         if (!scanned && !done) {
                 /*
                  * We hit the last page and there is more work to be done: wrap
                  * back to the start of the file
                  */
-                scanned = 1;
+                scanned = true;
                 index = 0;
                 goto retry;
         }
 
         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                 mapping->writeback_index = index;
 
-        FreeXid(xid);
-        kfree(iov);
         return rc;
 }
 
-static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+static int
+cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
-        int rc = -EFAULT;
+        int rc;
         int xid;
 
         xid = GetXid();
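cifs_writepages no longer builds a kvec array and blocks in CIFSSMBWrite2; it fills a refcounted cifs_writedata and hands it to cifs_async_writev, retrying the submission only for data-integrity writeback. The refcount pairing is the part that is easy to get wrong; a sketch of the intended ownership, assuming the helpers named in this hunk:

/* Sketch: the allocator returns one reference; every exit path must
 * drop it, whether or not the async send was ever issued. */
struct cifs_writedata *wdata = cifs_writedata_alloc(tofind);
if (!wdata)
        return -ENOMEM;
/* ... fill wdata->pages and submit via cifs_async_writev(wdata) ... */
kref_put(&wdata->refcount, cifs_writedata_release);
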
@@ -1442,21 +1317,43 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
          * to fail to update with the state of the page correctly.
          */
         set_page_writeback(page);
+retry_write:
         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
-        SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
-        unlock_page(page);
+        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
+                goto retry_write;
+        else if (rc == -EAGAIN)
+                redirty_page_for_writepage(wbc, page);
+        else if (rc != 0)
+                SetPageError(page);
+        else
+                SetPageUptodate(page);
         end_page_writeback(page);
         page_cache_release(page);
         FreeXid(xid);
         return rc;
 }
 
+static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+{
+        int rc = cifs_writepage_locked(page, wbc);
+        unlock_page(page);
+        return rc;
+}
+
 static int cifs_write_end(struct file *file, struct address_space *mapping,
                           loff_t pos, unsigned len, unsigned copied,
                           struct page *page, void *fsdata)
 {
         int rc;
         struct inode *inode = mapping->host;
+        struct cifsFileInfo *cfile = file->private_data;
+        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
+        __u32 pid;
+
+        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+                pid = cfile->pid;
+        else
+                pid = current->tgid;
 
         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
              page, pos, copied);
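Splitting out cifs_writepage_locked lets a caller that already holds the page lock (such as cifs_launder_page, added later in this file) reuse the write-and-retry logic, while cifs_writepage keeps the usual ->writepage contract of unlocking the page itself. A sketch of the locked variant's contract:

/* Sketch: the *_locked helper leaves the page locked for the caller. */
lock_page(page);
if (clear_page_dirty_for_io(page))
        rc = cifs_writepage_locked(page, &wbc); /* page still locked */
unlock_page(page);
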
@@ -1480,8 +1377,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
                 /* BB check if anything else missing out of ppw
                    such as updating last write time */
                 page_data = kmap(page);
-                rc = cifs_write(file->private_data, page_data + offset,
-                                copied, &pos);
+                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
                 /* if (rc < 0) should we set writebehind rc? */
                 kunmap(page);
 
@@ -1509,7 +1405,7 @@ int cifs_strict_fsync(struct file *file, int datasync)
 {
         int xid;
         int rc = 0;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
         struct cifsFileInfo *smbfile = file->private_data;
         struct inode *inode = file->f_path.dentry->d_inode;
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1519,8 +1415,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
         cFYI(1, "Sync file - name: %s datasync: 0x%x",
                 file->f_path.dentry->d_name.name, datasync);
 
-        if (!CIFS_I(inode)->clientCanCacheRead)
-                cifs_invalidate_mapping(inode);
+        if (!CIFS_I(inode)->clientCanCacheRead) {
+                rc = cifs_invalidate_mapping(inode);
+                if (rc) {
+                        cFYI(1, "rc: %d during invalidate phase", rc);
+                        rc = 0; /* don't care about it in fsync */
+                }
+        }
 
         tcon = tlink_tcon(smbfile->tlink);
         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
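cifs_invalidate_mapping can now fail, because invalidate_inode_pages2 refuses to drop pages it cannot clean; fsync deliberately logs and ignores that failure, since the flush that follows is what fsync actually guarantees. The same policy, reduced to its core:

/* Sketch: a stale pagecache is not an fsync failure. */
if (!CIFS_I(inode)->clientCanCacheRead) {
        rc = cifs_invalidate_mapping(inode);
        if (rc)
                rc = 0; /* best effort; the SMB flush still runs */
}
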
@@ -1534,7 +1435,7 @@ int cifs_fsync(struct file *file, int datasync)
 {
         int xid;
         int rc = 0;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
         struct cifsFileInfo *smbfile = file->private_data;
         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 
@@ -1625,9 +1526,11 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
         struct iov_iter it;
         struct inode *inode;
         struct cifsFileInfo *open_file;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         struct cifs_sb_info *cifs_sb;
+        struct cifs_io_parms io_parms;
         int xid, rc;
+        __u32 pid;
 
         len = iov_length(iov, nr_segs);
         if (!len)
@@ -1659,6 +1562,12 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 
         xid = GetXid();
         open_file = file->private_data;
+
+        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+                pid = open_file->pid;
+        else
+                pid = current->tgid;
+
         pTcon = tlink_tcon(open_file->tlink);
         inode = file->f_path.dentry->d_inode;
 
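This pid selection recurs throughout the read and write paths in this patch: when the superblock carries CIFS_MOUNT_RWPIDFORWARD (presumably set by an rwpidforward-style mount option), I/O is issued with the pid recorded at open time rather than the caller's, which keeps server-side byte-range lock ownership consistent across threads sharing one open file:

/* Sketch: choose the pid that accompanies each SMB read/write. */
__u32 pid;

if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
        pid = open_file->pid;   /* pid captured when the file was opened */
else
        pid = current->tgid;    /* default: the calling process */
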
@@ -1685,9 +1594,13 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
                         if (rc != 0)
                                 break;
                 }
-                rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid,
-                                   cur_len, *poffset, &written,
-                                   to_send, npages, 0);
+                io_parms.netfid = open_file->netfid;
+                io_parms.pid = pid;
+                io_parms.tcon = pTcon;
+                io_parms.offset = *poffset;
+                io_parms.length = cur_len;
+                rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
+                                   npages, 0);
         } while (rc == -EAGAIN);
 
         for (i = 0; i < npages; i++)
@@ -1726,7 +1639,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
         return total_written;
 }
 
-static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
                                 unsigned long nr_segs, loff_t pos)
 {
         ssize_t written;
@@ -1780,10 +1693,12 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
         size_t len, cur_len;
         int iov_offset = 0;
         struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         struct cifsFileInfo *open_file;
         struct smb_com_read_rsp *pSMBr;
+        struct cifs_io_parms io_parms;
         char *read_data;
+        __u32 pid;
 
         if (!nr_segs)
                 return 0;
@@ -1798,6 +1713,11 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
         open_file = file->private_data;
         pTcon = tlink_tcon(open_file->tlink);
 
+        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+                pid = open_file->pid;
+        else
+                pid = current->tgid;
+
         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                 cFYI(1, "attempting read on write only file instance");
 
@@ -1813,8 +1733,12 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
                         if (rc != 0)
                                 break;
                 }
-                rc = CIFSSMBRead(xid, pTcon, open_file->netfid,
-                                 cur_len, *poffset, &bytes_read,
+                io_parms.netfid = open_file->netfid;
+                io_parms.pid = pid;
+                io_parms.tcon = pTcon;
+                io_parms.offset = *poffset;
+                io_parms.length = len;
+                rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
                                  &read_data, &buf_type);
                 pSMBr = (struct smb_com_read_rsp *)read_data;
                 if (read_data) {
1851 1775
1852ssize_t cifs_user_read(struct file *file, char __user *read_data, 1776ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1853 size_t read_size, loff_t *poffset)
1854{
1855 struct iovec iov;
1856 iov.iov_base = read_data;
1857 iov.iov_len = read_size;
1858
1859 return cifs_iovec_read(file, &iov, 1, poffset);
1860}
1861
1862static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1863 unsigned long nr_segs, loff_t pos) 1777 unsigned long nr_segs, loff_t pos)
1864{ 1778{
1865 ssize_t read; 1779 ssize_t read;
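With cifs_user_write and cifs_user_read removed, and cifs_user_writev/cifs_user_readv un-static'd in this patch, the buffer-based entry points give way to the iovec-based aio paths. A hedged sketch of the expected wiring; the real file_operations table lives in cifsfs.c, outside this diff, so the exact fields are an assumption:

/* Sketch only: illustrative file_operations wiring. */
const struct file_operations cifs_file_ops_sketch = {
        .aio_read  = cifs_user_readv,   /* exported by this patch */
        .aio_write = cifs_user_writev,  /* exported by this patch */
        /* remaining ops unchanged */
};
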
@@ -1901,11 +1815,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
         unsigned int total_read;
         unsigned int current_read_size;
         struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         int xid;
         char *current_offset;
         struct cifsFileInfo *open_file;
+        struct cifs_io_parms io_parms;
         int buf_type = CIFS_NO_BUFFER;
+        __u32 pid;
 
         xid = GetXid();
         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1918,6 +1834,11 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
         open_file = file->private_data;
         pTcon = tlink_tcon(open_file->tlink);
 
+        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+                pid = open_file->pid;
+        else
+                pid = current->tgid;
+
         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                 cFYI(1, "attempting read on write only file instance");
 
@@ -1940,11 +1861,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
                         if (rc != 0)
                                 break;
                 }
-                rc = CIFSSMBRead(xid, pTcon,
-                                 open_file->netfid,
-                                 current_read_size, *poffset,
-                                 &bytes_read, &current_offset,
-                                 &buf_type);
+                io_parms.netfid = open_file->netfid;
+                io_parms.pid = pid;
+                io_parms.tcon = pTcon;
+                io_parms.offset = *poffset;
+                io_parms.length = current_read_size;
+                rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
+                                 &current_offset, &buf_type);
         }
         if (rc || (bytes_read == 0)) {
                 if (total_read) {
@@ -1987,8 +1910,11 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
 
         xid = GetXid();
 
-        if (!CIFS_I(inode)->clientCanCacheRead)
-                cifs_invalidate_mapping(inode);
+        if (!CIFS_I(inode)->clientCanCacheRead) {
+                rc = cifs_invalidate_mapping(inode);
+                if (rc)
+                        return rc;
+        }
 
         rc = generic_file_mmap(file, vma);
         if (rc == 0)
@@ -2072,13 +1998,15 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
         loff_t offset;
         struct page *page;
         struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         unsigned int bytes_read = 0;
         unsigned int read_size, i;
         char *smb_read_data = NULL;
         struct smb_com_read_rsp *pSMBr;
         struct cifsFileInfo *open_file;
+        struct cifs_io_parms io_parms;
         int buf_type = CIFS_NO_BUFFER;
+        __u32 pid;
 
         xid = GetXid();
         if (file->private_data == NULL) {
@@ -2100,6 +2028,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                 goto read_complete;
 
         cFYI(DBG2, "rpages: num pages %d", num_pages);
+        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+                pid = open_file->pid;
+        else
+                pid = current->tgid;
+
         for (i = 0; i < num_pages; ) {
                 unsigned contig_pages;
                 struct page *tmp_page;
@@ -2141,12 +2074,13 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                         if (rc != 0)
                                 break;
                 }
-
-                rc = CIFSSMBRead(xid, pTcon,
-                                 open_file->netfid,
-                                 read_size, offset,
-                                 &bytes_read, &smb_read_data,
-                                 &buf_type);
+                io_parms.netfid = open_file->netfid;
+                io_parms.pid = pid;
+                io_parms.tcon = pTcon;
+                io_parms.offset = offset;
+                io_parms.length = read_size;
+                rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
+                                 &smb_read_data, &buf_type);
                 /* BB more RC checks ? */
                 if (rc == -EAGAIN) {
                         if (smb_read_data) {
@@ -2415,6 +2349,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
         cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
 }
 
+static int cifs_launder_page(struct page *page)
+{
+        int rc = 0;
+        loff_t range_start = page_offset(page);
+        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+        struct writeback_control wbc = {
+                .sync_mode = WB_SYNC_ALL,
+                .nr_to_write = 0,
+                .range_start = range_start,
+                .range_end = range_end,
+        };
+
+        cFYI(1, "Launder page: %p", page);
+
+        if (clear_page_dirty_for_io(page))
+                rc = cifs_writepage_locked(page, &wbc);
+
+        cifs_fscache_invalidate_page(page, page->mapping->host);
+        return rc;
+}
+
 void cifs_oplock_break(struct work_struct *work)
 {
         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
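The new cifs_launder_page gives invalidate_inode_pages2 a synchronous way to clean a dirty page before dropping it: the page arrives locked, so cifs_writepage_locked can be called directly, and nr_to_write can stay 0 because the wbc names this one explicit page rather than asking the writeback scan to find work:

/* Sketch: a single-page, data-integrity writeback control. */
struct writeback_control wbc = {
        .sync_mode   = WB_SYNC_ALL,     /* must reach the server */
        .nr_to_write = 0,               /* page is passed explicitly */
        .range_start = page_offset(page),
        .range_end   = page_offset(page) + PAGE_CACHE_SIZE - 1,
};
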
@@ -2486,7 +2441,7 @@ const struct address_space_operations cifs_addr_ops = {
         .set_page_dirty = __set_page_dirty_nobuffers,
         .releasepage = cifs_release_page,
         .invalidatepage = cifs_invalidate_page,
-        /* .direct_IO = */
+        .launder_page = cifs_launder_page,
 };
 
 /*
@@ -2503,5 +2458,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
         .set_page_dirty = __set_page_dirty_nobuffers,
         .releasepage = cifs_release_page,
         .invalidatepage = cifs_invalidate_page,
-        /* .direct_IO = */
+        .launder_page = cifs_launder_page,
 };
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 297a43d0ff7f..d368a47ba5eb 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -40,7 +40,7 @@ void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
         server->fscache = NULL;
 }
 
-void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
+void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
 {
         struct TCP_Server_Info *server = tcon->ses->server;
 
@@ -51,7 +51,7 @@ void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
                 server->fscache, tcon->fscache);
 }
 
-void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon)
+void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
 {
         cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
         fscache_relinquish_cookie(tcon->fscache, 0);
@@ -62,7 +62,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
 {
         struct cifsInodeInfo *cifsi = CIFS_I(inode);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-        struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
+        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
         if (cifsi->fscache)
                 return;
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 31b88ec2341e..63539323e0b9 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -40,8 +40,8 @@ extern void cifs_fscache_unregister(void);
  */
 extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
 extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *);
-extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *);
+extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
+extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
 
 extern void cifs_fscache_release_inode_cookie(struct inode *);
 extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
@@ -99,9 +99,9 @@ static inline void
 cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
 static inline void
 cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
-static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {}
+static inline void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) {}
 static inline void
-cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {}
+cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
 
 static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
 static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..9b018c8334fa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -295,7 +295,7 @@ int cifs_get_file_info_unix(struct file *filp)
295 struct inode *inode = filp->f_path.dentry->d_inode; 295 struct inode *inode = filp->f_path.dentry->d_inode;
296 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 296 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
297 struct cifsFileInfo *cfile = filp->private_data; 297 struct cifsFileInfo *cfile = filp->private_data;
298 struct cifsTconInfo *tcon = tlink_tcon(cfile->tlink); 298 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
299 299
300 xid = GetXid(); 300 xid = GetXid();
301 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); 301 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -318,7 +318,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
318 int rc; 318 int rc;
319 FILE_UNIX_BASIC_INFO find_data; 319 FILE_UNIX_BASIC_INFO find_data;
320 struct cifs_fattr fattr; 320 struct cifs_fattr fattr;
321 struct cifsTconInfo *tcon; 321 struct cifs_tcon *tcon;
322 struct tcon_link *tlink; 322 struct tcon_link *tlink;
323 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 323 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
324 324
@@ -373,7 +373,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
373 int oplock = 0; 373 int oplock = 0;
374 __u16 netfid; 374 __u16 netfid;
375 struct tcon_link *tlink; 375 struct tcon_link *tlink;
376 struct cifsTconInfo *tcon; 376 struct cifs_tcon *tcon;
377 struct cifs_io_parms io_parms;
377 char buf[24]; 378 char buf[24];
378 unsigned int bytes_read; 379 unsigned int bytes_read;
379 char *pbuf; 380 char *pbuf;
@@ -405,9 +406,13 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
         if (rc == 0) {
                 int buf_type = CIFS_NO_BUFFER;
                         /* Read header */
-                rc = CIFSSMBRead(xid, tcon, netfid,
-                                 24 /* length */, 0 /* offset */,
-                                 &bytes_read, &pbuf, &buf_type);
+                io_parms.netfid = netfid;
+                io_parms.pid = current->tgid;
+                io_parms.tcon = tcon;
+                io_parms.offset = 0;
+                io_parms.length = 24;
+                rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf,
+                                 &buf_type);
                 if ((rc == 0) && (bytes_read >= 8)) {
                         if (memcmp("IntxBLK", pbuf, 8) == 0) {
                                 cFYI(1, "Block device");
@@ -468,7 +473,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
         char ea_value[4];
         __u32 mode;
         struct tcon_link *tlink;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
 
         tlink = cifs_sb_tlink(cifs_sb);
         if (IS_ERR(tlink))
@@ -502,7 +507,7 @@ static void
 cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                        struct cifs_sb_info *cifs_sb, bool adjust_tz)
 {
-        struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
+        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
         memset(fattr, 0, sizeof(*fattr));
         fattr->cf_cifsattrs = le32_to_cpu(info->Attributes);
@@ -553,7 +558,7 @@ int cifs_get_file_info(struct file *filp)
         struct inode *inode = filp->f_path.dentry->d_inode;
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct cifsFileInfo *cfile = filp->private_data;
-        struct cifsTconInfo *tcon = tlink_tcon(cfile->tlink);
+        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 
         xid = GetXid();
         rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -590,7 +595,7 @@ int cifs_get_inode_info(struct inode **pinode,
                     struct super_block *sb, int xid, const __u16 *pfid)
 {
         int rc = 0, tmprc;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         struct tcon_link *tlink;
         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
         char *buf = NULL;
@@ -735,10 +740,10 @@ static const struct inode_operations cifs_ipc_inode_ops = {
         .lookup = cifs_lookup,
 };
 
-char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
-                              struct cifsTconInfo *tcon)
+char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
+                              struct cifs_tcon *tcon)
 {
-        int pplen = cifs_sb->prepathlen;
+        int pplen = vol->prepath ? strlen(vol->prepath) : 0;
         int dfsplen;
         char *full_path = NULL;
 
@@ -772,7 +777,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
                         }
                 }
         }
-        strncpy(full_path + dfsplen, cifs_sb->prepath, pplen);
+        strncpy(full_path + dfsplen, vol->prepath, pplen);
         full_path[dfsplen + pplen] = 0; /* add trailing null */
         return full_path;
 }
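cifs_build_path_to_root now reads the prefix path straight from the parsed mount options (smb_vol) instead of a copy cached in cifs_sb, which is what lets the superblock info stop carrying prepath/prepathlen. A hedged sketch of the adjusted call site, where volume_info is an illustrative name for the parsed options:

/* Sketch: the caller now supplies the parsed mount options, too. */
char *full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon);
if (full_path == NULL)
        return -ENOMEM;
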
@@ -878,25 +883,19 @@ retry_iget5_locked:
 }
 
 /* gets root inode */
-struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
+struct inode *cifs_root_iget(struct super_block *sb)
 {
         int xid;
         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
         struct inode *inode = NULL;
         long rc;
-        char *full_path;
-        struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
-
-        full_path = cifs_build_path_to_root(cifs_sb, tcon);
-        if (full_path == NULL)
-                return ERR_PTR(-ENOMEM);
+        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
         xid = GetXid();
         if (tcon->unix_ext)
-                rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
+                rc = cifs_get_inode_info_unix(&inode, "", sb, xid);
         else
-                rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
-                                         xid, NULL);
+                rc = cifs_get_inode_info(&inode, "", NULL, sb, xid, NULL);
 
         if (!inode) {
                 inode = ERR_PTR(rc);
@@ -922,7 +921,6 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
         }
 
 out:
-        kfree(full_path);
         /* can not call macro FreeXid here since in a void func
          * TODO: This is no longer true
          */
@@ -943,7 +941,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
         struct cifsInodeInfo *cifsInode = CIFS_I(inode);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct tcon_link *tlink = NULL;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         FILE_BASIC_INFO info_buf;
 
         if (attrs == NULL)
@@ -1061,7 +1059,7 @@ cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid)
         struct cifsInodeInfo *cifsInode = CIFS_I(inode);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct tcon_link *tlink;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
         __u32 dosattr, origattr;
         FILE_BASIC_INFO *info_buf = NULL;
 
@@ -1179,7 +1177,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
         struct super_block *sb = dir->i_sb;
         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
         struct tcon_link *tlink;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
         struct iattr *attrs = NULL;
         __u32 dosattr = 0, origattr = 0;
 
@@ -1277,7 +1275,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
         int xid;
         struct cifs_sb_info *cifs_sb;
         struct tcon_link *tlink;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         char *full_path = NULL;
         struct inode *newinode = NULL;
         struct cifs_fattr fattr;
@@ -1455,7 +1453,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
         int xid;
         struct cifs_sb_info *cifs_sb;
         struct tcon_link *tlink;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         char *full_path = NULL;
         struct cifsInodeInfo *cifsInode;
 
@@ -1512,7 +1510,7 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
 {
         struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb);
         struct tcon_link *tlink;
-        struct cifsTconInfo *pTcon;
+        struct cifs_tcon *pTcon;
         __u16 srcfid;
         int oplock, rc;
 
@@ -1564,7 +1562,7 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
         char *toName = NULL;
         struct cifs_sb_info *cifs_sb;
         struct tcon_link *tlink;
-        struct cifsTconInfo *tcon;
+        struct cifs_tcon *tcon;
         FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
         FILE_UNIX_BASIC_INFO *info_buf_target;
         int xid, rc, tmprc;
@@ -1683,71 +1681,70 @@ cifs_inode_needs_reval(struct inode *inode)
 /*
  * Zap the cache. Called when invalid_mapping flag is set.
  */
-void
+int
 cifs_invalidate_mapping(struct inode *inode)
 {
-        int rc;
+        int rc = 0;
         struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 
         cifs_i->invalid_mapping = false;
 
-        /* write back any cached data */
         if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
-                rc = filemap_write_and_wait(inode->i_mapping);
-                mapping_set_error(inode->i_mapping, rc);
+                rc = invalidate_inode_pages2(inode->i_mapping);
+                if (rc) {
+                        cERROR(1, "%s: could not invalidate inode %p", __func__,
+                               inode);
+                        cifs_i->invalid_mapping = true;
+                }
         }
-        invalidate_remote_inode(inode);
+
         cifs_fscache_reset_inode_cookie(inode);
+        return rc;
 }
 
-int cifs_revalidate_file(struct file *filp)
+int cifs_revalidate_file_attr(struct file *filp)
 {
         int rc = 0;
         struct inode *inode = filp->f_path.dentry->d_inode;
         struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
 
         if (!cifs_inode_needs_reval(inode))
-                goto check_inval;
+                return rc;
 
         if (tlink_tcon(cfile->tlink)->unix_ext)
                 rc = cifs_get_file_info_unix(filp);
         else
                 rc = cifs_get_file_info(filp);
 
-check_inval:
-        if (CIFS_I(inode)->invalid_mapping)
-                cifs_invalidate_mapping(inode);
-
         return rc;
 }
 
-/* revalidate a dentry's inode attributes */
-int cifs_revalidate_dentry(struct dentry *dentry)
+int cifs_revalidate_dentry_attr(struct dentry *dentry)
 {
         int xid;
         int rc = 0;
-        char *full_path = NULL;
         struct inode *inode = dentry->d_inode;
         struct super_block *sb = dentry->d_sb;
+        char *full_path = NULL;
 
         if (inode == NULL)
                 return -ENOENT;
 
-        xid = GetXid();
-
         if (!cifs_inode_needs_reval(inode))
-                goto check_inval;
+                return rc;
+
+        xid = GetXid();
 
         /* can not safely grab the rename sem here if rename calls revalidate
            since that would deadlock */
         full_path = build_path_from_dentry(dentry);
         if (full_path == NULL) {
                 rc = -ENOMEM;
-                goto check_inval;
+                goto out;
         }
 
-        cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
-             "jiffies %ld", full_path, inode, inode->i_count.counter,
+        cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
+             "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
              dentry, dentry->d_time, jiffies);
 
         if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1753,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
                 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
                                          xid, NULL);
 
-check_inval:
-        if (CIFS_I(inode)->invalid_mapping)
-                cifs_invalidate_mapping(inode);
-
+out:
         kfree(full_path);
         FreeXid(xid);
         return rc;
 }
 
+int cifs_revalidate_file(struct file *filp)
+{
+        int rc;
+        struct inode *inode = filp->f_path.dentry->d_inode;
+
+        rc = cifs_revalidate_file_attr(filp);
+        if (rc)
+                return rc;
+
+        if (CIFS_I(inode)->invalid_mapping)
+                rc = cifs_invalidate_mapping(inode);
+        return rc;
+}
+
+/* revalidate a dentry's inode attributes */
+int cifs_revalidate_dentry(struct dentry *dentry)
+{
+        int rc;
+        struct inode *inode = dentry->d_inode;
+
+        rc = cifs_revalidate_dentry_attr(dentry);
+        if (rc)
+                return rc;
+
+        if (CIFS_I(inode)->invalid_mapping)
+                rc = cifs_invalidate_mapping(inode);
+        return rc;
+}
+
 int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat)
 {
         struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
-        struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
-        int err = cifs_revalidate_dentry(dentry);
-
-        if (!err) {
-                generic_fillattr(dentry->d_inode, stat);
-                stat->blksize = CIFS_MAX_MSGSIZE;
-                stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
-
-                /*
-                 * If on a multiuser mount without unix extensions, and the
-                 * admin hasn't overridden them, set the ownership to the
-                 * fsuid/fsgid of the current process.
-                 */
-                if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
-                    !tcon->unix_ext) {
-                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
-                                stat->uid = current_fsuid();
-                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
-                                stat->gid = current_fsgid();
-                }
-        }
-        return err;
+        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+        struct inode *inode = dentry->d_inode;
+        int rc;
+
+        /*
+         * We need to be sure that all dirty pages are written and the server
+         * has actual ctime, mtime and file length.
+         */
+        if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+            inode->i_mapping->nrpages != 0) {
+                rc = filemap_fdatawait(inode->i_mapping);
+                if (rc) {
+                        mapping_set_error(inode->i_mapping, rc);
+                        return rc;
+                }
+        }
+
+        rc = cifs_revalidate_dentry_attr(dentry);
+        if (rc)
+                return rc;
+
+        generic_fillattr(inode, stat);
+        stat->blksize = CIFS_MAX_MSGSIZE;
+        stat->ino = CIFS_I(inode)->uniqueid;
+
+        /*
+         * If on a multiuser mount without unix extensions, and the admin hasn't
+         * overridden them, set the ownership to the fsuid/fsgid of the current
+         * process.
+         */
+        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
+            !tcon->unix_ext) {
+                if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
+                        stat->uid = current_fsuid();
+                if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
+                        stat->gid = current_fsgid();
+        }
+        return rc;
 }
 
 static int cifs_truncate_page(struct address_space *mapping, loff_t from)
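Attribute revalidation and pagecache invalidation are now two separate steps: the *_attr helpers only refresh metadata from the server, and each caller decides what a failed invalidation means (getattr additionally waits on dirty pages first, so the server-side times and length are current). The composition used by the new wrappers:

/* Sketch: refresh attributes, then zap the cache only if flagged. */
int cifs_revalidate_dentry(struct dentry *dentry)
{
        int rc = cifs_revalidate_dentry_attr(dentry);   /* server attrs */

        if (rc)
                return rc;
        if (CIFS_I(dentry->d_inode)->invalid_mapping)
                rc = cifs_invalidate_mapping(dentry->d_inode); /* drop cache */
        return rc;
}
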
@@ -1831,7 +1870,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
         struct cifsInodeInfo *cifsInode = CIFS_I(inode);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct tcon_link *tlink = NULL;
-        struct cifsTconInfo *pTcon = NULL;
+        struct cifs_tcon *pTcon = NULL;
+        struct cifs_io_parms io_parms;
 
         /*
          * To avoid spurious oplock breaks from server, in the case of
@@ -1853,8 +1893,14 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
                 cFYI(1, "SetFSize for attrs rc = %d", rc);
                 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                         unsigned int bytes_written;
-                        rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
-                                          &bytes_written, NULL, NULL, 1);
+
+                        io_parms.netfid = nfid;
+                        io_parms.pid = npid;
+                        io_parms.tcon = pTcon;
+                        io_parms.offset = 0;
+                        io_parms.length = attrs->ia_size;
+                        rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
+                                          NULL, NULL, 1);
                         cFYI(1, "Wrt seteof rc %d", rc);
                 }
         } else
@@ -1889,10 +1935,15 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1889 CIFS_MOUNT_MAP_SPECIAL_CHR); 1935 CIFS_MOUNT_MAP_SPECIAL_CHR);
1890 if (rc == 0) { 1936 if (rc == 0) {
1891 unsigned int bytes_written; 1937 unsigned int bytes_written;
1892 rc = CIFSSMBWrite(xid, pTcon, netfid, 0, 1938
1893 attrs->ia_size, 1939 io_parms.netfid = netfid;
1894 &bytes_written, NULL, 1940 io_parms.pid = current->tgid;
1895 NULL, 1); 1941 io_parms.tcon = pTcon;
1942 io_parms.offset = 0;
1943 io_parms.length = attrs->ia_size;
1944 rc = CIFSSMBWrite(xid, &io_parms,
1945 &bytes_written,
1946 NULL, NULL, 1);
1896 cFYI(1, "wrt seteof rc %d", rc); 1947 cFYI(1, "wrt seteof rc %d", rc);
1897 CIFSSMBClose(xid, pTcon, netfid); 1948 CIFSSMBClose(xid, pTcon, netfid);
1898 } 1949 }
@@ -1920,7 +1971,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1920 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 1971 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1921 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1972 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1922 struct tcon_link *tlink; 1973 struct tcon_link *tlink;
1923 struct cifsTconInfo *pTcon; 1974 struct cifs_tcon *pTcon;
1924 struct cifs_unix_set_info_args *args = NULL; 1975 struct cifs_unix_set_info_args *args = NULL;
1925 struct cifsFileInfo *open_file; 1976 struct cifsFileInfo *open_file;
1926 1977
@@ -2206,7 +2257,7 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs)
2206{ 2257{
2207 struct inode *inode = direntry->d_inode; 2258 struct inode *inode = direntry->d_inode;
2208 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2259 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2209 struct cifsTconInfo *pTcon = cifs_sb_master_tcon(cifs_sb); 2260 struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
2210 2261
2211 if (pTcon->unix_ext) 2262 if (pTcon->unix_ext)
2212 return cifs_setattr_unix(direntry, attrs); 2263 return cifs_setattr_unix(direntry, attrs);
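The two cifs_set_file_size() hunks above replace CIFSSMBWrite()'s long positional argument list with a single cifs_io_parms bundle (netfid, pid, tcon, offset, length -- the fields visible in the diff). A minimal user-space sketch of why that pattern helps, using simplified stand-in types rather than the real kernel structures:

#include <stdio.h>

struct io_parms {
	unsigned short netfid;	/* open file handle on the server */
	unsigned int   pid;	/* pid stamped into the SMB header */
	void          *tcon;	/* tree connection (opaque here) */
	long long      offset;	/* file offset of the I/O */
	unsigned int   length;	/* bytes to transfer */
};

/* One struct argument replaces five positional ones, so a field
 * added later does not ripple through every caller. */
static int do_write(const struct io_parms *p, unsigned int *written)
{
	printf("write fid=%hu pid=%u off=%lld len=%u\n",
	       p->netfid, p->pid, p->offset, p->length);
	*written = p->length;	/* pretend the request fully succeeded */
	return 0;
}

int main(void)
{
	struct io_parms parms = {
		.netfid = 42, .pid = 1000, .tcon = NULL,
		.offset = 0, .length = 512,
	};
	unsigned int written;

	return do_write(&parms, &written);
}
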
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 0c98672d0122..4221b5e48a42 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,7 +38,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
38 struct cifs_sb_info *cifs_sb; 38 struct cifs_sb_info *cifs_sb;
39#ifdef CONFIG_CIFS_POSIX 39#ifdef CONFIG_CIFS_POSIX
40 struct cifsFileInfo *pSMBFile = filep->private_data; 40 struct cifsFileInfo *pSMBFile = filep->private_data;
41 struct cifsTconInfo *tcon; 41 struct cifs_tcon *tcon;
42 __u64 ExtAttrBits = 0; 42 __u64 ExtAttrBits = 0;
43 __u64 ExtAttrMask = 0; 43 __u64 ExtAttrMask = 0;
44 __u64 caps; 44 __u64 caps;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index ce417a9764a3..556b1a0b54de 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -175,7 +175,7 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
175} 175}
176 176
177static int 177static int
178CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon, 178CIFSCreateMFSymLink(const int xid, struct cifs_tcon *tcon,
179 const char *fromName, const char *toName, 179 const char *fromName, const char *toName,
180 const struct nls_table *nls_codepage, int remap) 180 const struct nls_table *nls_codepage, int remap)
181{ 181{
@@ -184,6 +184,7 @@ CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon,
184 __u16 netfid = 0; 184 __u16 netfid = 0;
185 u8 *buf; 185 u8 *buf;
186 unsigned int bytes_written = 0; 186 unsigned int bytes_written = 0;
187 struct cifs_io_parms io_parms;
187 188
188 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); 189 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
189 if (!buf) 190 if (!buf)
@@ -203,10 +204,13 @@ CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon,
203 return rc; 204 return rc;
204 } 205 }
205 206
206 rc = CIFSSMBWrite(xid, tcon, netfid, 207 io_parms.netfid = netfid;
207 CIFS_MF_SYMLINK_FILE_SIZE /* length */, 208 io_parms.pid = current->tgid;
208 0 /* offset */, 209 io_parms.tcon = tcon;
209 &bytes_written, buf, NULL, 0); 210 io_parms.offset = 0;
211 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
212
213 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, buf, NULL, 0);
210 CIFSSMBClose(xid, tcon, netfid); 214 CIFSSMBClose(xid, tcon, netfid);
211 kfree(buf); 215 kfree(buf);
212 if (rc != 0) 216 if (rc != 0)
@@ -219,7 +223,7 @@ CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon,
219} 223}
220 224
221static int 225static int
222CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon, 226CIFSQueryMFSymLink(const int xid, struct cifs_tcon *tcon,
223 const unsigned char *searchName, char **symlinkinfo, 227 const unsigned char *searchName, char **symlinkinfo,
224 const struct nls_table *nls_codepage, int remap) 228 const struct nls_table *nls_codepage, int remap)
225{ 229{
@@ -231,6 +235,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
231 unsigned int bytes_read = 0; 235 unsigned int bytes_read = 0;
232 int buf_type = CIFS_NO_BUFFER; 236 int buf_type = CIFS_NO_BUFFER;
233 unsigned int link_len = 0; 237 unsigned int link_len = 0;
238 struct cifs_io_parms io_parms;
234 FILE_ALL_INFO file_info; 239 FILE_ALL_INFO file_info;
235 240
236 rc = CIFSSMBOpen(xid, tcon, searchName, FILE_OPEN, GENERIC_READ, 241 rc = CIFSSMBOpen(xid, tcon, searchName, FILE_OPEN, GENERIC_READ,
@@ -249,11 +254,13 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
249 if (!buf) 254 if (!buf)
250 return -ENOMEM; 255 return -ENOMEM;
251 pbuf = buf; 256 pbuf = buf;
257 io_parms.netfid = netfid;
258 io_parms.pid = current->tgid;
259 io_parms.tcon = tcon;
260 io_parms.offset = 0;
261 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
252 262
253 rc = CIFSSMBRead(xid, tcon, netfid, 263 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type);
254 CIFS_MF_SYMLINK_FILE_SIZE /* length */,
255 0 /* offset */,
256 &bytes_read, &pbuf, &buf_type);
257 CIFSSMBClose(xid, tcon, netfid); 264 CIFSSMBClose(xid, tcon, netfid);
258 if (rc != 0) { 265 if (rc != 0) {
259 kfree(buf); 266 kfree(buf);
@@ -291,7 +298,8 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
291 int oplock = 0; 298 int oplock = 0;
292 __u16 netfid = 0; 299 __u16 netfid = 0;
293 struct tcon_link *tlink; 300 struct tcon_link *tlink;
294 struct cifsTconInfo *pTcon; 301 struct cifs_tcon *pTcon;
302 struct cifs_io_parms io_parms;
295 u8 *buf; 303 u8 *buf;
296 char *pbuf; 304 char *pbuf;
297 unsigned int bytes_read = 0; 305 unsigned int bytes_read = 0;
@@ -328,11 +336,13 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
328 goto out; 336 goto out;
329 } 337 }
330 pbuf = buf; 338 pbuf = buf;
339 io_parms.netfid = netfid;
340 io_parms.pid = current->tgid;
341 io_parms.tcon = pTcon;
342 io_parms.offset = 0;
343 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
331 344
332 rc = CIFSSMBRead(xid, pTcon, netfid, 345 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type);
333 CIFS_MF_SYMLINK_FILE_SIZE /* length */,
334 0 /* offset */,
335 &bytes_read, &pbuf, &buf_type);
336 CIFSSMBClose(xid, pTcon, netfid); 346 CIFSSMBClose(xid, pTcon, netfid);
337 if (rc != 0) { 347 if (rc != 0) {
338 kfree(buf); 348 kfree(buf);
@@ -370,7 +380,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
370 char *toName = NULL; 380 char *toName = NULL;
371 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 381 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
372 struct tcon_link *tlink; 382 struct tcon_link *tlink;
373 struct cifsTconInfo *pTcon; 383 struct cifs_tcon *pTcon;
374 struct cifsInodeInfo *cifsInode; 384 struct cifsInodeInfo *cifsInode;
375 385
376 tlink = cifs_sb_tlink(cifs_sb); 386 tlink = cifs_sb_tlink(cifs_sb);
@@ -445,7 +455,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
445 char *target_path = NULL; 455 char *target_path = NULL;
446 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 456 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
447 struct tcon_link *tlink = NULL; 457 struct tcon_link *tlink = NULL;
448 struct cifsTconInfo *tcon; 458 struct cifs_tcon *tcon;
449 459
450 xid = GetXid(); 460 xid = GetXid();
451 461
@@ -518,7 +528,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
518 int xid; 528 int xid;
519 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 529 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
520 struct tcon_link *tlink; 530 struct tcon_link *tlink;
521 struct cifsTconInfo *pTcon; 531 struct cifs_tcon *pTcon;
522 char *full_path = NULL; 532 char *full_path = NULL;
523 struct inode *newinode = NULL; 533 struct inode *newinode = NULL;
524 534
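The three MF-symlink paths above (create, query, check) each repeat the same five-assignment io_parms fill before calling CIFSSMBRead or CIFSSMBWrite. A tiny helper could factor that out; the one below is hypothetical, not part of the patch, and uses simplified stand-in types:

#include <stddef.h>
#include <stdint.h>

struct io_parms_like {	/* stand-in for struct cifs_io_parms */
	uint16_t netfid;
	uint32_t pid;
	void *tcon;
	int64_t offset;
	uint32_t length;
};

/* hypothetical helper: one call per site instead of five assignments */
static void fill_io_parms(struct io_parms_like *p, uint16_t netfid,
			  uint32_t pid, void *tcon, int64_t offset,
			  uint32_t length)
{
	p->netfid = netfid;
	p->pid = pid;
	p->tcon = tcon;
	p->offset = offset;
	p->length = length;
}

int main(void)
{
	struct io_parms_like p;

	fill_io_parms(&p, 7, 1000, NULL, 0, 4096);
	return p.length == 4096 ? 0 : 1;
}

With such a helper, each of the call sites above would shrink to a single line.
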
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0c684ae4c071..03a1f491d39b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -67,12 +67,12 @@ _FreeXid(unsigned int xid)
67 spin_unlock(&GlobalMid_Lock); 67 spin_unlock(&GlobalMid_Lock);
68} 68}
69 69
70struct cifsSesInfo * 70struct cifs_ses *
71sesInfoAlloc(void) 71sesInfoAlloc(void)
72{ 72{
73 struct cifsSesInfo *ret_buf; 73 struct cifs_ses *ret_buf;
74 74
75 ret_buf = kzalloc(sizeof(struct cifsSesInfo), GFP_KERNEL); 75 ret_buf = kzalloc(sizeof(struct cifs_ses), GFP_KERNEL);
76 if (ret_buf) { 76 if (ret_buf) {
77 atomic_inc(&sesInfoAllocCount); 77 atomic_inc(&sesInfoAllocCount);
78 ret_buf->status = CifsNew; 78 ret_buf->status = CifsNew;
@@ -85,7 +85,7 @@ sesInfoAlloc(void)
85} 85}
86 86
87void 87void
88sesInfoFree(struct cifsSesInfo *buf_to_free) 88sesInfoFree(struct cifs_ses *buf_to_free)
89{ 89{
90 if (buf_to_free == NULL) { 90 if (buf_to_free == NULL) {
91 cFYI(1, "Null buffer passed to sesInfoFree"); 91 cFYI(1, "Null buffer passed to sesInfoFree");
@@ -105,11 +105,11 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
105 kfree(buf_to_free); 105 kfree(buf_to_free);
106} 106}
107 107
108struct cifsTconInfo * 108struct cifs_tcon *
109tconInfoAlloc(void) 109tconInfoAlloc(void)
110{ 110{
111 struct cifsTconInfo *ret_buf; 111 struct cifs_tcon *ret_buf;
112 ret_buf = kzalloc(sizeof(struct cifsTconInfo), GFP_KERNEL); 112 ret_buf = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL);
113 if (ret_buf) { 113 if (ret_buf) {
114 atomic_inc(&tconInfoAllocCount); 114 atomic_inc(&tconInfoAllocCount);
115 ret_buf->tidStatus = CifsNew; 115 ret_buf->tidStatus = CifsNew;
@@ -124,7 +124,7 @@ tconInfoAlloc(void)
124} 124}
125 125
126void 126void
127tconInfoFree(struct cifsTconInfo *buf_to_free) 127tconInfoFree(struct cifs_tcon *buf_to_free)
128{ 128{
129 if (buf_to_free == NULL) { 129 if (buf_to_free == NULL) {
130 cFYI(1, "Null buffer passed to tconInfoFree"); 130 cFYI(1, "Null buffer passed to tconInfoFree");
@@ -295,21 +295,19 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
295 case it is responsibility of caller to set the mid */ 295 case it is responsibility of caller to set the mid */
296void 296void
297header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , 297header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
298 const struct cifsTconInfo *treeCon, int word_count 298 const struct cifs_tcon *treeCon, int word_count
299 /* length of fixed section (word count) in two byte units */) 299 /* length of fixed section (word count) in two byte units */)
300{ 300{
301 struct list_head *temp_item; 301 struct list_head *temp_item;
302 struct cifsSesInfo *ses; 302 struct cifs_ses *ses;
303 char *temp = (char *) buffer; 303 char *temp = (char *) buffer;
304 304
305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
306 306
307 buffer->smb_buf_length = 307 buffer->smb_buf_length = cpu_to_be32(
308 (2 * word_count) + sizeof(struct smb_hdr) - 308 (2 * word_count) + sizeof(struct smb_hdr) -
309 4 /* RFC 1001 length field does not count */ + 309 4 /* RFC 1001 length field does not count */ +
310 2 /* for bcc field itself */ ; 310 2 /* for bcc field itself */) ;
311 /* Note that this is the only network field that has to be converted
312 to big endian and it is done just before we send it */
313 311
314 buffer->Protocol[0] = 0xFF; 312 buffer->Protocol[0] = 0xFF;
315 buffer->Protocol[1] = 'S'; 313 buffer->Protocol[1] = 'S';
@@ -361,7 +359,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
361 "did not match tcon uid"); 359 "did not match tcon uid");
362 spin_lock(&cifs_tcp_ses_lock); 360 spin_lock(&cifs_tcp_ses_lock);
363 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { 361 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
364 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); 362 ses = list_entry(temp_item, struct cifs_ses, smb_ses_list);
365 if (ses->linux_uid == current_fsuid()) { 363 if (ses->linux_uid == current_fsuid()) {
366 if (ses->server == treeCon->ses->server) { 364 if (ses->server == treeCon->ses->server) {
367 cFYI(1, "found matching uid substitute right smb_uid"); 365 cFYI(1, "found matching uid substitute right smb_uid");
@@ -382,7 +380,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
382 if (treeCon->nocase) 380 if (treeCon->nocase)
383 buffer->Flags |= SMBFLG_CASELESS; 381 buffer->Flags |= SMBFLG_CASELESS;
384 if ((treeCon->ses) && (treeCon->ses->server)) 382 if ((treeCon->ses) && (treeCon->ses->server))
385 if (treeCon->ses->server->secMode & 383 if (treeCon->ses->server->sec_mode &
386 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 384 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
387 buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 385 buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
388 } 386 }
@@ -424,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
424int 422int
425checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) 423checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
426{ 424{
427 __u32 len = smb->smb_buf_length; 425 __u32 len = be32_to_cpu(smb->smb_buf_length);
428 __u32 clc_len; /* calculated length */ 426 __u32 clc_len; /* calculated length */
429 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len); 427 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
430 428
@@ -464,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
464 462
465 if (check_smb_hdr(smb, mid)) 463 if (check_smb_hdr(smb, mid))
466 return 1; 464 return 1;
467 clc_len = smbCalcSize_LE(smb); 465 clc_len = smbCalcSize(smb);
468 466
469 if (4 + len != length) { 467 if (4 + len != length) {
470 cERROR(1, "Length read does not match RFC1001 length %d", 468 cERROR(1, "Length read does not match RFC1001 length %d",
@@ -509,8 +507,8 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
509{ 507{
510 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; 508 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
511 struct list_head *tmp, *tmp1, *tmp2; 509 struct list_head *tmp, *tmp1, *tmp2;
512 struct cifsSesInfo *ses; 510 struct cifs_ses *ses;
513 struct cifsTconInfo *tcon; 511 struct cifs_tcon *tcon;
514 struct cifsInodeInfo *pCifsInode; 512 struct cifsInodeInfo *pCifsInode;
515 struct cifsFileInfo *netfile; 513 struct cifsFileInfo *netfile;
516 514
@@ -521,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
521 (struct smb_com_transaction_change_notify_rsp *)buf; 519 (struct smb_com_transaction_change_notify_rsp *)buf;
522 struct file_notify_information *pnotify; 520 struct file_notify_information *pnotify;
523 __u32 data_offset = 0; 521 __u32 data_offset = 0;
524 if (get_bcc_le(buf) > sizeof(struct file_notify_information)) { 522 if (get_bcc(buf) > sizeof(struct file_notify_information)) {
525 data_offset = le32_to_cpu(pSMBr->DataOffset); 523 data_offset = le32_to_cpu(pSMBr->DataOffset);
526 524
527 pnotify = (struct file_notify_information *) 525 pnotify = (struct file_notify_information *)
@@ -568,9 +566,9 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
568 /* look up tcon based on tid & uid */ 566 /* look up tcon based on tid & uid */
569 spin_lock(&cifs_tcp_ses_lock); 567 spin_lock(&cifs_tcp_ses_lock);
570 list_for_each(tmp, &srv->smb_ses_list) { 568 list_for_each(tmp, &srv->smb_ses_list) {
571 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); 569 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
572 list_for_each(tmp1, &ses->tcon_list) { 570 list_for_each(tmp1, &ses->tcon_list) {
573 tcon = list_entry(tmp1, struct cifsTconInfo, tcon_list); 571 tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
574 if (tcon->tid != buf->Tid) 572 if (tcon->tid != buf->Tid)
575 continue; 573 continue;
576 574
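The header_assemble() and checkSMB() hunks above change the convention for smb_buf_length: it is stored big-endian from assembly time onward, and every reader converts back with be32_to_cpu(). A user-space sketch of the same round trip, with htonl()/ntohl() standing in for cpu_to_be32()/be32_to_cpu() and 32 as an arbitrary stand-in for sizeof(struct smb_hdr):

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

int main(void)
{
	unsigned int word_count = 10;		/* parameter words */
	uint32_t host_len = 2 * word_count + 32 - 4 + 2;

	uint32_t wire_len = htonl(host_len);	/* cpu_to_be32 at assembly */
	assert(ntohl(wire_len) == host_len);	/* be32_to_cpu in checkSMB */
	return 0;
}
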
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..73e47e84b61a 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -836,7 +836,7 @@ ntstatus_to_dos(__u32 ntstatus, __u8 *eclass, __u16 *ecode)
836} 836}
837 837
838int 838int
839map_smb_to_linux_error(struct smb_hdr *smb, int logErr) 839map_smb_to_linux_error(struct smb_hdr *smb, bool logErr)
840{ 840{
841 unsigned int i; 841 unsigned int i;
842 int rc = -EIO; /* if transport error smb error may not be set */ 842 int rc = -EIO; /* if transport error smb error may not be set */
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
919 2 /* size of the bcc field */ + get_bcc(ptr)); 919 2 /* size of the bcc field */ + get_bcc(ptr));
920} 920}
921 921
922unsigned int
923smbCalcSize_LE(struct smb_hdr *ptr)
924{
925 return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
926 2 /* size of the bcc field */ + get_bcc_le(ptr));
927}
928
929/* The following are taken from fs/ntfs/util.c */ 922/* The following are taken from fs/ntfs/util.c */
930 923
931#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) 924#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
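With get_bcc() now the single byte-count accessor, the little-endian twin smbCalcSize_LE() removed above becomes dead code. What remains is one size formula; a runnable restatement, again with 32 standing in for sizeof(struct smb_hdr):

#include <stdio.h>

#define SMB_HDR_SIZE 32	/* stand-in, not the real struct size */

static unsigned int smb_calc_size(unsigned char word_count,
				  unsigned short bcc)
{
	/* header + parameter words (2 bytes each) + bcc field + data */
	return SMB_HDR_SIZE + 2 * word_count + 2 + bcc;
}

int main(void)
{
	printf("%u\n", smb_calc_size(10, 100));
	return 0;
}
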
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f8e4cd2a7912..6751e745bbc6 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -195,7 +195,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
195 int len; 195 int len;
196 int oplock = 0; 196 int oplock = 0;
197 int rc; 197 int rc;
198 struct cifsTconInfo *ptcon = cifs_sb_tcon(cifs_sb); 198 struct cifs_tcon *ptcon = cifs_sb_tcon(cifs_sb);
199 char *tmpbuffer; 199 char *tmpbuffer;
200 200
201 rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ, 201 rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ,
@@ -223,7 +223,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
223 struct cifsFileInfo *cifsFile; 223 struct cifsFileInfo *cifsFile;
224 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 224 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
225 struct tcon_link *tlink = NULL; 225 struct tcon_link *tlink = NULL;
226 struct cifsTconInfo *pTcon; 226 struct cifs_tcon *pTcon;
227 227
228 if (file->private_data == NULL) { 228 if (file->private_data == NULL) {
229 tlink = cifs_sb_tlink(cifs_sb); 229 tlink = cifs_sb_tlink(cifs_sb);
@@ -496,7 +496,7 @@ static int cifs_save_resume_key(const char *current_entry,
496 assume that they are located in the findfirst return buffer.*/ 496 assume that they are located in the findfirst return buffer.*/
497/* We start counting in the buffer with entry 2 and increment for every 497/* We start counting in the buffer with entry 2 and increment for every
498 entry (do not increment for . or .. entry) */ 498 entry (do not increment for . or .. entry) */
499static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, 499static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
500 struct file *file, char **ppCurrentEntry, int *num_to_ret) 500 struct file *file, char **ppCurrentEntry, int *num_to_ret)
501{ 501{
502 int rc = 0; 502 int rc = 0;
@@ -764,7 +764,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
764{ 764{
765 int rc = 0; 765 int rc = 0;
766 int xid, i; 766 int xid, i;
767 struct cifsTconInfo *pTcon; 767 struct cifs_tcon *pTcon;
768 struct cifsFileInfo *cifsFile = NULL; 768 struct cifsFileInfo *cifsFile = NULL;
769 char *current_entry; 769 char *current_entry;
770 int num_to_fill = 0; 770 int num_to_fill = 0;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 645114ad0a10..3892ab817a36 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -37,13 +37,13 @@
37 * the socket has been reestablished (so we know whether to use vc 0). 37 * the socket has been reestablished (so we know whether to use vc 0).
38 * Called while holding the cifs_tcp_ses_lock, so do not block 38 * Called while holding the cifs_tcp_ses_lock, so do not block
39 */ 39 */
40static bool is_first_ses_reconnect(struct cifsSesInfo *ses) 40static bool is_first_ses_reconnect(struct cifs_ses *ses)
41{ 41{
42 struct list_head *tmp; 42 struct list_head *tmp;
43 struct cifsSesInfo *tmp_ses; 43 struct cifs_ses *tmp_ses;
44 44
45 list_for_each(tmp, &ses->server->smb_ses_list) { 45 list_for_each(tmp, &ses->server->smb_ses_list) {
46 tmp_ses = list_entry(tmp, struct cifsSesInfo, 46 tmp_ses = list_entry(tmp, struct cifs_ses,
47 smb_ses_list); 47 smb_ses_list);
48 if (tmp_ses->need_reconnect == false) 48 if (tmp_ses->need_reconnect == false)
49 return false; 49 return false;
@@ -61,11 +61,11 @@ static bool is_first_ses_reconnect(struct cifsSesInfo *ses)
61 * any vc but zero (some servers reset the connection on vcnum zero) 61 * any vc but zero (some servers reset the connection on vcnum zero)
62 * 62 *
63 */ 63 */
64static __le16 get_next_vcnum(struct cifsSesInfo *ses) 64static __le16 get_next_vcnum(struct cifs_ses *ses)
65{ 65{
66 __u16 vcnum = 0; 66 __u16 vcnum = 0;
67 struct list_head *tmp; 67 struct list_head *tmp;
68 struct cifsSesInfo *tmp_ses; 68 struct cifs_ses *tmp_ses;
69 __u16 max_vcs = ses->server->max_vcs; 69 __u16 max_vcs = ses->server->max_vcs;
70 __u16 i; 70 __u16 i;
71 int free_vc_found = 0; 71 int free_vc_found = 0;
@@ -87,7 +87,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses)
87 free_vc_found = 1; 87 free_vc_found = 1;
88 88
89 list_for_each(tmp, &ses->server->smb_ses_list) { 89 list_for_each(tmp, &ses->server->smb_ses_list) {
90 tmp_ses = list_entry(tmp, struct cifsSesInfo, 90 tmp_ses = list_entry(tmp, struct cifs_ses,
91 smb_ses_list); 91 smb_ses_list);
92 if (tmp_ses->vcnum == i) { 92 if (tmp_ses->vcnum == i) {
93 free_vc_found = 0; 93 free_vc_found = 0;
@@ -114,7 +114,7 @@ get_vc_num_exit:
114 return cpu_to_le16(vcnum); 114 return cpu_to_le16(vcnum);
115} 115}
116 116
117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) 117static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
118{ 118{
119 __u32 capabilities = 0; 119 __u32 capabilities = 0;
120 120
@@ -136,7 +136,7 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
136 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | 136 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
137 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; 137 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
138 138
139 if (ses->server->secMode & 139 if (ses->server->sec_mode &
140 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 140 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
141 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 141 pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
142 142
@@ -181,7 +181,7 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
181 *pbcc_area = bcc_ptr; 181 *pbcc_area = bcc_ptr;
182} 182}
183 183
184static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses, 184static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
185 const struct nls_table *nls_cp) 185 const struct nls_table *nls_cp)
186{ 186{
187 char *bcc_ptr = *pbcc_area; 187 char *bcc_ptr = *pbcc_area;
@@ -204,7 +204,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses,
204} 204}
205 205
206 206
207static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, 207static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
208 const struct nls_table *nls_cp) 208 const struct nls_table *nls_cp)
209{ 209{
210 char *bcc_ptr = *pbcc_area; 210 char *bcc_ptr = *pbcc_area;
@@ -236,7 +236,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
236 *pbcc_area = bcc_ptr; 236 *pbcc_area = bcc_ptr;
237} 237}
238 238
239static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, 239static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
240 const struct nls_table *nls_cp) 240 const struct nls_table *nls_cp)
241{ 241{
242 char *bcc_ptr = *pbcc_area; 242 char *bcc_ptr = *pbcc_area;
@@ -276,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
276} 276}
277 277
278static void 278static void
279decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, 279decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses,
280 const struct nls_table *nls_cp) 280 const struct nls_table *nls_cp)
281{ 281{
282 int len; 282 int len;
@@ -310,7 +310,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
310} 310}
311 311
312static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, 312static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
313 struct cifsSesInfo *ses, 313 struct cifs_ses *ses,
314 const struct nls_table *nls_cp) 314 const struct nls_table *nls_cp)
315{ 315{
316 int rc = 0; 316 int rc = 0;
@@ -364,7 +364,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
364} 364}
365 365
366static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, 366static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
367 struct cifsSesInfo *ses) 367 struct cifs_ses *ses)
368{ 368{
369 unsigned int tioffset; /* challenge message target info area */ 369 unsigned int tioffset; /* challenge message target info area */
370 unsigned int tilen; /* challenge message target info area length */ 370 unsigned int tilen; /* challenge message target info area length */
@@ -411,7 +411,7 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
411/* We do not malloc the blob, it is passed in pbuffer, because 411/* We do not malloc the blob, it is passed in pbuffer, because
412 it is fixed size, and small, making this approach cleaner */ 412 it is fixed size, and small, making this approach cleaner */
413static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, 413static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
414 struct cifsSesInfo *ses) 414 struct cifs_ses *ses)
415{ 415{
416 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; 416 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
417 __u32 flags; 417 __u32 flags;
@@ -424,7 +424,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
424 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | 424 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
425 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 425 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
426 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; 426 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
427 if (ses->server->secMode & 427 if (ses->server->sec_mode &
428 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 428 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
429 flags |= NTLMSSP_NEGOTIATE_SIGN; 429 flags |= NTLMSSP_NEGOTIATE_SIGN;
430 if (!ses->server->session_estab) 430 if (!ses->server->session_estab)
@@ -449,7 +449,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
449 This function returns the length of the data in the blob */ 449 This function returns the length of the data in the blob */
450static int build_ntlmssp_auth_blob(unsigned char *pbuffer, 450static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
451 u16 *buflen, 451 u16 *buflen,
452 struct cifsSesInfo *ses, 452 struct cifs_ses *ses,
453 const struct nls_table *nls_cp) 453 const struct nls_table *nls_cp)
454{ 454{
455 int rc; 455 int rc;
@@ -464,10 +464,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
464 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | 464 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
465 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 465 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
466 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; 466 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
467 if (ses->server->secMode & 467 if (ses->server->sec_mode &
468 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 468 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
469 flags |= NTLMSSP_NEGOTIATE_SIGN; 469 flags |= NTLMSSP_NEGOTIATE_SIGN;
470 if (ses->server->secMode & SECMODE_SIGN_REQUIRED) 470 if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED)
471 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; 471 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
472 472
473 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); 473 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
@@ -551,7 +551,7 @@ setup_ntlmv2_ret:
551} 551}
552 552
553int 553int
554CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 554CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses,
555 const struct nls_table *nls_cp) 555 const struct nls_table *nls_cp)
556{ 556{
557 int rc = 0; 557 int rc = 0;
@@ -621,7 +621,7 @@ ssetup_ntlmssp_authenticate:
621 and rest of bcc area. This allows us to avoid 621 and rest of bcc area. This allows us to avoid
622 a large buffer 17K allocation */ 622 a large buffer 17K allocation */
623 iov[0].iov_base = (char *)pSMB; 623 iov[0].iov_base = (char *)pSMB;
624 iov[0].iov_len = smb_buf->smb_buf_length + 4; 624 iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
625 625
626 /* setting this here allows the code at the end of the function 626 /* setting this here allows the code at the end of the function
627 to free the request buffer if there's an error */ 627 to free the request buffer if there's an error */
@@ -656,8 +656,8 @@ ssetup_ntlmssp_authenticate:
656 * to use challenge/response method (i.e. Password bit is 1). 656 * to use challenge/response method (i.e. Password bit is 1).
657 */ 657 */
658 658
659 calc_lanman_hash(ses->password, ses->server->cryptkey, 659 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
660 ses->server->secMode & SECMODE_PW_ENCRYPT ? 660 ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
661 true : false, lnm_session_key); 661 true : false, lnm_session_key);
662 662
663 ses->flags |= CIFS_SES_LANMAN; 663 ses->flags |= CIFS_SES_LANMAN;
@@ -859,9 +859,10 @@ ssetup_ntlmssp_authenticate:
859 iov[2].iov_len = (long) bcc_ptr - (long) str_area; 859 iov[2].iov_len = (long) bcc_ptr - (long) str_area;
860 860
861 count = iov[1].iov_len + iov[2].iov_len; 861 count = iov[1].iov_len + iov[2].iov_len;
862 smb_buf->smb_buf_length += count; 862 smb_buf->smb_buf_length =
863 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
863 864
864 put_bcc_le(count, smb_buf); 865 put_bcc(count, smb_buf);
865 866
866 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, 867 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
867 CIFS_LOG_ERROR); 868 CIFS_LOG_ERROR);
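Because smb_buf_length now lives in big-endian form (see the misc.c and transport.c changes elsewhere in this series), appending the byte count is no longer a plain +=: the hunk above converts to host order, adds, and converts back. The same round trip in user-space terms, htonl()/ntohl() standing in for the kernel helpers:

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t len_be = htonl(100);	/* field kept in on-wire order */
	uint32_t count = 24;		/* bcc bytes being appended */

	/* a bare "len_be += count" would add to a byte-swapped value;
	 * convert, add, then convert back instead */
	len_be = htonl(ntohl(len_be) + count);

	assert(ntohl(len_be) == 124);
	return 0;
}
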
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4
5 a partial implementation of DES designed for use in the
6 SMB authentication protocol
7
8 Copyright (C) Andrew Tridgell 1998
9 Modified by Steve French (sfrench@us.ibm.com) 2002,2004
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26/* NOTES:
27
28 This code makes no attempt to be fast! In fact, it is a very
29 slow implementation
30
31 This code is NOT a complete DES implementation. It implements only
32 the minimum necessary for SMB authentication, as used by all SMB
33 products (including every copy of Microsoft Windows95 ever sold)
34
35 In particular, it can only do an unchained forward DES pass. This
36 means it is not possible to use this code for encryption/decryption
37 of data, instead it is only useful as a "hash" algorithm.
38
39 There is no entry point into this code that allows normal DES operation.
40
41 I believe this means that this code does not come under ITAR
42 regulations but this is NOT a legal opinion. If you are concerned
43 about the applicability of ITAR regulations to this code then you
44 should confirm it for yourself (and maybe let me know if you come
45 up with a different answer to the one above)
46*/
47#include <linux/slab.h>
48#define uchar unsigned char
49
50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
51 1, 58, 50, 42, 34, 26, 18,
52 10, 2, 59, 51, 43, 35, 27,
53 19, 11, 3, 60, 52, 44, 36,
54 63, 55, 47, 39, 31, 23, 15,
55 7, 62, 54, 46, 38, 30, 22,
56 14, 6, 61, 53, 45, 37, 29,
57 21, 13, 5, 28, 20, 12, 4
58};
59
60static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
61 3, 28, 15, 6, 21, 10,
62 23, 19, 12, 4, 26, 8,
63 16, 7, 27, 20, 13, 2,
64 41, 52, 31, 37, 47, 55,
65 30, 40, 51, 45, 33, 48,
66 44, 49, 39, 56, 34, 53,
67 46, 42, 50, 36, 29, 32
68};
69
70static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
71 60, 52, 44, 36, 28, 20, 12, 4,
72 62, 54, 46, 38, 30, 22, 14, 6,
73 64, 56, 48, 40, 32, 24, 16, 8,
74 57, 49, 41, 33, 25, 17, 9, 1,
75 59, 51, 43, 35, 27, 19, 11, 3,
76 61, 53, 45, 37, 29, 21, 13, 5,
77 63, 55, 47, 39, 31, 23, 15, 7
78};
79
80static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
81 4, 5, 6, 7, 8, 9,
82 8, 9, 10, 11, 12, 13,
83 12, 13, 14, 15, 16, 17,
84 16, 17, 18, 19, 20, 21,
85 20, 21, 22, 23, 24, 25,
86 24, 25, 26, 27, 28, 29,
87 28, 29, 30, 31, 32, 1
88};
89
90static uchar perm5[32] = { 16, 7, 20, 21,
91 29, 12, 28, 17,
92 1, 15, 23, 26,
93 5, 18, 31, 10,
94 2, 8, 24, 14,
95 32, 27, 3, 9,
96 19, 13, 30, 6,
97 22, 11, 4, 25
98};
99
100static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
101 39, 7, 47, 15, 55, 23, 63, 31,
102 38, 6, 46, 14, 54, 22, 62, 30,
103 37, 5, 45, 13, 53, 21, 61, 29,
104 36, 4, 44, 12, 52, 20, 60, 28,
105 35, 3, 43, 11, 51, 19, 59, 27,
106 34, 2, 42, 10, 50, 18, 58, 26,
107 33, 1, 41, 9, 49, 17, 57, 25
108};
109
110static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
111
112static uchar sbox[8][4][16] = {
113 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
114 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
115 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
116 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
117
118 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
119 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
120 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
121 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
122
123 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
124 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
125 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
126 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
127
128 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
129 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
130 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
131 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
132
133 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
134 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
135 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
136 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
137
138 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
139 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
140 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
141 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
142
143 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
144 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
145 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
146 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
147
148 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
149 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
150 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
151 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
152};
153
154static void
155permute(char *out, char *in, uchar *p, int n)
156{
157 int i;
158 for (i = 0; i < n; i++)
159 out[i] = in[p[i] - 1];
160}
161
162static void
163lshift(char *d, int count, int n)
164{
165 char out[64];
166 int i;
167 for (i = 0; i < n; i++)
168 out[i] = d[(i + count) % n];
169 for (i = 0; i < n; i++)
170 d[i] = out[i];
171}
172
173static void
174concat(char *out, char *in1, char *in2, int l1, int l2)
175{
176 while (l1--)
177 *out++ = *in1++;
178 while (l2--)
179 *out++ = *in2++;
180}
181
182static void
183xor(char *out, char *in1, char *in2, int n)
184{
185 int i;
186 for (i = 0; i < n; i++)
187 out[i] = in1[i] ^ in2[i];
188}
189
190static void
191dohash(char *out, char *in, char *key, int forw)
192{
193 int i, j, k;
194 char *pk1;
195 char c[28];
196 char d[28];
197 char *cd;
198 char (*ki)[48];
199 char *pd1;
200 char l[32], r[32];
201 char *rl;
202
203 /* Have to reduce stack usage */
204 pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
205 if (pk1 == NULL)
206 return;
207
208 ki = kmalloc(16*48, GFP_KERNEL);
209 if (ki == NULL) {
210 kfree(pk1);
211 return;
212 }
213
214 cd = pk1 + 56;
215 pd1 = cd + 56;
216 rl = pd1 + 64;
217
218 permute(pk1, key, perm1, 56);
219
220 for (i = 0; i < 28; i++)
221 c[i] = pk1[i];
222 for (i = 0; i < 28; i++)
223 d[i] = pk1[i + 28];
224
225 for (i = 0; i < 16; i++) {
226 lshift(c, sc[i], 28);
227 lshift(d, sc[i], 28);
228
229 concat(cd, c, d, 28, 28);
230 permute(ki[i], cd, perm2, 48);
231 }
232
233 permute(pd1, in, perm3, 64);
234
235 for (j = 0; j < 32; j++) {
236 l[j] = pd1[j];
237 r[j] = pd1[j + 32];
238 }
239
240 for (i = 0; i < 16; i++) {
241 char *er; /* er[48] */
242 char *erk; /* erk[48] */
243 char b[8][6];
244 char *cb; /* cb[32] */
245 char *pcb; /* pcb[32] */
246 char *r2; /* r2[32] */
247
248 er = kmalloc(48+48+32+32+32, GFP_KERNEL);
249 if (er == NULL) {
250 kfree(pk1);
251 kfree(ki);
252 return;
253 }
254 erk = er+48;
255 cb = erk+48;
256 pcb = cb+32;
257 r2 = pcb+32;
258
259 permute(er, r, perm4, 48);
260
261 xor(erk, er, ki[forw ? i : 15 - i], 48);
262
263 for (j = 0; j < 8; j++)
264 for (k = 0; k < 6; k++)
265 b[j][k] = erk[j * 6 + k];
266
267 for (j = 0; j < 8; j++) {
268 int m, n;
269 m = (b[j][0] << 1) | b[j][5];
270
271 n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
272 1) | b[j][4];
273
274 for (k = 0; k < 4; k++)
275 b[j][k] =
276 (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
277 }
278
279 for (j = 0; j < 8; j++)
280 for (k = 0; k < 4; k++)
281 cb[j * 4 + k] = b[j][k];
282 permute(pcb, cb, perm5, 32);
283
284 xor(r2, l, pcb, 32);
285
286 for (j = 0; j < 32; j++)
287 l[j] = r[j];
288
289 for (j = 0; j < 32; j++)
290 r[j] = r2[j];
291
292 kfree(er);
293 }
294
295 concat(rl, r, l, 32, 32);
296
297 permute(out, rl, perm6, 64);
298 kfree(pk1);
299 kfree(ki);
300}
301
302static void
303str_to_key(unsigned char *str, unsigned char *key)
304{
305 int i;
306
307 key[0] = str[0] >> 1;
308 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
309 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
310 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
311 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
312 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
313 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
314 key[7] = str[6] & 0x7F;
315 for (i = 0; i < 8; i++)
316 key[i] = (key[i] << 1);
317}
318
319static void
320smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
321 int forw)
322{
323 int i;
324 char *outb; /* outb[64] */
325 char *inb; /* inb[64] */
326 char *keyb; /* keyb[64] */
327 unsigned char key2[8];
328
329 outb = kmalloc(64 * 3, GFP_KERNEL);
330 if (outb == NULL)
331 return;
332
333 inb = outb + 64;
334 keyb = inb + 64;
335
336 str_to_key(key, key2);
337
338 for (i = 0; i < 64; i++) {
339 inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
340 keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
341 outb[i] = 0;
342 }
343
344 dohash(outb, inb, keyb, forw);
345
346 for (i = 0; i < 8; i++)
347 out[i] = 0;
348
349 for (i = 0; i < 64; i++) {
350 if (outb[i])
351 out[i / 8] |= (1 << (7 - (i % 8)));
352 }
353 kfree(outb);
354}
355
356void
357E_P16(unsigned char *p14, unsigned char *p16)
358{
359 unsigned char sp8[8] =
360 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
361 smbhash(p16, sp8, p14, 1);
362 smbhash(p16 + 8, sp8, p14 + 7, 1);
363}
364
365void
366E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
367{
368 smbhash(p24, c8, p21, 1);
369 smbhash(p24 + 8, c8, p21 + 7, 1);
370 smbhash(p24 + 16, c8, p21 + 14, 1);
371}
372
373#if 0 /* currently unused */
374static void
375D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
376{
377 smbhash(out, in, p14, 0);
378 smbhash(out + 8, in + 8, p14 + 7, 0);
379}
380
381static void
382E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
383{
384 smbhash(out, in, p14, 1);
385 smbhash(out + 8, in + 8, p14 + 7, 1);
386}
387/* these routines are currently unneeded, but may be
388 needed later */
389void
390cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
391{
392 unsigned char buf[8];
393
394 smbhash(buf, in, key, 1);
395 smbhash(out, buf, key + 9, 1);
396}
397
398void
399cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
400{
401 unsigned char buf[8];
402 static unsigned char key2[8];
403
404 smbhash(buf, in, key, 1);
405 key2[0] = key[7];
406 smbhash(out, buf, key2, 1);
407}
408
409void
410cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
411{
412 static unsigned char key2[8];
413
414 smbhash(out, in, key, forw);
415 key2[0] = key[7];
416 smbhash(out + 8, in + 8, key2, forw);
417}
418#endif /* unneeded routines */
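The handrolled DES above is removed in favor of the kernel crypto API; the one piece that survives into smbencrypt.c (next diff) is the 7-byte-to-8-byte key expansion. A runnable extract of that str_to_key() logic, transcribed from the diff:

#include <stdio.h>

static void str_to_key(unsigned char *str, unsigned char *key)
{
	int i;

	/* spread 56 key bits across 8 bytes, 7 bits per byte... */
	key[0] = str[0] >> 1;
	key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
	key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
	key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
	key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
	key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
	key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
	key[7] = str[6] & 0x7F;
	/* ...then shift left so each byte's low (DES parity) bit is free */
	for (i = 0; i < 8; i++)
		key[i] = (key[i] << 1);
}

int main(void)
{
	unsigned char in[7] = { 'p', 'a', 's', 's', 'w', 'd', '!' };
	unsigned char key[8];
	int i;

	str_to_key(in, key);
	for (i = 0; i < 8; i++)
		printf("%02x ", key[i]);
	printf("\n");
	return 0;
}
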
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) 47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) 48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
49 49
50static void
51str_to_key(unsigned char *str, unsigned char *key)
52{
53 int i;
54
55 key[0] = str[0] >> 1;
56 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
57 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
58 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
59 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
60 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
61 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
62 key[7] = str[6] & 0x7F;
63 for (i = 0; i < 8; i++)
64 key[i] = (key[i] << 1);
65}
66
67static int
68smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
69{
70 int rc;
71 unsigned char key2[8];
72 struct crypto_blkcipher *tfm_des;
73 struct scatterlist sgin, sgout;
74 struct blkcipher_desc desc;
75
76 str_to_key(key, key2);
77
78 tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
79 if (IS_ERR(tfm_des)) {
80 rc = PTR_ERR(tfm_des);
81 cERROR(1, "could not allocate des crypto API\n");
82 goto smbhash_err;
83 }
84
85 desc.tfm = tfm_des;
86
87 crypto_blkcipher_setkey(tfm_des, key2, 8);
88
89 sg_init_one(&sgin, in, 8);
90 sg_init_one(&sgout, out, 8);
91
92 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
93 if (rc) {
94 cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
95 crypto_free_blkcipher(tfm_des);
96 goto smbhash_err;
97 }
98
99smbhash_err:
100 return rc;
101}
102
103static int
104E_P16(unsigned char *p14, unsigned char *p16)
105{
106 int rc;
107 unsigned char sp8[8] =
108 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
109
110 rc = smbhash(p16, sp8, p14);
111 if (rc)
112 return rc;
113 rc = smbhash(p16 + 8, sp8, p14 + 7);
114 return rc;
115}
116
117static int
118E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
119{
120 int rc;
121
122 rc = smbhash(p24, c8, p21);
123 if (rc)
124 return rc;
125 rc = smbhash(p24 + 8, c8, p21 + 7);
126 if (rc)
127 return rc;
128 rc = smbhash(p24 + 16, c8, p21 + 14);
129 return rc;
130}
131
50/* produce a md4 message digest from data of length n bytes */ 132/* produce a md4 message digest from data of length n bytes */
51int 133int
52mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) 134mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
87 return rc; 169 return rc;
88} 170}
89 171
90/* Does the des encryption from the NT or LM MD4 hash. */
91static void
92SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
93 unsigned char p24[24])
94{
95 unsigned char p21[21];
96
97 memset(p21, '\0', 21);
98
99 memcpy(p21, passwd, 16);
100 E_P24(p21, c8, p24);
101}
102
103/* 172/*
104 This implements the X/Open SMB password encryption 173 This implements the X/Open SMB password encryption
105 It takes a password, a 8 byte "crypt key" and puts 24 bytes of 174 It takes a password, a 8 byte "crypt key" and puts 24 bytes of
106 encrypted password into p24 */ 175 encrypted password into p24 */
107/* Note that password must be uppercased and null terminated */ 176/* Note that password must be uppercased and null terminated */
108void 177int
109SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24) 178SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
110{ 179{
111 unsigned char p14[15], p21[21]; 180 int rc;
181 unsigned char p14[14], p16[16], p21[21];
112 182
113 memset(p21, '\0', 21);
114 memset(p14, '\0', 14); 183 memset(p14, '\0', 14);
115 strncpy((char *) p14, (char *) passwd, 14); 184 memset(p16, '\0', 16);
185 memset(p21, '\0', 21);
116 186
117/* strupper((char *)p14); *//* BB at least uppercase the easy range */ 187 memcpy(p14, passwd, 14);
118 E_P16(p14, p21); 188 rc = E_P16(p14, p16);
189 if (rc)
190 return rc;
119 191
120 SMBOWFencrypt(p21, c8, p24); 192 memcpy(p21, p16, 16);
193 rc = E_P24(p21, c8, p24);
121 194
122 memset(p14, 0, 15); 195 return rc;
123 memset(p21, 0, 21);
124} 196}
125 197
126/* Routines for Windows NT MD4 Hash functions. */ 198/* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
279SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 351SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
280{ 352{
281 int rc; 353 int rc;
282 unsigned char p21[21]; 354 unsigned char p16[16], p21[21];
283 355
356 memset(p16, '\0', 16);
284 memset(p21, '\0', 21); 357 memset(p21, '\0', 21);
285 358
286 rc = E_md4hash(passwd, p21); 359 rc = E_md4hash(passwd, p16);
287 if (rc) { 360 if (rc) {
288 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); 361 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
289 return rc; 362 return rc;
290 } 363 }
291 SMBOWFencrypt(p21, c8, p24); 364 memcpy(p21, p16, 16);
365 rc = E_P24(p21, c8, p24);
292 return rc; 366 return rc;
293} 367}
294 368
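The smbencrypt.c rewrite above threads an int return code through smbhash(), E_P16() and E_P24(), so a crypto API failure propagates to SMBencrypt()/SMBNTencrypt() instead of being silently ignored. One detail worth flagging: as committed, smbhash() appears to release the DES transform only in its error branch. A minimal user-space sketch of the intended control flow, with stub calls (not real kernel crypto APIs) and the transform released on both paths:

#include <stdio.h>

static int stub_alloc(void **tfm) { *tfm = (void *)1; return 0; }
static int stub_encrypt(void *tfm) { (void)tfm; return 0; }
static void stub_free(void *tfm) { (void)tfm; }

static int smbhash_sketch(void)
{
	void *tfm;
	int rc;

	rc = stub_alloc(&tfm);
	if (rc) {
		fprintf(stderr, "could not allocate des crypto\n");
		return rc;		/* nothing to free yet */
	}

	rc = stub_encrypt(tfm);
	if (rc)
		fprintf(stderr, "encrypt failed rc: %d\n", rc);

	stub_free(tfm);			/* released on success and failure */
	return rc;
}

int main(void)
{
	return smbhash_sketch();
}
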
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..147aa22c3c3a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
129 unsigned int len = iov[0].iov_len; 129 unsigned int len = iov[0].iov_len;
130 unsigned int total_len; 130 unsigned int total_len;
131 int first_vec = 0; 131 int first_vec = 0;
132 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 132 unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
133 struct socket *ssocket = server->ssocket; 133 struct socket *ssocket = server->ssocket;
134 134
135 if (ssocket == NULL) 135 if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
144 else 144 else
145 smb_msg.msg_flags = MSG_NOSIGNAL; 145 smb_msg.msg_flags = MSG_NOSIGNAL;
146 146
147 /* smb header is converted in header_assemble. bcc and rest of SMB word
148 area, and byte area if necessary, is converted to littleendian in
149 cifssmb.c and RFC1001 len is converted to bigendian in smb_send
150 Flags2 is converted in SendReceive */
151
152
153 total_len = 0; 147 total_len = 0;
154 for (i = 0; i < n_vec; i++) 148 for (i = 0; i < n_vec; i++)
155 total_len += iov[i].iov_len; 149 total_len += iov[i].iov_len;
156 150
157 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
158 cFYI(1, "Sending smb: total_len %d", total_len); 151 cFYI(1, "Sending smb: total_len %d", total_len);
159 dump_smb(smb_buffer, len); 152 dump_smb(smb_buffer, len);
160 153
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
243 236
244 /* Don't want to modify the buffer as a 237 /* Don't want to modify the buffer as a
245 side effect of this call. */ 238 side effect of this call. */
246 smb_buffer->smb_buf_length = smb_buf_length; 239 smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
247 240
248 return rc; 241 return rc;
249} 242}
@@ -302,7 +295,7 @@ static int wait_for_free_request(struct TCP_Server_Info *server,
302 return 0; 295 return 0;
303} 296}
304 297
305static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, 298static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
306 struct mid_q_entry **ppmidQ) 299 struct mid_q_entry **ppmidQ)
307{ 300{
308 if (ses->server->tcpStatus == CifsExiting) { 301 if (ses->server->tcpStatus == CifsExiting) {
@@ -349,22 +342,24 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
349 * the result. Caller is responsible for dealing with timeouts. 342 * the result. Caller is responsible for dealing with timeouts.
350 */ 343 */
351int 344int
352cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, 345cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
353 mid_callback_t *callback, void *cbdata) 346 unsigned int nvec, mid_callback_t *callback, void *cbdata,
347 bool ignore_pend)
354{ 348{
355 int rc; 349 int rc;
356 struct mid_q_entry *mid; 350 struct mid_q_entry *mid;
351 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
357 352
358 rc = wait_for_free_request(server, CIFS_ASYNC_OP); 353 rc = wait_for_free_request(server, ignore_pend ? CIFS_ASYNC_OP : 0);
359 if (rc) 354 if (rc)
360 return rc; 355 return rc;
361 356
362 /* enable signing if server requires it */ 357 /* enable signing if server requires it */
363 if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 358 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
364 in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 359 hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
365 360
366 mutex_lock(&server->srv_mutex); 361 mutex_lock(&server->srv_mutex);
367 mid = AllocMidQEntry(in_buf, server); 362 mid = AllocMidQEntry(hdr, server);
368 if (mid == NULL) { 363 if (mid == NULL) {
369 mutex_unlock(&server->srv_mutex); 364 mutex_unlock(&server->srv_mutex);
370 return -ENOMEM; 365 return -ENOMEM;
@@ -375,7 +370,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
375 list_add_tail(&mid->qhead, &server->pending_mid_q); 370 list_add_tail(&mid->qhead, &server->pending_mid_q);
376 spin_unlock(&GlobalMid_Lock); 371 spin_unlock(&GlobalMid_Lock);
377 372
378 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); 373 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
379 if (rc) { 374 if (rc) {
380 mutex_unlock(&server->srv_mutex); 375 mutex_unlock(&server->srv_mutex);
381 goto out_err; 376 goto out_err;
@@ -387,7 +382,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
387#ifdef CONFIG_CIFS_STATS2 382#ifdef CONFIG_CIFS_STATS2
388 atomic_inc(&server->inSend); 383 atomic_inc(&server->inSend);
389#endif 384#endif
390 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 385 rc = smb_sendv(server, iov, nvec);
391#ifdef CONFIG_CIFS_STATS2 386#ifdef CONFIG_CIFS_STATS2
392 atomic_dec(&server->inSend); 387 atomic_dec(&server->inSend);
393 mid->when_sent = jiffies; 388 mid->when_sent = jiffies;
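cifs_call_async() now takes a kvec array plus count instead of a single smb_hdr, signing with cifs_sign_smb2() and sending with smb_sendv(), so header and payload never need copying into one contiguous buffer. A user-space sketch of assembling such a two-element vector; struct kvec is redefined here as a stand-in so the snippet compiles outside the kernel:

#include <stddef.h>
#include <string.h>

struct kvec { void *iov_base; size_t iov_len; };	/* stand-in */

static void build_iov(struct kvec *iov, void *hdr, size_t hdr_len,
		      void *data, size_t data_len)
{
	iov[0].iov_base = hdr;  iov[0].iov_len = hdr_len;	/* SMB header */
	iov[1].iov_base = data; iov[1].iov_len = data_len;	/* payload */
}

int main(void)
{
	char hdr[32], payload[64];
	struct kvec iov[2];

	memset(hdr, 0, sizeof(hdr));
	memset(payload, 0, sizeof(payload));
	build_iov(iov, hdr, sizeof(hdr), payload, sizeof(payload));
	return iov[0].iov_len + iov[1].iov_len == 96 ? 0 : 1;
}
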
@@ -414,7 +409,7 @@ out_err:
414 * 409 *
415 */ 410 */
416int 411int
417SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses, 412SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
418 struct smb_hdr *in_buf, int flags) 413 struct smb_hdr *in_buf, int flags)
419{ 414{
420 int rc; 415 int rc;
@@ -422,7 +417,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
422 int resp_buf_type; 417 int resp_buf_type;
423 418
424 iov[0].iov_base = (char *)in_buf; 419 iov[0].iov_base = (char *)in_buf;
425 iov[0].iov_len = in_buf->smb_buf_length + 4; 420 iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
426 flags |= CIFS_NO_RESP; 421 flags |= CIFS_NO_RESP;
427 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 422 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
428 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc); 423 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -431,7 +426,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
431} 426}
432 427
433static int 428static int
434sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) 429cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
435{ 430{
436 int rc = 0; 431 int rc = 0;
437 432
@@ -439,28 +434,21 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
439 mid->mid, mid->midState); 434 mid->mid, mid->midState);
440 435
441 spin_lock(&GlobalMid_Lock); 436 spin_lock(&GlobalMid_Lock);
442 /* ensure that it's no longer on the pending_mid_q */
443 list_del_init(&mid->qhead);
444
445 switch (mid->midState) { 437 switch (mid->midState) {
446 case MID_RESPONSE_RECEIVED: 438 case MID_RESPONSE_RECEIVED:
447 spin_unlock(&GlobalMid_Lock); 439 spin_unlock(&GlobalMid_Lock);
448 return rc; 440 return rc;
449 case MID_REQUEST_SUBMITTED:
450 /* socket is going down, reject all calls */
451 if (server->tcpStatus == CifsExiting) {
452 cERROR(1, "%s: canceling mid=%d cmd=0x%x state=%d",
453 __func__, mid->mid, mid->command, mid->midState);
454 rc = -EHOSTDOWN;
455 break;
456 }
457 case MID_RETRY_NEEDED: 441 case MID_RETRY_NEEDED:
458 rc = -EAGAIN; 442 rc = -EAGAIN;
459 break; 443 break;
460 case MID_RESPONSE_MALFORMED: 444 case MID_RESPONSE_MALFORMED:
461 rc = -EIO; 445 rc = -EIO;
462 break; 446 break;
447 case MID_SHUTDOWN:
448 rc = -EHOSTDOWN;
449 break;
463 default: 450 default:
451 list_del_init(&mid->qhead);
464 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 452 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
465 mid->mid, mid->midState); 453 mid->mid, mid->midState);
466 rc = -EIO; 454 rc = -EIO;
@@ -488,10 +476,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
488 int rc = 0; 476 int rc = 0;
489 477
490 /* -4 for RFC1001 length and +2 for BCC field */ 478 /* -4 for RFC1001 length and +2 for BCC field */
491 in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; 479 in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
492 in_buf->Command = SMB_COM_NT_CANCEL; 480 in_buf->Command = SMB_COM_NT_CANCEL;
493 in_buf->WordCount = 0; 481 in_buf->WordCount = 0;
494 put_bcc_le(0, in_buf); 482 put_bcc(0, in_buf);
495 483
496 mutex_lock(&server->srv_mutex); 484 mutex_lock(&server->srv_mutex);
497 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); 485 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +487,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
499 mutex_unlock(&server->srv_mutex); 487 mutex_unlock(&server->srv_mutex);
500 return rc; 488 return rc;
501 } 489 }
502 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 490 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
503 mutex_unlock(&server->srv_mutex); 491 mutex_unlock(&server->srv_mutex);
504 492
505 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", 493 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -509,13 +497,31 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
509} 497}
510 498
511int 499int
512SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 500cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
501 bool log_error)
502{
503 dump_smb(mid->resp_buf,
504 min_t(u32, 92, be32_to_cpu(mid->resp_buf->smb_buf_length)));
505
 506	/* verify the response signature if the server enabled or required signing */
507 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
508 /* FIXME: add code to kill session */
509 if (cifs_verify_signature(mid->resp_buf, server,
510 mid->sequence_number + 1) != 0)
511 cERROR(1, "Unexpected SMB signature");
512 }
513
514 /* BB special case reconnect tid and uid here? */
515 return map_smb_to_linux_error(mid->resp_buf, log_error);
516}
517
518int
519SendReceive2(const unsigned int xid, struct cifs_ses *ses,
513 struct kvec *iov, int n_vec, int *pRespBufType /* ret */, 520 struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
514 const int flags) 521 const int flags)
515{ 522{
516 int rc = 0; 523 int rc = 0;
517 int long_op; 524 int long_op;
518 unsigned int receive_len;
519 struct mid_q_entry *midQ; 525 struct mid_q_entry *midQ;
520 struct smb_hdr *in_buf = iov[0].iov_base; 526 struct smb_hdr *in_buf = iov[0].iov_base;
521 527
@@ -605,66 +611,31 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
605 611
606 cifs_small_buf_release(in_buf); 612 cifs_small_buf_release(in_buf);
607 613
608 rc = sync_mid_result(midQ, ses->server); 614 rc = cifs_sync_mid_result(midQ, ses->server);
609 if (rc != 0) { 615 if (rc != 0) {
610 atomic_dec(&ses->server->inFlight); 616 atomic_dec(&ses->server->inFlight);
611 wake_up(&ses->server->request_q); 617 wake_up(&ses->server->request_q);
612 return rc; 618 return rc;
613 } 619 }
614 620
615 receive_len = midQ->resp_buf->smb_buf_length; 621 if (!midQ->resp_buf || midQ->midState != MID_RESPONSE_RECEIVED) {
616
617 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
618 cERROR(1, "Frame too large received. Length: %d Xid: %d",
619 receive_len, xid);
620 rc = -EIO; 622 rc = -EIO;
623 cFYI(1, "Bad MID state?");
621 goto out; 624 goto out;
622 } 625 }
623 626
624 /* rcvd frame is ok */ 627 iov[0].iov_base = (char *)midQ->resp_buf;
625 628 iov[0].iov_len = be32_to_cpu(midQ->resp_buf->smb_buf_length) + 4;
626 if (midQ->resp_buf && 629 if (midQ->largeBuf)
627 (midQ->midState == MID_RESPONSE_RECEIVED)) { 630 *pRespBufType = CIFS_LARGE_BUFFER;
628 631 else
629 iov[0].iov_base = (char *)midQ->resp_buf; 632 *pRespBufType = CIFS_SMALL_BUFFER;
630 if (midQ->largeBuf)
631 *pRespBufType = CIFS_LARGE_BUFFER;
632 else
633 *pRespBufType = CIFS_SMALL_BUFFER;
634 iov[0].iov_len = receive_len + 4;
635
636 dump_smb(midQ->resp_buf, 80);
637 /* convert the length into a more usable form */
638 if ((receive_len > 24) &&
639 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
640 SECMODE_SIGN_ENABLED))) {
641 rc = cifs_verify_signature(midQ->resp_buf,
642 ses->server,
643 midQ->sequence_number+1);
644 if (rc) {
645 cERROR(1, "Unexpected SMB signature");
646 /* BB FIXME add code to kill session */
647 }
648 }
649 633
650 /* BB special case reconnect tid and uid here? */ 634 rc = cifs_check_receive(midQ, ses->server, flags & CIFS_LOG_ERROR);
651 rc = map_smb_to_linux_error(midQ->resp_buf,
652 flags & CIFS_LOG_ERROR);
653
654 /* convert ByteCount if necessary */
655 if (receive_len >= sizeof(struct smb_hdr) - 4
656 /* do not count RFC1001 header */ +
657 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
658 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
659 if ((flags & CIFS_NO_RESP) == 0)
660 midQ->resp_buf = NULL; /* mark it so buf will
661 not be freed by
662 delete_mid */
663 } else {
664 rc = -EIO;
665 cFYI(1, "Bad MID state?");
666 }
667 635
636 /* mark it so buf will not be freed by delete_mid */
637 if ((flags & CIFS_NO_RESP) == 0)
638 midQ->resp_buf = NULL;
668out: 639out:
669 delete_mid(midQ); 640 delete_mid(midQ);
670 atomic_dec(&ses->server->inFlight); 641 atomic_dec(&ses->server->inFlight);
@@ -674,12 +645,11 @@ out:
674} 645}
675 646
676int 647int
677SendReceive(const unsigned int xid, struct cifsSesInfo *ses, 648SendReceive(const unsigned int xid, struct cifs_ses *ses,
678 struct smb_hdr *in_buf, struct smb_hdr *out_buf, 649 struct smb_hdr *in_buf, struct smb_hdr *out_buf,
679 int *pbytes_returned, const int long_op) 650 int *pbytes_returned, const int long_op)
680{ 651{
681 int rc = 0; 652 int rc = 0;
682 unsigned int receive_len;
683 struct mid_q_entry *midQ; 653 struct mid_q_entry *midQ;
684 654
685 if (ses == NULL) { 655 if (ses == NULL) {
@@ -698,9 +668,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
698 to the same server. We may make this configurable later or 668 to the same server. We may make this configurable later or
699 use ses->maxReq */ 669 use ses->maxReq */
700 670
701 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 671 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
672 MAX_CIFS_HDR_SIZE - 4) {
702 cERROR(1, "Illegal length, greater than maximum frame, %d", 673 cERROR(1, "Illegal length, greater than maximum frame, %d",
703 in_buf->smb_buf_length); 674 be32_to_cpu(in_buf->smb_buf_length));
704 return -EIO; 675 return -EIO;
705 } 676 }
706 677
@@ -733,7 +704,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
733#ifdef CONFIG_CIFS_STATS2 704#ifdef CONFIG_CIFS_STATS2
734 atomic_inc(&ses->server->inSend); 705 atomic_inc(&ses->server->inSend);
735#endif 706#endif
736 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 707 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
737#ifdef CONFIG_CIFS_STATS2 708#ifdef CONFIG_CIFS_STATS2
738 atomic_dec(&ses->server->inSend); 709 atomic_dec(&ses->server->inSend);
739 midQ->when_sent = jiffies; 710 midQ->when_sent = jiffies;
@@ -761,60 +732,23 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
761 spin_unlock(&GlobalMid_Lock); 732 spin_unlock(&GlobalMid_Lock);
762 } 733 }
763 734
764 rc = sync_mid_result(midQ, ses->server); 735 rc = cifs_sync_mid_result(midQ, ses->server);
765 if (rc != 0) { 736 if (rc != 0) {
766 atomic_dec(&ses->server->inFlight); 737 atomic_dec(&ses->server->inFlight);
767 wake_up(&ses->server->request_q); 738 wake_up(&ses->server->request_q);
768 return rc; 739 return rc;
769 } 740 }
770 741
771 receive_len = midQ->resp_buf->smb_buf_length; 742 if (!midQ->resp_buf || !out_buf ||
772 743 midQ->midState != MID_RESPONSE_RECEIVED) {
773 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
774 cERROR(1, "Frame too large received. Length: %d Xid: %d",
775 receive_len, xid);
776 rc = -EIO;
777 goto out;
778 }
779
780 /* rcvd frame is ok */
781
782 if (midQ->resp_buf && out_buf
783 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
784 out_buf->smb_buf_length = receive_len;
785 memcpy((char *)out_buf + 4,
786 (char *)midQ->resp_buf + 4,
787 receive_len);
788
789 dump_smb(out_buf, 92);
790 /* convert the length into a more usable form */
791 if ((receive_len > 24) &&
792 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
793 SECMODE_SIGN_ENABLED))) {
794 rc = cifs_verify_signature(out_buf,
795 ses->server,
796 midQ->sequence_number+1);
797 if (rc) {
798 cERROR(1, "Unexpected SMB signature");
799 /* BB FIXME add code to kill session */
800 }
801 }
802
803 *pbytes_returned = out_buf->smb_buf_length;
804
805 /* BB special case reconnect tid and uid here? */
806 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
807
808 /* convert ByteCount if necessary */
809 if (receive_len >= sizeof(struct smb_hdr) - 4
810 /* do not count RFC1001 header */ +
811 (2 * out_buf->WordCount) + 2 /* bcc */ )
812 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
813 } else {
814 rc = -EIO; 744 rc = -EIO;
815 cERROR(1, "Bad MID state?"); 745 cERROR(1, "Bad MID state?");
746 goto out;
816 } 747 }
817 748
749 *pbytes_returned = be32_to_cpu(midQ->resp_buf->smb_buf_length);
750 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
751 rc = cifs_check_receive(midQ, ses->server, 0);
818out: 752out:
819 delete_mid(midQ); 753 delete_mid(midQ);
820 atomic_dec(&ses->server->inFlight); 754 atomic_dec(&ses->server->inFlight);
@@ -827,12 +761,12 @@ out:
827 blocking lock to return. */ 761 blocking lock to return. */
828 762
829static int 763static int
830send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, 764send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
831 struct smb_hdr *in_buf, 765 struct smb_hdr *in_buf,
832 struct smb_hdr *out_buf) 766 struct smb_hdr *out_buf)
833{ 767{
834 int bytes_returned; 768 int bytes_returned;
835 struct cifsSesInfo *ses = tcon->ses; 769 struct cifs_ses *ses = tcon->ses;
836 LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; 770 LOCK_REQ *pSMB = (LOCK_REQ *)in_buf;
837 771
838 /* We just modify the current in_buf to change 772 /* We just modify the current in_buf to change
@@ -849,15 +783,14 @@ send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon,
849} 783}
850 784
851int 785int
852SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, 786SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
853 struct smb_hdr *in_buf, struct smb_hdr *out_buf, 787 struct smb_hdr *in_buf, struct smb_hdr *out_buf,
854 int *pbytes_returned) 788 int *pbytes_returned)
855{ 789{
856 int rc = 0; 790 int rc = 0;
857 int rstart = 0; 791 int rstart = 0;
858 unsigned int receive_len;
859 struct mid_q_entry *midQ; 792 struct mid_q_entry *midQ;
860 struct cifsSesInfo *ses; 793 struct cifs_ses *ses;
861 794
862 if (tcon == NULL || tcon->ses == NULL) { 795 if (tcon == NULL || tcon->ses == NULL) {
863 cERROR(1, "Null smb session"); 796 cERROR(1, "Null smb session");
@@ -877,9 +810,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
877 to the same server. We may make this configurable later or 810 to the same server. We may make this configurable later or
878 use ses->maxReq */ 811 use ses->maxReq */
879 812
880 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 813 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
814 MAX_CIFS_HDR_SIZE - 4) {
881 cERROR(1, "Illegal length, greater than maximum frame, %d", 815 cERROR(1, "Illegal length, greater than maximum frame, %d",
882 in_buf->smb_buf_length); 816 be32_to_cpu(in_buf->smb_buf_length));
883 return -EIO; 817 return -EIO;
884 } 818 }
885 819
@@ -910,7 +844,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
910#ifdef CONFIG_CIFS_STATS2 844#ifdef CONFIG_CIFS_STATS2
911 atomic_inc(&ses->server->inSend); 845 atomic_inc(&ses->server->inSend);
912#endif 846#endif
913 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 847 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
914#ifdef CONFIG_CIFS_STATS2 848#ifdef CONFIG_CIFS_STATS2
915 atomic_dec(&ses->server->inSend); 849 atomic_dec(&ses->server->inSend);
916 midQ->when_sent = jiffies; 850 midQ->when_sent = jiffies;
@@ -973,56 +907,20 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
973 rstart = 1; 907 rstart = 1;
974 } 908 }
975 909
976 rc = sync_mid_result(midQ, ses->server); 910 rc = cifs_sync_mid_result(midQ, ses->server);
977 if (rc != 0) 911 if (rc != 0)
978 return rc; 912 return rc;
979 913
980 receive_len = midQ->resp_buf->smb_buf_length;
981 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
982 cERROR(1, "Frame too large received. Length: %d Xid: %d",
983 receive_len, xid);
984 rc = -EIO;
985 goto out;
986 }
987
988 /* rcvd frame is ok */ 914 /* rcvd frame is ok */
989 915 if (out_buf == NULL || midQ->midState != MID_RESPONSE_RECEIVED) {
990 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) {
991 rc = -EIO; 916 rc = -EIO;
992 cERROR(1, "Bad MID state?"); 917 cERROR(1, "Bad MID state?");
993 goto out; 918 goto out;
994 } 919 }
995 920
996 out_buf->smb_buf_length = receive_len; 921 *pbytes_returned = be32_to_cpu(midQ->resp_buf->smb_buf_length);
997 memcpy((char *)out_buf + 4, 922 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
998 (char *)midQ->resp_buf + 4, 923 rc = cifs_check_receive(midQ, ses->server, 0);
999 receive_len);
1000
1001 dump_smb(out_buf, 92);
1002 /* convert the length into a more usable form */
1003 if ((receive_len > 24) &&
1004 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
1005 SECMODE_SIGN_ENABLED))) {
1006 rc = cifs_verify_signature(out_buf,
1007 ses->server,
1008 midQ->sequence_number+1);
1009 if (rc) {
1010 cERROR(1, "Unexpected SMB signature");
1011 /* BB FIXME add code to kill session */
1012 }
1013 }
1014
1015 *pbytes_returned = out_buf->smb_buf_length;
1016
1017 /* BB special case reconnect tid and uid here? */
1018 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
1019
1020 /* convert ByteCount if necessary */
1021 if (receive_len >= sizeof(struct smb_hdr) - 4
1022 /* do not count RFC1001 header */ +
1023 (2 * out_buf->WordCount) + 2 /* bcc */ )
1024 put_bcc(get_bcc_le(out_buf), out_buf);
1025
1026out: 924out:
1027 delete_mid(midQ); 925 delete_mid(midQ);
1028 if (rstart && rc == -EACCES) 926 if (rstart && rc == -EACCES)
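
Editor's note: the transport.c hunks above do two things at once: they consolidate the post-receive checks into cifs_sync_mid_result()/cifs_check_receive(), and they switch smb_buf_length to be stored big-endian in the header, converting at every use with be32_to_cpu()/cpu_to_be32(). A minimal userspace sketch of that endianness convention, with htonl()/ntohl() standing in for the kernel helpers; the struct and length value are illustrative, not the real smb_hdr layout:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl()/ntohl() stand in for cpu_to_be32()/be32_to_cpu() */

struct hdr_sketch {
	uint32_t smb_buf_length;	/* kept big-endian in memory, as after this patch */
};

int main(void)
{
	struct hdr_sketch hdr;
	uint32_t len = 84;	/* illustrative frame length, not a real header size */

	hdr.smb_buf_length = htonl(len);	/* cpu_to_be32() on store */
	/* be32_to_cpu() at each use; +4 covers the RFC1001 length field */
	printf("send %u bytes\n", ntohl(hdr.smb_buf_length) + 4);
	return 0;
}

Keeping the field big-endian means the value is wire-ready, at the cost of a conversion at every native-order use, which is exactly the pattern of edits seen throughout the hunks above.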
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..2a22fb2989e4 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -49,7 +49,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
49 int xid; 49 int xid;
50 struct cifs_sb_info *cifs_sb; 50 struct cifs_sb_info *cifs_sb;
51 struct tcon_link *tlink; 51 struct tcon_link *tlink;
52 struct cifsTconInfo *pTcon; 52 struct cifs_tcon *pTcon;
53 struct super_block *sb; 53 struct super_block *sb;
54 char *full_path = NULL; 54 char *full_path = NULL;
55 55
@@ -109,9 +109,10 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
109 int xid; 109 int xid;
110 struct cifs_sb_info *cifs_sb; 110 struct cifs_sb_info *cifs_sb;
111 struct tcon_link *tlink; 111 struct tcon_link *tlink;
112 struct cifsTconInfo *pTcon; 112 struct cifs_tcon *pTcon;
113 struct super_block *sb; 113 struct super_block *sb;
114 char *full_path; 114 char *full_path;
115 struct cifs_ntsd *pacl;
115 116
116 if (direntry == NULL) 117 if (direntry == NULL)
117 return -EIO; 118 return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
166 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 167 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
167 (__u16)value_size, cifs_sb->local_nls, 168 (__u16)value_size, cifs_sb->local_nls,
168 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 169 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
170 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
171 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
172 pacl = kmalloc(value_size, GFP_KERNEL);
173 if (!pacl) {
174 cFYI(1, "%s: Can't allocate memory for ACL",
175 __func__);
176 rc = -ENOMEM;
177 } else {
178#ifdef CONFIG_CIFS_ACL
179 memcpy(pacl, ea_value, value_size);
180 rc = set_cifs_acl(pacl, value_size,
181 direntry->d_inode, full_path);
182 if (rc == 0) /* force revalidate of the inode */
183 CIFS_I(direntry->d_inode)->time = 0;
184 kfree(pacl);
185#else
186 cFYI(1, "Set CIFS ACL not supported yet");
187#endif /* CONFIG_CIFS_ACL */
188 }
169 } else { 189 } else {
170 int temp; 190 int temp;
171 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, 191 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
@@ -220,7 +240,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
220 int xid; 240 int xid;
221 struct cifs_sb_info *cifs_sb; 241 struct cifs_sb_info *cifs_sb;
222 struct tcon_link *tlink; 242 struct tcon_link *tlink;
223 struct cifsTconInfo *pTcon; 243 struct cifs_tcon *pTcon;
224 struct super_block *sb; 244 struct super_block *sb;
225 char *full_path; 245 char *full_path;
226 246
@@ -352,7 +372,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
352 int xid; 372 int xid;
353 struct cifs_sb_info *cifs_sb; 373 struct cifs_sb_info *cifs_sb;
354 struct tcon_link *tlink; 374 struct tcon_link *tlink;
355 struct cifsTconInfo *pTcon; 375 struct cifs_tcon *pTcon;
356 struct super_block *sb; 376 struct super_block *sb;
357 char *full_path; 377 char *full_path;
358 378
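
Editor's note: the new cifs_setxattr() branch above copies a caller-supplied raw security descriptor and hands it to set_cifs_acl() when CONFIG_CIFS_ACL is enabled. A hedged userspace sketch of how a caller might exercise it, assuming the conventional "system.cifs_acl" name for CIFS_XATTR_CIFS_ACL; the mount path and descriptor blob are hypothetical, and a real call needs a valid self-relative security descriptor:

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	/* sd_blob would be a real self-relative security descriptor */
	char sd_blob[64];
	memset(sd_blob, 0, sizeof(sd_blob));

	if (setxattr("/mnt/cifs/file", "system.cifs_acl",
		     sd_blob, sizeof(sd_blob), 0) != 0)
		perror("setxattr");	/* expected to fail without CONFIG_CIFS_ACL */
	return 0;
}

On success the patch also zeroes the cached inode timestamp (CIFS_I(...)->time = 0) to force revalidation, so the new ACL's effect on mode bits is picked up on the next lookup.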
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2b8dae4d121e..a46126fd5735 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -336,6 +336,8 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
336 int len = de->d_name.len; 336 int len = de->d_name.len;
337 int error; 337 int error;
338 338
339 dentry_unhash(de);
340
339 error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); 341 error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len);
340 if (!error) { 342 if (!error) {
341 /* VFS may delete the child */ 343 /* VFS may delete the child */
@@ -359,6 +361,9 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
359 int new_length = new_dentry->d_name.len; 361 int new_length = new_dentry->d_name.len;
360 int error; 362 int error;
361 363
364 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
365 dentry_unhash(new_dentry);
366
362 error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), 367 error = venus_rename(old_dir->i_sb, coda_i2f(old_dir),
363 coda_i2f(new_dir), old_length, new_length, 368 coda_i2f(new_dir), old_length, new_length,
364 (const char *) old_name, (const char *)new_name); 369 (const char *) old_name, (const char *)new_name);
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
1306 return do_sys_open(dfd, filename, flags, mode); 1306 return do_sys_open(dfd, filename, flags, mode);
1307} 1307}
1308 1308
1309/*
1310 * compat_count() counts the number of arguments/envelopes. It is basically
1311 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1312 * and envp pointers.
1313 */
1314static int compat_count(compat_uptr_t __user *argv, int max)
1315{
1316 int i = 0;
1317
1318 if (argv != NULL) {
1319 for (;;) {
1320 compat_uptr_t p;
1321
1322 if (get_user(p, argv))
1323 return -EFAULT;
1324 if (!p)
1325 break;
1326 argv++;
1327 if (i++ >= max)
1328 return -E2BIG;
1329
1330 if (fatal_signal_pending(current))
1331 return -ERESTARTNOHAND;
1332 cond_resched();
1333 }
1334 }
1335 return i;
1336}
1337
1338/*
1339 * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
1340 * except that it works with 32 bit argv and envp pointers.
1341 */
1342static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1343 struct linux_binprm *bprm)
1344{
1345 struct page *kmapped_page = NULL;
1346 char *kaddr = NULL;
1347 unsigned long kpos = 0;
1348 int ret;
1349
1350 while (argc-- > 0) {
1351 compat_uptr_t str;
1352 int len;
1353 unsigned long pos;
1354
1355 if (get_user(str, argv+argc) ||
1356 !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
1357 ret = -EFAULT;
1358 goto out;
1359 }
1360
1361 if (len > MAX_ARG_STRLEN) {
1362 ret = -E2BIG;
1363 goto out;
1364 }
1365
1366 /* We're going to work our way backwards. */
1367 pos = bprm->p;
1368 str += len;
1369 bprm->p -= len;
1370
1371 while (len > 0) {
1372 int offset, bytes_to_copy;
1373
1374 if (fatal_signal_pending(current)) {
1375 ret = -ERESTARTNOHAND;
1376 goto out;
1377 }
1378 cond_resched();
1379
1380 offset = pos % PAGE_SIZE;
1381 if (offset == 0)
1382 offset = PAGE_SIZE;
1383
1384 bytes_to_copy = offset;
1385 if (bytes_to_copy > len)
1386 bytes_to_copy = len;
1387
1388 offset -= bytes_to_copy;
1389 pos -= bytes_to_copy;
1390 str -= bytes_to_copy;
1391 len -= bytes_to_copy;
1392
1393 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1394 struct page *page;
1395
1396 page = get_arg_page(bprm, pos, 1);
1397 if (!page) {
1398 ret = -E2BIG;
1399 goto out;
1400 }
1401
1402 if (kmapped_page) {
1403 flush_kernel_dcache_page(kmapped_page);
1404 kunmap(kmapped_page);
1405 put_page(kmapped_page);
1406 }
1407 kmapped_page = page;
1408 kaddr = kmap(kmapped_page);
1409 kpos = pos & PAGE_MASK;
1410 flush_cache_page(bprm->vma, kpos,
1411 page_to_pfn(kmapped_page));
1412 }
1413 if (copy_from_user(kaddr+offset, compat_ptr(str),
1414 bytes_to_copy)) {
1415 ret = -EFAULT;
1416 goto out;
1417 }
1418 }
1419 }
1420 ret = 0;
1421out:
1422 if (kmapped_page) {
1423 flush_kernel_dcache_page(kmapped_page);
1424 kunmap(kmapped_page);
1425 put_page(kmapped_page);
1426 }
1427 return ret;
1428}
1429
1430/*
1431 * compat_do_execve() is mostly a copy of do_execve(), with the exception
1432 * that it processes 32 bit argv and envp pointers.
1433 */
1434int compat_do_execve(char * filename,
1435 compat_uptr_t __user *argv,
1436 compat_uptr_t __user *envp,
1437 struct pt_regs * regs)
1438{
1439 struct linux_binprm *bprm;
1440 struct file *file;
1441 struct files_struct *displaced;
1442 bool clear_in_exec;
1443 int retval;
1444
1445 retval = unshare_files(&displaced);
1446 if (retval)
1447 goto out_ret;
1448
1449 retval = -ENOMEM;
1450 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1451 if (!bprm)
1452 goto out_files;
1453
1454 retval = prepare_bprm_creds(bprm);
1455 if (retval)
1456 goto out_free;
1457
1458 retval = check_unsafe_exec(bprm);
1459 if (retval < 0)
1460 goto out_free;
1461 clear_in_exec = retval;
1462 current->in_execve = 1;
1463
1464 file = open_exec(filename);
1465 retval = PTR_ERR(file);
1466 if (IS_ERR(file))
1467 goto out_unmark;
1468
1469 sched_exec();
1470
1471 bprm->file = file;
1472 bprm->filename = filename;
1473 bprm->interp = filename;
1474
1475 retval = bprm_mm_init(bprm);
1476 if (retval)
1477 goto out_file;
1478
1479 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1480 if ((retval = bprm->argc) < 0)
1481 goto out;
1482
1483 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1484 if ((retval = bprm->envc) < 0)
1485 goto out;
1486
1487 retval = prepare_binprm(bprm);
1488 if (retval < 0)
1489 goto out;
1490
1491 retval = copy_strings_kernel(1, &bprm->filename, bprm);
1492 if (retval < 0)
1493 goto out;
1494
1495 bprm->exec = bprm->p;
1496 retval = compat_copy_strings(bprm->envc, envp, bprm);
1497 if (retval < 0)
1498 goto out;
1499
1500 retval = compat_copy_strings(bprm->argc, argv, bprm);
1501 if (retval < 0)
1502 goto out;
1503
1504 retval = search_binary_handler(bprm, regs);
1505 if (retval < 0)
1506 goto out;
1507
1508 /* execve succeeded */
1509 current->fs->in_exec = 0;
1510 current->in_execve = 0;
1511 acct_update_integrals(current);
1512 free_bprm(bprm);
1513 if (displaced)
1514 put_files_struct(displaced);
1515 return retval;
1516
1517out:
1518 if (bprm->mm) {
1519 acct_arg_size(bprm, 0);
1520 mmput(bprm->mm);
1521 }
1522
1523out_file:
1524 if (bprm->file) {
1525 allow_write_access(bprm->file);
1526 fput(bprm->file);
1527 }
1528
1529out_unmark:
1530 if (clear_in_exec)
1531 current->fs->in_exec = 0;
1532 current->in_execve = 0;
1533
1534out_free:
1535 free_bprm(bprm);
1536
1537out_files:
1538 if (displaced)
1539 reset_files_struct(displaced);
1540out_ret:
1541 return retval;
1542}
1543
1544#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1309#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1545 1310
1546static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, 1311static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
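
Editor's note: the block removed above duplicated count() and copy_strings() from fs/exec.c for 32-bit userspace pointers, presumably replaced by a shared implementation on the fs/exec.c side of this series. For reference, a userspace sketch of the argv-counting contract compat_count() implemented; the typedef and the -E2BIG stand-in are illustrative:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t compat_uptr_t;	/* 32-bit user pointer, as in the kernel */

static int count_sketch(const compat_uptr_t *argv, int max)
{
	int i = 0;

	if (argv) {
		while (argv[i]) {	/* stop at the NULL terminator */
			if (i++ >= max)
				return -7;	/* stands in for -E2BIG */
		}
	}
	return i;
}

int main(void)
{
	compat_uptr_t argv[] = { 0x1000, 0x2000, 0x3000, 0 };

	printf("argc = %d\n", count_sketch(argv, 32));	/* prints argc = 3 */
	return 0;
}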
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9a37a9b6de3a..9d17d350abc5 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1359,6 +1359,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1359 struct module *subsys_owner = NULL, *dead_item_owner = NULL; 1359 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1360 int ret; 1360 int ret;
1361 1361
1362 dentry_unhash(dentry);
1363
1362 if (dentry->d_parent == configfs_sb->s_root) 1364 if (dentry->d_parent == configfs_sb->s_root)
1363 return -EPERM; 1365 return -EPERM;
1364 1366
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h>
38#include "internal.h" 39#include "internal.h"
39 40
40/* 41/*
@@ -1219,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
1219EXPORT_SYMBOL(shrink_dcache_parent); 1220EXPORT_SYMBOL(shrink_dcache_parent);
1220 1221
1221/* 1222/*
1222 * Scan `nr' dentries and return the number which remain. 1223 * Scan `sc->nr_to_scan' dentries and return the number which remain.
1223 * 1224 *
1224 * We need to avoid reentering the filesystem if the caller is performing a 1225 * We need to avoid reentering the filesystem if the caller is performing a
1225 * GFP_NOFS allocation attempt. One example deadlock is: 1226 * GFP_NOFS allocation attempt. One example deadlock is:
@@ -1230,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
1230 * 1231 *
1231 * In this case we return -1 to tell the caller that we baled. 1232 * In this case we return -1 to tell the caller that we baled.
1232 */ 1233 */
1233static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1234static int shrink_dcache_memory(struct shrinker *shrink,
1235 struct shrink_control *sc)
1234{ 1236{
1237 int nr = sc->nr_to_scan;
1238 gfp_t gfp_mask = sc->gfp_mask;
1239
1235 if (nr) { 1240 if (nr) {
1236 if (!(gfp_mask & __GFP_FS)) 1241 if (!(gfp_mask & __GFP_FS))
1237 return -1; 1242 return -1;
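
Editor's note: the hunk above changes the shrinker callback from shrink(shrinker, nr, gfp_mask) to shrink(shrinker, sc), packing the request into struct shrink_control (the drop_caches.c hunk later in this diff constructs one explicitly). A compile-only userspace sketch of the new shape; the struct mirrors the patch, while GFP_FS_BIT and the callback body are illustrative stand-ins:

#include <stdio.h>

typedef unsigned int gfp_t;	/* stand-in for the kernel type */
#define GFP_FS_BIT 0x80u	/* stand-in for __GFP_FS */

/* mirrors the parameter block this series threads through shrinkers */
struct shrink_control {
	gfp_t gfp_mask;
	int nr_to_scan;
};

static int shrink_sketch(struct shrink_control *sc)
{
	if (sc->nr_to_scan && !(sc->gfp_mask & GFP_FS_BIT))
		return -1;	/* caller can't re-enter the fs: bail */
	return 0;		/* nothing left to prune in this sketch */
}

int main(void)
{
	struct shrink_control sc = { .gfp_mask = 0, .nr_to_scan = 128 };

	printf("%d\n", shrink_sketch(&sc));	/* prints -1: GFP_NOFS caller */
	return 0;
}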
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
428 size_t count, loff_t *ppos) 428 size_t count, loff_t *ppos)
429{ 429{
430 char buf[32]; 430 char buf[32];
431 int buf_size; 431 size_t buf_size;
432 bool bv;
432 u32 *val = file->private_data; 433 u32 *val = file->private_data;
433 434
434 buf_size = min(count, (sizeof(buf)-1)); 435 buf_size = min(count, (sizeof(buf)-1));
435 if (copy_from_user(buf, user_buf, buf_size)) 436 if (copy_from_user(buf, user_buf, buf_size))
436 return -EFAULT; 437 return -EFAULT;
437 438
438 switch (buf[0]) { 439 if (strtobool(buf, &bv) == 0)
439 case 'y': 440 *val = bv;
440 case 'Y': 441
441 case '1':
442 *val = 1;
443 break;
444 case 'n':
445 case 'N':
446 case '0':
447 *val = 0;
448 break;
449 }
450
451 return count; 442 return count;
452} 443}
453 444
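
Editor's note: write_file_bool() now defers parsing to strtobool(); unlike the open-coded switch it replaces, strtobool() also reports failure for unrecognized input, although the caller above still returns count either way. A userspace sketch reimplementing the helper's contract (first character decides, 0 on success, -EINVAL otherwise), not calling the kernel function itself:

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

static int strtobool_sketch(const char *s, bool *res)
{
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	default:
		return -EINVAL;	/* input left unparsed */
	}
}

int main(void)
{
	bool v;

	printf("%d\n", strtobool_sketch("Y\n", &v) == 0 ? v : -1);	/* 1 */
	printf("%d\n", strtobool_sketch("maybe", &v));			/* -EINVAL */
	return 0;
}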
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
100 unsigned int cl_log_debug; 100 unsigned int cl_log_debug;
101 unsigned int cl_protocol; 101 unsigned int cl_protocol;
102 unsigned int cl_timewarn_cs; 102 unsigned int cl_timewarn_cs;
103 unsigned int cl_waitwarn_us;
103}; 104};
104 105
105enum { 106enum {
@@ -114,6 +115,7 @@ enum {
114 CLUSTER_ATTR_LOG_DEBUG, 115 CLUSTER_ATTR_LOG_DEBUG,
115 CLUSTER_ATTR_PROTOCOL, 116 CLUSTER_ATTR_PROTOCOL,
116 CLUSTER_ATTR_TIMEWARN_CS, 117 CLUSTER_ATTR_TIMEWARN_CS,
118 CLUSTER_ATTR_WAITWARN_US,
117}; 119};
118 120
119struct cluster_attribute { 121struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
166CLUSTER_ATTR(log_debug, 0); 168CLUSTER_ATTR(log_debug, 0);
167CLUSTER_ATTR(protocol, 0); 169CLUSTER_ATTR(protocol, 0);
168CLUSTER_ATTR(timewarn_cs, 1); 170CLUSTER_ATTR(timewarn_cs, 1);
171CLUSTER_ATTR(waitwarn_us, 0);
169 172
170static struct configfs_attribute *cluster_attrs[] = { 173static struct configfs_attribute *cluster_attrs[] = {
171 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 174 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
179 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 182 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
180 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 183 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
181 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 184 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
185 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
182 NULL, 186 NULL,
183}; 187};
184 188
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
439 cl->cl_log_debug = dlm_config.ci_log_debug; 443 cl->cl_log_debug = dlm_config.ci_log_debug;
440 cl->cl_protocol = dlm_config.ci_protocol; 444 cl->cl_protocol = dlm_config.ci_protocol;
441 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 445 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
446 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
442 447
443 space_list = &sps->ss_group; 448 space_list = &sps->ss_group;
444 comm_list = &cms->cs_group; 449 comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
986#define DEFAULT_LOG_DEBUG 0 991#define DEFAULT_LOG_DEBUG 0
987#define DEFAULT_PROTOCOL 0 992#define DEFAULT_PROTOCOL 0
988#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 993#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
994#define DEFAULT_WAITWARN_US 0
989 995
990struct dlm_config_info dlm_config = { 996struct dlm_config_info dlm_config = {
991 .ci_tcp_port = DEFAULT_TCP_PORT, 997 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
998 .ci_scan_secs = DEFAULT_SCAN_SECS, 1004 .ci_scan_secs = DEFAULT_SCAN_SECS,
999 .ci_log_debug = DEFAULT_LOG_DEBUG, 1005 .ci_log_debug = DEFAULT_LOG_DEBUG,
1000 .ci_protocol = DEFAULT_PROTOCOL, 1006 .ci_protocol = DEFAULT_PROTOCOL,
1001 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS 1007 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1008 .ci_waitwarn_us = DEFAULT_WAITWARN_US
1002}; 1009};
1003 1010
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs; 30 int ci_timewarn_cs;
31 int ci_waitwarn_us;
31}; 32};
32 33
33extern struct dlm_config_info dlm_config; 34extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
209#define DLM_IFL_WATCH_TIMEWARN 0x00400000 209#define DLM_IFL_WATCH_TIMEWARN 0x00400000
210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
212#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
212#define DLM_IFL_USER 0x00000001 213#define DLM_IFL_USER 0x00000001
213#define DLM_IFL_ORPHAN 0x00000002 214#define DLM_IFL_ORPHAN 0x00000002
214 215
@@ -245,6 +246,7 @@ struct dlm_lkb {
245 246
246 int8_t lkb_wait_type; /* type of reply waiting for */ 247 int8_t lkb_wait_type; /* type of reply waiting for */
247 int8_t lkb_wait_count; 248 int8_t lkb_wait_count;
249 int lkb_wait_nodeid; /* for debugging */
248 250
249 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 251 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
250 struct list_head lkb_statequeue; /* rsb g/c/w list */ 252 struct list_head lkb_statequeue; /* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
254 struct list_head lkb_ownqueue; /* list of locks for a process */ 256 struct list_head lkb_ownqueue; /* list of locks for a process */
255 struct list_head lkb_time_list; 257 struct list_head lkb_time_list;
256 ktime_t lkb_timestamp; 258 ktime_t lkb_timestamp;
259 ktime_t lkb_wait_time;
257 unsigned long lkb_timeout_cs; 260 unsigned long lkb_timeout_cs;
258 261
259 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; 262 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
799 return -1; 799 return -1;
800} 800}
801 801
802static int nodeid_warned(int nodeid, int num_nodes, int *warned)
803{
804 int i;
805
806 for (i = 0; i < num_nodes; i++) {
807 if (!warned[i]) {
808 warned[i] = nodeid;
809 return 0;
810 }
811 if (warned[i] == nodeid)
812 return 1;
813 }
814 return 0;
815}
816
817void dlm_scan_waiters(struct dlm_ls *ls)
818{
819 struct dlm_lkb *lkb;
820 ktime_t zero = ktime_set(0, 0);
821 s64 us;
822 s64 debug_maxus = 0;
823 u32 debug_scanned = 0;
824 u32 debug_expired = 0;
825 int num_nodes = 0;
826 int *warned = NULL;
827
828 if (!dlm_config.ci_waitwarn_us)
829 return;
830
831 mutex_lock(&ls->ls_waiters_mutex);
832
833 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
834 if (ktime_equal(lkb->lkb_wait_time, zero))
835 continue;
836
837 debug_scanned++;
838
839 us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
840
841 if (us < dlm_config.ci_waitwarn_us)
842 continue;
843
844 lkb->lkb_wait_time = zero;
845
846 debug_expired++;
847 if (us > debug_maxus)
848 debug_maxus = us;
849
850 if (!num_nodes) {
851 num_nodes = ls->ls_num_nodes;
852 warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
853 if (warned)
854 memset(warned, 0, num_nodes * sizeof(int));
855 }
856 if (!warned)
857 continue;
858 if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
859 continue;
860
861 log_error(ls, "waitwarn %x %lld %d us check connection to "
862 "node %d", lkb->lkb_id, (long long)us,
863 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
864 }
865 mutex_unlock(&ls->ls_waiters_mutex);
866
867 if (warned)
868 kfree(warned);
869
870 if (debug_expired)
871 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
872 debug_scanned, debug_expired,
873 dlm_config.ci_waitwarn_us, (long long)debug_maxus);
874}
875
802/* add/remove lkb from global waiters list of lkb's waiting for 876/* add/remove lkb from global waiters list of lkb's waiting for
803 a reply from a remote node */ 877 a reply from a remote node */
804 878
805static int add_to_waiters(struct dlm_lkb *lkb, int mstype) 879static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
806{ 880{
807 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 881 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
808 int error = 0; 882 int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
842 916
843 lkb->lkb_wait_count++; 917 lkb->lkb_wait_count++;
844 lkb->lkb_wait_type = mstype; 918 lkb->lkb_wait_type = mstype;
919 lkb->lkb_wait_time = ktime_get();
920 lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
845 hold_lkb(lkb); 921 hold_lkb(lkb);
846 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); 922 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
847 out: 923 out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
961 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 1037 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
962 int error; 1038 int error;
963 1039
964 if (ms != &ls->ls_stub_ms) 1040 if (ms->m_flags != DLM_IFL_STUB_MS)
965 mutex_lock(&ls->ls_waiters_mutex); 1041 mutex_lock(&ls->ls_waiters_mutex);
966 error = _remove_from_waiters(lkb, ms->m_type, ms); 1042 error = _remove_from_waiters(lkb, ms->m_type, ms);
967 if (ms != &ls->ls_stub_ms) 1043 if (ms->m_flags != DLM_IFL_STUB_MS)
968 mutex_unlock(&ls->ls_waiters_mutex); 1044 mutex_unlock(&ls->ls_waiters_mutex);
969 return error; 1045 return error;
970} 1046}
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
1157 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) 1233 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1158 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); 1234 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
1159 mutex_unlock(&ls->ls_timeout_mutex); 1235 mutex_unlock(&ls->ls_timeout_mutex);
1236
1237 if (!dlm_config.ci_waitwarn_us)
1238 return;
1239
1240 mutex_lock(&ls->ls_waiters_mutex);
1241 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
1242 if (ktime_to_us(lkb->lkb_wait_time))
1243 lkb->lkb_wait_time = ktime_get();
1244 }
1245 mutex_unlock(&ls->ls_waiters_mutex);
1160} 1246}
1161 1247
1162/* lkb is master or local copy */ 1248/* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
1376 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become 1462 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
1377 compatible with other granted locks */ 1463 compatible with other granted locks */
1378 1464
1379static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) 1465static void munge_demoted(struct dlm_lkb *lkb)
1380{ 1466{
1381 if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
1382 log_print("munge_demoted %x invalid reply type %d",
1383 lkb->lkb_id, ms->m_type);
1384 return;
1385 }
1386
1387 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { 1467 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
1388 log_print("munge_demoted %x invalid modes gr %d rq %d", 1468 log_print("munge_demoted %x invalid modes gr %d rq %d",
1389 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); 1469 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2844 struct dlm_mhandle *mh; 2924 struct dlm_mhandle *mh;
2845 int to_nodeid, error; 2925 int to_nodeid, error;
2846 2926
2847 error = add_to_waiters(lkb, mstype); 2927 to_nodeid = r->res_nodeid;
2928
2929 error = add_to_waiters(lkb, mstype, to_nodeid);
2848 if (error) 2930 if (error)
2849 return error; 2931 return error;
2850 2932
2851 to_nodeid = r->res_nodeid;
2852
2853 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); 2933 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2854 if (error) 2934 if (error)
2855 goto fail; 2935 goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2880 /* down conversions go without a reply from the master */ 2960 /* down conversions go without a reply from the master */
2881 if (!error && down_conversion(lkb)) { 2961 if (!error && down_conversion(lkb)) {
2882 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); 2962 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
2963 r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
2883 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 2964 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
2884 r->res_ls->ls_stub_ms.m_result = 0; 2965 r->res_ls->ls_stub_ms.m_result = 0;
2885 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
2886 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); 2966 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
2887 } 2967 }
2888 2968
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2951 struct dlm_mhandle *mh; 3031 struct dlm_mhandle *mh;
2952 int to_nodeid, error; 3032 int to_nodeid, error;
2953 3033
2954 error = add_to_waiters(lkb, DLM_MSG_LOOKUP); 3034 to_nodeid = dlm_dir_nodeid(r);
3035
3036 error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
2955 if (error) 3037 if (error)
2956 return error; 3038 return error;
2957 3039
2958 to_nodeid = dlm_dir_nodeid(r);
2959
2960 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); 3040 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
2961 if (error) 3041 if (error)
2962 goto fail; 3042 goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
3070 3150
3071static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3151static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3072{ 3152{
3153 if (ms->m_flags == DLM_IFL_STUB_MS)
3154 return;
3155
3073 lkb->lkb_sbflags = ms->m_sbflags; 3156 lkb->lkb_sbflags = ms->m_sbflags;
3074 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | 3157 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
3075 (ms->m_flags & 0x0000FFFF); 3158 (ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3612 /* convert was queued on remote master */ 3695 /* convert was queued on remote master */
3613 receive_flags_reply(lkb, ms); 3696 receive_flags_reply(lkb, ms);
3614 if (is_demoted(lkb)) 3697 if (is_demoted(lkb))
3615 munge_demoted(lkb, ms); 3698 munge_demoted(lkb);
3616 del_lkb(r, lkb); 3699 del_lkb(r, lkb);
3617 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3700 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3618 add_timeout(lkb); 3701 add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3622 /* convert was granted on remote master */ 3705 /* convert was granted on remote master */
3623 receive_flags_reply(lkb, ms); 3706 receive_flags_reply(lkb, ms);
3624 if (is_demoted(lkb)) 3707 if (is_demoted(lkb))
3625 munge_demoted(lkb, ms); 3708 munge_demoted(lkb);
3626 grant_lock_pc(r, lkb, ms); 3709 grant_lock_pc(r, lkb, ms);
3627 queue_cast(r, lkb, 0); 3710 queue_cast(r, lkb, 0);
3628 break; 3711 break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
3996 dlm_put_lockspace(ls); 4079 dlm_put_lockspace(ls);
3997} 4080}
3998 4081
3999static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 4082static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
4083 struct dlm_message *ms_stub)
4000{ 4084{
4001 if (middle_conversion(lkb)) { 4085 if (middle_conversion(lkb)) {
4002 hold_lkb(lkb); 4086 hold_lkb(lkb);
4003 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 4087 memset(ms_stub, 0, sizeof(struct dlm_message));
4004 ls->ls_stub_ms.m_result = -EINPROGRESS; 4088 ms_stub->m_flags = DLM_IFL_STUB_MS;
4005 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4089 ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
4006 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4090 ms_stub->m_result = -EINPROGRESS;
4007 _receive_convert_reply(lkb, &ls->ls_stub_ms); 4091 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4092 _receive_convert_reply(lkb, ms_stub);
4008 4093
4009 /* Same special case as in receive_rcom_lock_args() */ 4094 /* Same special case as in receive_rcom_lock_args() */
4010 lkb->lkb_grmode = DLM_LOCK_IV; 4095 lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
4045void dlm_recover_waiters_pre(struct dlm_ls *ls) 4130void dlm_recover_waiters_pre(struct dlm_ls *ls)
4046{ 4131{
4047 struct dlm_lkb *lkb, *safe; 4132 struct dlm_lkb *lkb, *safe;
4133 struct dlm_message *ms_stub;
4048 int wait_type, stub_unlock_result, stub_cancel_result; 4134 int wait_type, stub_unlock_result, stub_cancel_result;
4049 4135
 4136	ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
4137 if (!ms_stub) {
4138 log_error(ls, "dlm_recover_waiters_pre no mem");
4139 return;
4140 }
4141
4050 mutex_lock(&ls->ls_waiters_mutex); 4142 mutex_lock(&ls->ls_waiters_mutex);
4051 4143
4052 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { 4144 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
4053 log_debug(ls, "pre recover waiter lkid %x type %d flags %x", 4145
4054 lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); 4146 /* exclude debug messages about unlocks because there can be so
4147 many and they aren't very interesting */
4148
4149 if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
4150 log_debug(ls, "recover_waiter %x nodeid %d "
4151 "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
4152 lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
4153 }
4055 4154
4056 /* all outstanding lookups, regardless of destination will be 4155 /* all outstanding lookups, regardless of destination will be
4057 resent after recovery is done */ 4156 resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4097 break; 4196 break;
4098 4197
4099 case DLM_MSG_CONVERT: 4198 case DLM_MSG_CONVERT:
4100 recover_convert_waiter(ls, lkb); 4199 recover_convert_waiter(ls, lkb, ms_stub);
4101 break; 4200 break;
4102 4201
4103 case DLM_MSG_UNLOCK: 4202 case DLM_MSG_UNLOCK:
4104 hold_lkb(lkb); 4203 hold_lkb(lkb);
4105 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; 4204 memset(ms_stub, 0, sizeof(struct dlm_message));
4106 ls->ls_stub_ms.m_result = stub_unlock_result; 4205 ms_stub->m_flags = DLM_IFL_STUB_MS;
4107 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4206 ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
4108 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4207 ms_stub->m_result = stub_unlock_result;
4109 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 4208 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4209 _receive_unlock_reply(lkb, ms_stub);
4110 dlm_put_lkb(lkb); 4210 dlm_put_lkb(lkb);
4111 break; 4211 break;
4112 4212
4113 case DLM_MSG_CANCEL: 4213 case DLM_MSG_CANCEL:
4114 hold_lkb(lkb); 4214 hold_lkb(lkb);
4115 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; 4215 memset(ms_stub, 0, sizeof(struct dlm_message));
4116 ls->ls_stub_ms.m_result = stub_cancel_result; 4216 ms_stub->m_flags = DLM_IFL_STUB_MS;
4117 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4217 ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
4118 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4218 ms_stub->m_result = stub_cancel_result;
4119 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 4219 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4220 _receive_cancel_reply(lkb, ms_stub);
4120 dlm_put_lkb(lkb); 4221 dlm_put_lkb(lkb);
4121 break; 4222 break;
4122 4223
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4127 schedule(); 4228 schedule();
4128 } 4229 }
4129 mutex_unlock(&ls->ls_waiters_mutex); 4230 mutex_unlock(&ls->ls_waiters_mutex);
4231 kfree(ms_stub);
4130} 4232}
4131 4233
4132static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) 4234static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
4191 ou = is_overlap_unlock(lkb); 4293 ou = is_overlap_unlock(lkb);
4192 err = 0; 4294 err = 0;
4193 4295
4194 log_debug(ls, "recover_waiters_post %x type %d flags %x %s", 4296 log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
4195 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); 4297 lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
4196 4298
4197 /* At this point we assume that we won't get a reply to any 4299 /* At this point we assume that we won't get a reply to any
4198 previous op or overlap op on this lock. First, do a big 4300 previous op or overlap op on this lock. First, do a big
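
Editor's note: dlm_scan_waiters() above warns at most once per remote node per scan, using the small linear table in nodeid_warned(). A standalone sketch of that dedup helper plus the threshold test it feeds; the wait times, node ids, and threshold are made up:

#include <stdio.h>
#include <string.h>

/* record nodeid in the first free slot; report 1 if already present */
static int nodeid_warned_sketch(int nodeid, int num_nodes, int *warned)
{
	int i;

	for (i = 0; i < num_nodes; i++) {
		if (!warned[i]) {
			warned[i] = nodeid;
			return 0;
		}
		if (warned[i] == nodeid)
			return 1;
	}
	return 0;
}

int main(void)
{
	int warned[4];
	long long wait_us[] = { 120, 750000, 900000 };	/* per-lkb waits */
	int node[]          = { 2,   3,      3      };	/* node owing the reply */
	long long threshold = 500000;			/* models ci_waitwarn_us */
	int i;

	memset(warned, 0, sizeof(warned));
	for (i = 0; i < 3; i++) {
		if (wait_us[i] < threshold)
			continue;
		if (nodeid_warned_sketch(node[i], 4, warned))
			continue;	/* node 3 is only reported once */
		printf("waitwarn: check connection to node %d\n", node[i]);
	}
	return 0;
}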
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
24void dlm_scan_rsbs(struct dlm_ls *ls); 24void dlm_scan_rsbs(struct dlm_ls *ls);
25int dlm_lock_recovery_try(struct dlm_ls *ls); 25int dlm_lock_recovery_try(struct dlm_ls *ls);
26void dlm_unlock_recovery(struct dlm_ls *ls); 26void dlm_unlock_recovery(struct dlm_ls *ls);
27void dlm_scan_waiters(struct dlm_ls *ls);
27void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
28void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
29 30
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
243static int dlm_scand(void *data) 243static int dlm_scand(void *data)
244{ 244{
245 struct dlm_ls *ls; 245 struct dlm_ls *ls;
246 int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
247 246
248 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
249 ls = find_ls_to_scan(); 248 ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
252 ls->ls_scan_time = jiffies; 251 ls->ls_scan_time = jiffies;
253 dlm_scan_rsbs(ls); 252 dlm_scan_rsbs(ls);
254 dlm_scan_timeout(ls); 253 dlm_scan_timeout(ls);
254 dlm_scan_waiters(ls);
255 dlm_unlock_recovery(ls); 255 dlm_unlock_recovery(ls);
256 } else { 256 } else {
257 ls->ls_scan_time += HZ; 257 ls->ls_scan_time += HZ;
258 } 258 }
259 } else { 259 continue;
260 schedule_timeout_interruptible(timeout_jiffies);
261 } 260 }
261 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
262 } 262 }
263 return 0; 263 return 0;
264} 264}
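
Editor's note: the dlm_scand() rework above also stops caching the sleep interval: ci_scan_secs is re-read on every idle pass, so a configfs change to scan_secs now takes effect on the next iteration instead of never. A trivial sketch of the difference (variable names illustrative):

#include <stdio.h>

static int scan_secs = 5;	/* stands in for dlm_config.ci_scan_secs */

int main(void)
{
	int pass;

	for (pass = 0; pass < 3; pass++) {
		/* interval re-read each pass, as dlm_scand() now does */
		printf("pass %d: sleep %d s\n", pass, scan_secs);
		if (pass == 0)
			scan_secs = 10;	/* runtime config change is picked up */
	}
	return 0;
}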
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index b80e0aa3cfa5..5a59efa0bb46 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -50,7 +50,7 @@ static int __init init_dlm(void)
50 if (error) 50 if (error)
51 goto out_netlink; 51 goto out_netlink;
52 52
53 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); 53 printk("DLM installed\n");
54 54
55 return 0; 55 return 0;
56 56
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
71 wake_up(&send_wq); 71 wake_up(&send_wq);
72} 72}
73 73
74/* If a process was killed while waiting for the only plock on a file,
75 locks_remove_posix will not see any lock on the file so it won't
76 send an unlock-close to us to pass on to userspace to clean up the
77 abandoned waiter. So, we have to insert the unlock-close when the
78 lock call is interrupted. */
79
80static void do_unlock_close(struct dlm_ls *ls, u64 number,
81 struct file *file, struct file_lock *fl)
82{
83 struct plock_op *op;
84
85 op = kzalloc(sizeof(*op), GFP_NOFS);
86 if (!op)
87 return;
88
89 op->info.optype = DLM_PLOCK_OP_UNLOCK;
90 op->info.pid = fl->fl_pid;
91 op->info.fsid = ls->ls_global_id;
92 op->info.number = number;
93 op->info.start = 0;
94 op->info.end = OFFSET_MAX;
95 if (fl->fl_lmops && fl->fl_lmops->fl_grant)
96 op->info.owner = (__u64) fl->fl_pid;
97 else
98 op->info.owner = (__u64)(long) fl->fl_owner;
99
100 op->info.flags |= DLM_PLOCK_FL_CLOSE;
101 send_op(op);
102}
103
74int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 104int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
75 int cmd, struct file_lock *fl) 105 int cmd, struct file_lock *fl)
76{ 106{
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
114 144
115 send_op(op); 145 send_op(op);
116 146
117 if (xop->callback == NULL) 147 if (xop->callback == NULL) {
118 wait_event(recv_wq, (op->done != 0)); 148 rv = wait_event_killable(recv_wq, (op->done != 0));
119 else { 149 if (rv == -ERESTARTSYS) {
150 log_debug(ls, "dlm_posix_lock: wait killed %llx",
151 (unsigned long long)number);
152 spin_lock(&ops_lock);
153 list_del(&op->list);
154 spin_unlock(&ops_lock);
155 kfree(xop);
156 do_unlock_close(ls, number, file, fl);
157 goto out;
158 }
159 } else {
120 rv = FILE_LOCK_DEFERRED; 160 rv = FILE_LOCK_DEFERRED;
121 goto out; 161 goto out;
122 } 162 }
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
233 else 273 else
234 op->info.owner = (__u64)(long) fl->fl_owner; 274 op->info.owner = (__u64)(long) fl->fl_owner;
235 275
276 if (fl->fl_flags & FL_CLOSE) {
277 op->info.flags |= DLM_PLOCK_FL_CLOSE;
278 send_op(op);
279 rv = 0;
280 goto out;
281 }
282
236 send_op(op); 283 send_op(op);
237 wait_event(recv_wq, (op->done != 0)); 284 wait_event(recv_wq, (op->done != 0));
238 285
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
334 spin_lock(&ops_lock); 381 spin_lock(&ops_lock);
335 if (!list_empty(&send_list)) { 382 if (!list_empty(&send_list)) {
336 op = list_entry(send_list.next, struct plock_op, list); 383 op = list_entry(send_list.next, struct plock_op, list);
337 list_move(&op->list, &recv_list); 384 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
385 list_del(&op->list);
386 else
387 list_move(&op->list, &recv_list);
338 memcpy(&info, &op->info, sizeof(info)); 388 memcpy(&info, &op->info, sizeof(info));
339 } 389 }
340 spin_unlock(&ops_lock); 390 spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
342 if (!op) 392 if (!op)
343 return -EAGAIN; 393 return -EAGAIN;
344 394
395 /* there is no need to get a reply from userspace for unlocks
396 that were generated by the vfs cleaning up for a close
397 (the process did not make an unlock call). */
398
399 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
400 kfree(op);
401
345 if (copy_to_user(u, &info, sizeof(info))) 402 if (copy_to_user(u, &info, sizeof(info)))
346 return -EFAULT; 403 return -EFAULT;
347 return sizeof(info); 404 return sizeof(info);
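
Editor's note: two rules interlock in the plock.c changes above. Unlocks generated by the VFS on close (FL_CLOSE), and the cleanup unlock injected by do_unlock_close() when a killable wait is interrupted, carry DLM_PLOCK_FL_CLOSE and are never waited on; dev_read() therefore frees such ops after copying them out instead of parking them on recv_list. A small model of that routing decision; the types and strings are illustrative:

#include <stdio.h>
#include <stdbool.h>

struct op_sketch {
	bool close_flag;	/* models DLM_PLOCK_FL_CLOSE */
	const char *name;
};

/* models the new dev_read() rule: close-flagged ops expect no reply */
static const char *route(const struct op_sketch *op)
{
	return op->close_flag ? "copy to userspace, then free"
			      : "move to recv_list, wait for reply";
}

int main(void)
{
	struct op_sketch lock_op  = { false, "plock" };
	struct op_sketch close_op = { true,  "unlock on close" };

	printf("%s: %s\n", lock_op.name, route(&lock_op));
	printf("%s: %s\n", close_op.name, route(&close_op));
	return 0;
}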
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
611 611
612 out_sig: 612 out_sig:
613 sigprocmask(SIG_SETMASK, &tmpsig, NULL); 613 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
614 recalc_sigpending();
615 out_free: 614 out_free:
616 kfree(kbuf); 615 kfree(kbuf);
617 return error; 616 return error;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
40static void drop_slab(void) 40static void drop_slab(void)
41{ 41{
42 int nr_objects; 42 int nr_objects;
43 struct shrink_control shrink = {
44 .gfp_mask = GFP_KERNEL,
45 };
43 46
44 do { 47 do {
45 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); 48 nr_objects = shrink_slab(&shrink, 1000, 1000);
46 } while (nr_objects > 10); 49 } while (nr_objects > 10);
47} 50}
48 51
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4d4cc6a90cd5..bc116b9ffcf2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -521,12 +521,16 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
521 struct dentry *lower_dir_dentry; 521 struct dentry *lower_dir_dentry;
522 int rc; 522 int rc;
523 523
524 dentry_unhash(dentry);
525
524 lower_dentry = ecryptfs_dentry_to_lower(dentry); 526 lower_dentry = ecryptfs_dentry_to_lower(dentry);
525 dget(dentry); 527 dget(dentry);
526 lower_dir_dentry = lock_parent(lower_dentry); 528 lower_dir_dentry = lock_parent(lower_dentry);
527 dget(lower_dentry); 529 dget(lower_dentry);
528 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); 530 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
529 dput(lower_dentry); 531 dput(lower_dentry);
532 if (!rc && dentry->d_inode)
533 clear_nlink(dentry->d_inode);
530 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 534 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
531 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 535 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
532 unlock_dir(lower_dir_dentry); 536 unlock_dir(lower_dir_dentry);
@@ -571,6 +575,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
571 struct dentry *lower_new_dir_dentry; 575 struct dentry *lower_new_dir_dentry;
572 struct dentry *trap = NULL; 576 struct dentry *trap = NULL;
573 577
578 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
579 dentry_unhash(new_dentry);
580
574 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 581 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
575 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 582 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
576 dget(lower_old_dentry); 583 dget(lower_old_dentry);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 03e609c45012..27a7fefb83eb 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -599,8 +599,8 @@ struct ecryptfs_write_tag_70_packet_silly_stack {
 	struct mutex *tfm_mutex;
 	char *block_aligned_filename;
 	struct ecryptfs_auth_tok *auth_tok;
-	struct scatterlist src_sg;
-	struct scatterlist dst_sg;
+	struct scatterlist src_sg[2];
+	struct scatterlist dst_sg[2];
 	struct blkcipher_desc desc;
 	char iv[ECRYPTFS_MAX_IV_BYTES];
 	char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
@@ -816,23 +816,21 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
 	memcpy(&s->block_aligned_filename[s->num_rand_bytes], filename,
 	       filename_size);
 	rc = virt_to_scatterlist(s->block_aligned_filename,
-				 s->block_aligned_filename_size, &s->src_sg, 1);
-	if (rc != 1) {
+				 s->block_aligned_filename_size, s->src_sg, 2);
+	if (rc < 1) {
 		printk(KERN_ERR "%s: Internal error whilst attempting to "
-		       "convert filename memory to scatterlist; "
-		       "expected rc = 1; got rc = [%d]. "
+		       "convert filename memory to scatterlist; rc = [%d]. "
 		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
 		       s->block_aligned_filename_size);
 		goto out_release_free_unlock;
 	}
 	rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size,
-				 &s->dst_sg, 1);
-	if (rc != 1) {
+				 s->dst_sg, 2);
+	if (rc < 1) {
 		printk(KERN_ERR "%s: Internal error whilst attempting to "
 		       "convert encrypted filename memory to scatterlist; "
-		       "expected rc = 1; got rc = [%d]. "
-		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
-		       s->block_aligned_filename_size);
+		       "rc = [%d]. block_aligned_filename_size = [%zd]\n",
+		       __func__, rc, s->block_aligned_filename_size);
 		goto out_release_free_unlock;
 	}
 	/* The characters in the first block effectively do the job
@@ -855,7 +853,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
 		       mount_crypt_stat->global_default_fn_cipher_key_bytes);
 		goto out_release_free_unlock;
 	}
-	rc = crypto_blkcipher_encrypt_iv(&s->desc, &s->dst_sg, &s->src_sg,
+	rc = crypto_blkcipher_encrypt_iv(&s->desc, s->dst_sg, s->src_sg,
 					 s->block_aligned_filename_size);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to encrypt filename; "
@@ -891,8 +889,8 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
 	struct mutex *tfm_mutex;
 	char *decrypted_filename;
 	struct ecryptfs_auth_tok *auth_tok;
-	struct scatterlist src_sg;
-	struct scatterlist dst_sg;
+	struct scatterlist src_sg[2];
+	struct scatterlist dst_sg[2];
 	struct blkcipher_desc desc;
 	char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
 	char iv[ECRYPTFS_MAX_IV_BYTES];
@@ -1008,13 +1006,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
 	}
 	mutex_lock(s->tfm_mutex);
 	rc = virt_to_scatterlist(&data[(*packet_size)],
-				 s->block_aligned_filename_size, &s->src_sg, 1);
-	if (rc != 1) {
+				 s->block_aligned_filename_size, s->src_sg, 2);
+	if (rc < 1) {
 		printk(KERN_ERR "%s: Internal error whilst attempting to "
 		       "convert encrypted filename memory to scatterlist; "
-		       "expected rc = 1; got rc = [%d]. "
-		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
-		       s->block_aligned_filename_size);
+		       "rc = [%d]. block_aligned_filename_size = [%zd]\n",
+		       __func__, rc, s->block_aligned_filename_size);
 		goto out_unlock;
 	}
 	(*packet_size) += s->block_aligned_filename_size;
@@ -1028,13 +1025,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
 		goto out_unlock;
 	}
 	rc = virt_to_scatterlist(s->decrypted_filename,
-				 s->block_aligned_filename_size, &s->dst_sg, 1);
-	if (rc != 1) {
+				 s->block_aligned_filename_size, s->dst_sg, 2);
+	if (rc < 1) {
 		printk(KERN_ERR "%s: Internal error whilst attempting to "
 		       "convert decrypted filename memory to scatterlist; "
-		       "expected rc = 1; got rc = [%d]. "
-		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
-		       s->block_aligned_filename_size);
+		       "rc = [%d]. block_aligned_filename_size = [%zd]\n",
+		       __func__, rc, s->block_aligned_filename_size);
 		goto out_free_unlock;
 	}
 	/* The characters in the first block effectively do the job of
@@ -1065,7 +1061,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
 		       mount_crypt_stat->global_default_fn_cipher_key_bytes);
 		goto out_free_unlock;
 	}
-	rc = crypto_blkcipher_decrypt_iv(&s->desc, &s->dst_sg, &s->src_sg,
+	rc = crypto_blkcipher_decrypt_iv(&s->desc, s->dst_sg, s->src_sg,
 					 s->block_aligned_filename_size);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to decrypt filename; "
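The keystore changes above widen src_sg/dst_sg from one scatterlist entry to an array of two and relax the result check from != 1 to < 1. The reason: a virtually contiguous kmalloc'ed buffer can straddle one page boundary, in which case virt_to_scatterlist() (an eCryptfs helper) needs two entries to describe it. A hedged sketch of the pattern; demo_map_buffer and the 512-byte size are illustrative only:

	static int demo_map_buffer(void)
	{
		struct scatterlist sg[2];
		char *buf = kmalloc(512, GFP_KERNEL);	/* may cross a page boundary */
		int nents;

		if (!buf)
			return -ENOMEM;
		/* Returns the number of entries used; a sub-page buffer needs
		 * at most two, depending on where kmalloc placed it. */
		nents = virt_to_scatterlist(buf, 512, sg, 2);
		if (nents < 1) {
			kfree(buf);
			return -ENOMEM;
		}
		/* ... hand sg/nents to the crypto layer ... */
		kfree(buf);
		return 0;
	}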
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..ea5f748906a8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -42,7 +42,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/module.h>
 #include <linux/namei.h>
-#include <linux/proc_fs.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -55,6 +54,7 @@
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
+#include <linux/compat.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -166,8 +166,13 @@ out:
 }
 
 #ifdef CONFIG_MMU
-
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+/*
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporary
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 	struct mm_struct *mm = current->mm;
 	long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +191,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 #endif
 }
 
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -194,7 +199,7 @@ struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
 #ifdef CONFIG_STACK_GROWSUP
 	if (write) {
-		ret = expand_stack_downwards(bprm->vma, pos);
+		ret = expand_downwards(bprm->vma, pos);
 		if (ret < 0)
 			return NULL;
 	}
@@ -305,11 +310,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
 
 #else
 
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 }
 
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -398,22 +403,56 @@ err:
 	return err;
 }
 
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+	bool is_compat;
+#endif
+	union {
+		const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+		compat_uptr_t __user *compat;
+#endif
+	} ptr;
+};
+
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+	const char __user *native;
+
+#ifdef CONFIG_COMPAT
+	if (unlikely(argv.is_compat)) {
+		compat_uptr_t compat;
+
+		if (get_user(compat, argv.ptr.compat + nr))
+			return ERR_PTR(-EFAULT);
+
+		return compat_ptr(compat);
+	}
+#endif
+
+	if (get_user(native, argv.ptr.native + nr))
+		return ERR_PTR(-EFAULT);
+
+	return native;
+}
+
 /*
  * count() counts the number of strings in array ARGV.
  */
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
 {
 	int i = 0;
 
-	if (argv != NULL) {
+	if (argv.ptr.native != NULL) {
 		for (;;) {
-			const char __user * p;
+			const char __user *p = get_user_arg_ptr(argv, i);
 
-			if (get_user(p, argv))
-				return -EFAULT;
 			if (!p)
 				break;
-			argv++;
+
+			if (IS_ERR(p))
+				return -EFAULT;
+
 			if (i++ >= max)
 				return -E2BIG;
 
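The user_arg_ptr/get_user_arg_ptr machinery added above exists because a 32-bit task on a 64-bit kernel passes argv[] as an array of 32-bit pointers. The old bare get_user(p, argv + nr) performs a native-width load, which on such an array would fuse two adjacent entries. A hedged sketch of the failure mode the union avoids — not code from the patch:

	/* WRONG for compat callers: native pointer load from a 32-bit argv[]. */
	static const char __user *broken_fetch(const char __user *const __user *argv,
					       int nr)
	{
		const char __user *p;

		if (get_user(p, argv + nr))	/* 8-byte load on 64-bit kernels */
			return ERR_PTR(-EFAULT);
		return p;	/* two fused 4-byte entries if the task is compat */
	}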
@@ -430,7 +469,7 @@ static int count(const char __user * const __user * argv, int max)
  * processes's memory to the new process's stack. The call to get_user_pages()
  * ensures the destination page is created and not swapped out.
  */
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
 			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
@@ -443,16 +482,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
 		int len;
 		unsigned long pos;
 
-		if (get_user(str, argv+argc) ||
-		    !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
-			ret = -EFAULT;
-			goto out;
-		}
+		ret = -EFAULT;
+		str = get_user_arg_ptr(argv, argc);
+		if (IS_ERR(str))
+			goto out;
 
-		if (!valid_arg_len(bprm, len)) {
-			ret = -E2BIG;
+		len = strnlen_user(str, MAX_ARG_STRLEN);
+		if (!len)
+			goto out;
+
+		ret = -E2BIG;
+		if (!valid_arg_len(bprm, len))
 			goto out;
-		}
 
 		/* We're going to work our way backwords. */
 		pos = bprm->p;
@@ -519,14 +560,19 @@ out:
 /*
  * Like copy_strings, but get argv and its values from kernel memory.
  */
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
 			struct linux_binprm *bprm)
 {
 	int r;
 	mm_segment_t oldfs = get_fs();
+	struct user_arg_ptr argv = {
+		.ptr.native = (const char __user *const __user *)__argv,
+	};
+
 	set_fs(KERNEL_DS);
-	r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
+	r = copy_strings(argc, argv, bprm);
 	set_fs(oldfs);
+
 	return r;
 }
 EXPORT_SYMBOL(copy_strings_kernel);
@@ -553,7 +599,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	unsigned long length = old_end - old_start;
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 
 	BUG_ON(new_start > new_end);
 
@@ -579,12 +625,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
-		free_pgd_range(tlb, new_end, old_end, new_end,
+		free_pgd_range(&tlb, new_end, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	} else {
 		/*
@@ -593,10 +639,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		 * have constraints on va-space that make this illegal (IA64) -
 		 * for the others its just a little faster.
 		 */
-		free_pgd_range(tlb, old_start, old_end, new_end,
+		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	}
-	tlb_finish_mmu(tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, new_end, old_end);
 
 	/*
 	 * Shrink the vma to just the new range. Always succeeds.
@@ -1004,6 +1050,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
 	task_unlock(tsk);
 	return buf;
 }
+EXPORT_SYMBOL_GPL(get_task_comm);
 
 void set_task_comm(struct task_struct *tsk, char *buf)
 {
@@ -1379,10 +1426,10 @@ EXPORT_SYMBOL(search_binary_handler);
 /*
  * sys_execve() executes a new program.
  */
-int do_execve(const char * filename,
-	const char __user *const __user *argv,
-	const char __user *const __user *envp,
-	struct pt_regs * regs)
+static int do_execve_common(const char *filename,
+				struct user_arg_ptr argv,
+				struct user_arg_ptr envp,
+				struct pt_regs *regs)
 {
 	struct linux_binprm *bprm;
 	struct file *file;
@@ -1489,6 +1536,34 @@ out_ret:
 	return retval;
 }
 
+int do_execve(const char *filename,
+	const char __user *const __user *__argv,
+	const char __user *const __user *__envp,
+	struct pt_regs *regs)
+{
+	struct user_arg_ptr argv = { .ptr.native = __argv };
+	struct user_arg_ptr envp = { .ptr.native = __envp };
+	return do_execve_common(filename, argv, envp, regs);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_do_execve(char *filename,
+	compat_uptr_t __user *__argv,
+	compat_uptr_t __user *__envp,
+	struct pt_regs *regs)
+{
+	struct user_arg_ptr argv = {
+		.is_compat = true,
+		.ptr.compat = __argv,
+	};
+	struct user_arg_ptr envp = {
+		.is_compat = true,
+		.ptr.compat = __envp,
+	};
+	return do_execve_common(filename, argv, envp, regs);
+}
+#endif
+
 void set_binfmt(struct linux_binfmt *new)
 {
 	struct mm_struct *mm = current->mm;
@@ -1548,6 +1623,41 @@ expand_fail:
 	return ret;
 }
 
+static int cn_print_exe_file(struct core_name *cn)
+{
+	struct file *exe_file;
+	char *pathbuf, *path, *p;
+	int ret;
+
+	exe_file = get_mm_exe_file(current->mm);
+	if (!exe_file)
+		return cn_printf(cn, "(unknown)");
+
+	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+	if (!pathbuf) {
+		ret = -ENOMEM;
+		goto put_exe_file;
+	}
+
+	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+	if (IS_ERR(path)) {
+		ret = PTR_ERR(path);
+		goto free_buf;
+	}
+
+	for (p = path; *p; p++)
+		if (*p == '/')
+			*p = '!';
+
+	ret = cn_printf(cn, "%s", path);
+
+free_buf:
+	kfree(pathbuf);
+put_exe_file:
+	fput(exe_file);
+	return ret;
+}
+
 /* format_corename will inspect the pattern parameter, and output a
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
@@ -1619,6 +1729,9 @@ static int format_corename(struct core_name *cn, long signr)
 		case 'e':
 			err = cn_printf(cn, "%s", current->comm);
 			break;
+		case 'E':
+			err = cn_print_exe_file(cn);
+			break;
 		/* core limit size */
 		case 'c':
 			err = cn_printf(cn, "%lu",
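With cn_print_exe_file() and the 'E' case above, core_pattern gains a %E specifier that expands to the path of the dumping executable with '/' rewritten to '!'. A usage sketch from userspace; the pattern string is an example, not mandated by the patch:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/core_pattern", "w");

		if (!f)
			return 1;
		/* A crash of /usr/bin/foo then dumps to core.!usr!bin!foo.<pid> */
		fputs("core.%E.%p", f);
		fclose(f);
		return 0;
	}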
@@ -1659,6 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code)
 
 	t = start;
 	do {
+		task_clear_group_stop_pending(t);
 		if (t != current && t->mm) {
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0a78dae7e2cb..1dd62ed35b85 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	brelse(bh);
 
 	if (!sb_set_blocksize(sb, blocksize)) {
-		ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
+		ext2_msg(sb, KERN_ERR,
+			"error: bad blocksize %d", blocksize);
 		goto failed_sbi;
 	}
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 32f3b8695859..34b6d9bfc48a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 	frame->at = entries;
 	frame->bh = bh;
 	bh = bh2;
+	/*
+	 * Mark buffers dirty here so that if do_split() fails we write a
+	 * consistent set of buffers to disk.
+	 */
+	ext3_journal_dirty_metadata(handle, frame->bh);
+	ext3_journal_dirty_metadata(handle, bh);
 	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-	dx_release (frames);
-	if (!(de))
+	if (!de) {
+		ext3_mark_inode_dirty(handle, dir);
+		dx_release(frames);
 		return retval;
+	}
+	dx_release(frames);
 
 	return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
@@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir,
 	handle_t *handle;
 	struct inode * inode;
 	int l, err, retries = 0;
+	int credits;
 
 	l = strlen(symname)+1;
 	if (l > dir->i_sb->s_blocksize)
@@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir,
 
 	dquot_initialize(dir);
 
+	if (l > EXT3_N_BLOCKS * 4) {
+		/*
+		 * For non-fast symlinks, we just allocate inode and put it on
+		 * orphan list in the first transaction => we need bitmap,
+		 * group descriptor, sb, inode block, quota blocks.
+		 */
+		credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+	} else {
+		/*
+		 * Fast symlink. We have to add entry to directory
+		 * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS),
+		 * allocate new inode (bitmap, group descriptor, inode block,
+		 * quota blocks, sb is already counted in previous macros).
+		 */
+		credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+			  EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+			  EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+	}
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+	handle = ext3_journal_start(dir, credits);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2211,21 +2237,45 @@ retry:
 	if (IS_ERR(inode))
 		goto out_stop;
 
-	if (l > sizeof (EXT3_I(inode)->i_data)) {
+	if (l > EXT3_N_BLOCKS * 4) {
 		inode->i_op = &ext3_symlink_inode_operations;
 		ext3_set_aops(inode);
 		/*
-		 * page_symlink() calls into ext3_prepare/commit_write.
-		 * We have a transaction open. All is sweetness. It also sets
-		 * i_size in generic_commit_write().
+		 * We cannot call page_symlink() with transaction started
+		 * because it calls into ext3_write_begin() which acquires page
+		 * lock which ranks below transaction start (and it can also
+		 * wait for journal commit if we are running out of space). So
+		 * we have to stop transaction now and restart it when symlink
+		 * contents is written.
+		 *
+		 * To keep fs consistent in case of crash, we have to put inode
+		 * to orphan list in the mean time.
 		 */
+		drop_nlink(inode);
+		err = ext3_orphan_add(handle, inode);
+		ext3_journal_stop(handle);
+		if (err)
+			goto err_drop_inode;
 		err = __page_symlink(inode, symname, l, 1);
+		if (err)
+			goto err_drop_inode;
+		/*
+		 * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS
+		 * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
+		 */
+		handle = ext3_journal_start(dir,
+				EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+				EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
+		if (IS_ERR(handle)) {
+			err = PTR_ERR(handle);
+			goto err_drop_inode;
+		}
+		inc_nlink(inode);
+		err = ext3_orphan_del(handle, inode);
 		if (err) {
+			ext3_journal_stop(handle);
 			drop_nlink(inode);
-			unlock_new_inode(inode);
-			ext3_mark_inode_dirty(handle, inode);
-			iput (inode);
-			goto out_stop;
+			goto err_drop_inode;
 		}
 	} else {
 		inode->i_op = &ext3_fast_symlink_inode_operations;
@@ -2239,6 +2289,10 @@ out_stop:
 	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
+err_drop_inode:
+	unlock_new_inode(inode);
+	iput(inode);
+	return err;
 }
 
 static int ext3_link (struct dentry * old_dentry,
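The two credit formulas introduced above encode one decision: a slow (page-backed) symlink only allocates the inode and puts it on the orphan list in its first transaction, while a fast symlink does everything in one go. A hedged restatement as a helper — not part of the patch, same arithmetic:

	static int ext3_symlink_credits(struct super_block *sb, int len)
	{
		if (len > EXT3_N_BLOCKS * 4)
			/* slow symlink: bitmap, group desc, sb, inode + quota */
			return 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(sb);
		/* fast symlink: directory entry plus new inode, one transaction */
		return EXT3_DATA_TRANS_BLOCKS(sb) +
		       EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
		       EXT3_MAXQUOTAS_INIT_BLOCKS(sb);
	}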
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3c6a9e0eadc1..aad153ef6b78 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -36,6 +36,7 @@
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 #include <linux/log2.h>
+#include <linux/cleancache.h>
 
 #include <asm/uaccess.h>
 
@@ -1367,6 +1368,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
 	} else {
 		ext3_msg(sb, KERN_INFO, "using internal journal");
 	}
+	cleancache_init_fs(sb);
 	return res;
 }
 
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index c947e36eda6c..04109460ba9e 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
+		mmp.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1c67139ad4b4..264f6949511e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -362,130 +362,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 }
 
 /**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
- * @handle:	handle to this transaction
- * @sb:		super block
- * @block:	start physcial block to add to the block group
- * @count:	number of blocks to free
- *
- * This marks the blocks as free in the bitmap. We ask the
- * mballoc to reload the buddy after this by setting group
- * EXT4_GROUP_INFO_NEED_INIT_BIT flag
- */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-			 ext4_fsblk_t block, unsigned long count)
-{
-	struct buffer_head *bitmap_bh = NULL;
-	struct buffer_head *gd_bh;
-	ext4_group_t block_group;
-	ext4_grpblk_t bit;
-	unsigned int i;
-	struct ext4_group_desc *desc;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	int err = 0, ret, blk_free_count;
-	ext4_grpblk_t blocks_freed;
-	struct ext4_group_info *grp;
-
-	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
-
-	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
-	grp = ext4_get_group_info(sb, block_group);
-	/*
-	 * Check to see if we are freeing blocks across a group
-	 * boundary.
-	 */
-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-		goto error_return;
-	}
-	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-	if (!bitmap_bh)
-		goto error_return;
-	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
-	if (!desc)
-		goto error_return;
-
-	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
-	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
-	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
-	    in_range(block + count - 1, ext4_inode_table(sb, desc),
-		     sbi->s_itb_per_group)) {
-		ext4_error(sb, "Adding blocks in system zones - "
-			   "Block = %llu, count = %lu",
-			   block, count);
-		goto error_return;
-	}
-
-	/*
-	 * We are about to add blocks to the bitmap,
-	 * so we need undo access.
-	 */
-	BUFFER_TRACE(bitmap_bh, "getting undo access");
-	err = ext4_journal_get_undo_access(handle, bitmap_bh);
-	if (err)
-		goto error_return;
-
-	/*
-	 * We are about to modify some metadata. Call the journal APIs
-	 * to unshare ->b_data if a currently-committing transaction is
-	 * using it
-	 */
-	BUFFER_TRACE(gd_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gd_bh);
-	if (err)
-		goto error_return;
-	/*
-	 * make sure we don't allow a parallel init on other groups in the
-	 * same buddy cache
-	 */
-	down_write(&grp->alloc_sem);
-	for (i = 0, blocks_freed = 0; i < count; i++) {
-		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-						bit + i, bitmap_bh->b_data)) {
-			ext4_error(sb, "bit already cleared for block %llu",
-				   (ext4_fsblk_t)(block + i));
-			BUFFER_TRACE(bitmap_bh, "bit already cleared");
-		} else {
-			blocks_freed++;
-		}
-	}
-	ext4_lock_group(sb, block_group);
-	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
-	ext4_free_blks_set(sb, desc, blk_free_count);
-	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
-
-	if (sbi->s_log_groups_per_flex) {
-		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(blocks_freed,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
-	}
-	/*
-	 * request to reload the buddy with the
-	 * new bitmap information
-	 */
-	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-	grp->bb_free += blocks_freed;
-	up_write(&grp->alloc_sem);
-
-	/* We dirtied the bitmap block */
-	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
-	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-
-	/* And the group descriptor block */
-	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
-	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
-	if (!err)
-		err = ret;
-
-error_return:
-	brelse(bitmap_bh);
-	ext4_std_error(sb, err);
-	return;
-}
-
-/**
  * ext4_has_free_blocks()
  * @sbi:	in-core super block structure.
  * @nblocks:	number of needed blocks
@@ -493,7 +369,8 @@ error_return:
  * Check if filesystem has nblocks free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
+				s64 nblocks, unsigned int flags)
 {
 	s64 free_blocks, dirty_blocks, root_blocks;
 	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
@@ -507,11 +384,6 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 	    EXT4_FREEBLOCKS_WATERMARK) {
 		free_blocks  = percpu_counter_sum_positive(fbc);
 		dirty_blocks = percpu_counter_sum_positive(dbc);
-		if (dirty_blocks < 0) {
-			printk(KERN_CRIT "Dirty block accounting "
-					"went wrong %lld\n",
-					(long long)dirty_blocks);
-		}
 	}
 	/* Check whether we have space after
 	 * accounting for current dirty blocks & root reserved blocks.
@@ -522,7 +394,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 	/* Hm, nope. Are (enough) root reserved blocks available? */
 	if (sbi->s_resuid == current_fsuid() ||
 	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
-	    capable(CAP_SYS_RESOURCE)) {
+	    capable(CAP_SYS_RESOURCE) ||
+		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+
 		if (free_blocks >= (nblocks + dirty_blocks))
 			return 1;
 	}
@@ -531,9 +405,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 }
 
 int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-						s64 nblocks)
+			   s64 nblocks, unsigned int flags)
 {
-	if (ext4_has_free_blocks(sbi, nblocks)) {
+	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
 		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
 		return 0;
 	} else
@@ -554,7 +428,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
+	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
 	    (*retries)++ > 3 ||
 	    !EXT4_SB(sb)->s_journal)
 		return 0;
@@ -577,7 +451,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * error stores in errp pointer
  */
 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, unsigned long *count, int *errp)
+				  ext4_fsblk_t goal, unsigned int flags,
+				  unsigned long *count, int *errp)
 {
 	struct ext4_allocation_request ar;
 	ext4_fsblk_t ret;
@@ -587,6 +462,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	ar.inode = inode;
 	ar.goal = goal;
 	ar.len = count ? *count : 1;
+	ar.flags = flags;
 
 	ret = ext4_mb_new_blocks(handle, &ar, errp);
 	if (count)
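Every allocation path above now threads an extra flags word down to ext4_has_free_blocks(), so a caller that must make forward progress (for example while freeing space) can opt into the root reserve. A hedged caller sketch using the new signature; demo_alloc_meta is illustrative, not from the patch:

	static ext4_fsblk_t demo_alloc_meta(handle_t *handle, struct inode *inode,
					    ext4_fsblk_t goal, int *err)
	{
		unsigned long count = 1;

		/* EXT4_MB_USE_ROOT_BLOCKS: may dip into the root reserve. */
		return ext4_new_meta_blocks(handle, inode, goal,
					    EXT4_MB_USE_ROOT_BLOCKS,
					    &count, err);
	}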
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4daaf2b753f4..a74b89c09f90 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t;
 #define EXT4_MB_DELALLOC_RESERVED	0x0400
 /* We are doing stream allocation */
 #define EXT4_MB_STREAM_ALLOC		0x0800
-
+/* Use reserved root blocks if needed */
+#define EXT4_MB_USE_ROOT_BLOCKS		0x1000
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -209,6 +210,8 @@ struct ext4_io_submit {
  */
 #define	EXT4_BAD_INO		 1	/* Bad blocks inode */
 #define EXT4_ROOT_INO		 2	/* Root inode */
+#define EXT4_USR_QUOTA_INO	 3	/* User quota inode */
+#define EXT4_GRP_QUOTA_INO	 4	/* Group quota inode */
 #define EXT4_BOOT_LOADER_INO	 5	/* Boot loader inode */
 #define EXT4_UNDEL_DIR_INO	 6	/* Undelete directory inode */
 #define EXT4_RESIZE_INO		 7	/* Reserved group descriptors inode */
@@ -512,6 +515,10 @@ struct ext4_new_group_data {
 	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+	/* Punch out blocks of an extent */
+#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
+	/* Don't normalize allocation size (used for fallocate) */
+#define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
 
 /*
  * Flags used by ext4_free_blocks
@@ -1028,7 +1035,7 @@ struct ext4_super_block {
 	__le16  s_want_extra_isize;	/* New inodes should reserve # bytes */
 	__le32	s_flags;		/* Miscellaneous flags */
 	__le16  s_raid_stride;		/* RAID stride */
-	__le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+	__le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
 	__le64  s_mmp_block;            /* Block for multi-mount protection */
 	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
 	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
@@ -1144,6 +1151,9 @@ struct ext4_sb_info {
 	unsigned long s_ext_blocks;
 	unsigned long s_ext_extents;
 #endif
+	/* ext4 extent cache stats */
+	unsigned long extent_cache_hits;
+	unsigned long extent_cache_misses;
 
 	/* for buddy allocator */
 	struct ext4_group_info ***s_group_info;
@@ -1201,6 +1211,9 @@ struct ext4_sb_info {
 	struct ext4_li_request *s_li_request;
 	/* Wait multiplier for lazy initialization thread */
 	unsigned int s_li_wait_mult;
+
+	/* Kernel thread for multiple mount protection */
+	struct task_struct *s_mmp_tsk;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1338,6 +1351,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
+#define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1351,13 +1365,29 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
 #define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
 
+#define EXT2_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT2_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
+#define EXT3_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT3_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT4_FEATURE_INCOMPAT_RECOVER| \
+					 EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT4_FEATURE_INCOMPAT_RECOVER| \
 					 EXT4_FEATURE_INCOMPAT_META_BG| \
 					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
 					 EXT4_FEATURE_INCOMPAT_64BIT| \
-					 EXT4_FEATURE_INCOMPAT_FLEX_BG)
+					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+					 EXT4_FEATURE_INCOMPAT_MMP)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -1590,12 +1620,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
  */
 struct ext4_lazy_init {
 	unsigned long		li_state;
-
-	wait_queue_head_t	li_wait_daemon;
-	wait_queue_head_t	li_wait_task;
-	struct timer_list	li_timer;
-	struct task_struct	*li_task;
-
 	struct list_head	li_request_list;
 	struct mutex		li_list_mtx;
 };
@@ -1615,6 +1639,67 @@ struct ext4_features {
 };
 
 /*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the block number saved in the s_mmp_block field in the
+ * superblock. Programs that check MMP should assume that if
+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
+ * to use the filesystem, regardless of how old the timestamp is.
+ */
+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
+
+struct mmp_struct {
+	__le32	mmp_magic;		/* Magic number for MMP */
+	__le32	mmp_seq;		/* Sequence no. updated periodically */
+
+	/*
+	 * mmp_time, mmp_nodename & mmp_bdevname are only used for information
+	 * purposes and do not affect the correctness of the algorithm
+	 */
+	__le64	mmp_time;		/* Time last updated */
+	char	mmp_nodename[64];	/* Node which last updated MMP block */
+	char	mmp_bdevname[32];	/* Bdev which last updated MMP block */
+
+	/*
+	 * mmp_check_interval is used to verify if the MMP block has been
+	 * updated on the block device. The value is updated based on the
+	 * maximum time to write the MMP block during an update cycle.
+	 */
+	__le16	mmp_check_interval;
+
+	__le16	mmp_pad1;
+	__le32	mmp_pad2[227];
+};
+
+/* arguments passed to the mmp thread */
+struct mmpd_data {
+	struct buffer_head *bh; /* bh from initial read_mmp_block() */
+	struct super_block *sb; /* super block of the fs */
+};
+
+/*
+ * Check interval multiplier
+ * The MMP block is written every update interval and initially checked every
+ * update interval x the multiplier (the value is then adapted based on the
+ * write latency). The reason is that writes can be delayed under load and we
+ * don't want readers to incorrectly assume that the filesystem is no longer
+ * in use.
+ */
+#define EXT4_MMP_CHECK_MULT		2UL
+
+/*
+ * Minimum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MIN_CHECK_INTERVAL	5UL
+
+/*
+ * Maximum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MAX_CHECK_INTERVAL	300UL
+
+/*
  * Function prototypes
  */
 
@@ -1638,10 +1723,12 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
 			ext4_group_t group);
 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-			ext4_fsblk_t goal, unsigned long *count, int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-				ext4_fsblk_t block, unsigned long count);
+					 ext4_fsblk_t goal,
+					 unsigned int flags,
+					 unsigned long *count,
+					 int *errp);
+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+				  s64 nblocks, unsigned int flags);
 extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@ -1706,6 +1793,8 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			     unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
+extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+				ext4_fsblk_t block, unsigned long count);
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 
 /* inode.c */
@@ -1729,6 +1818,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int  ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int  ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -1738,6 +1828,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
+extern int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1788,6 +1880,10 @@ extern void __ext4_warning(struct super_block *, const char *, unsigned int,
 			__LINE__, ## message)
 extern void ext4_msg(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
+			   const char *, unsigned int, const char *);
+#define dump_mmp_msg(sb, mmp, msg)	__dump_mmp_msg(sb, mmp, __func__, \
+						       __LINE__, msg)
 extern void __ext4_grp_locked_error(const char *, unsigned int, \
 				    struct super_block *, ext4_group_t, \
 				    unsigned long, ext4_fsblk_t, \
@@ -2064,6 +2160,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			       struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
+extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
+				loff_t length);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
@@ -2092,6 +2190,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			   int len,
 			   struct writeback_control *wbc);
 
+/* mmp.c */
+extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+
 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
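The mmp_struct and sequence codes above imply the open-time handshake that the new mmp.c implements: read the MMP block, remember mmp_seq, wait longer than the writer's update interval, re-read, and refuse the mount if the sequence moved. A hedged sketch of that first check only — the real ext4_multi_mount_protect() also starts the s_mmp_tsk writer thread and performs the delayed re-read:

	static int mmp_check_sketch(struct super_block *sb, ext4_fsblk_t mmp_block)
	{
		struct buffer_head *bh = sb_bread(sb, mmp_block);
		struct mmp_struct *mmp;
		u32 seq;

		if (!bh)
			return -EIO;
		mmp = (struct mmp_struct *)bh->b_data;
		if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
			brelse(bh);
			return -EINVAL;
		}
		seq = le32_to_cpu(mmp->mmp_seq);
		brelse(bh);

		if (seq == EXT4_MMP_SEQ_CLEAN)
			return 0;		/* last user unmounted cleanly */
		if (seq == EXT4_MMP_SEQ_FSCK || seq > EXT4_MMP_SEQ_MAX)
			return -EBUSY;		/* fsck owns it / unknown code */
		/* Otherwise: sleep EXT4_MMP_CHECK_MULT * mmp_check_interval
		 * seconds, re-read the block, and fail if mmp_seq changed. */
		return 0;
	}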
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 6e272ef6ba96..f5240aa15601 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,20 +6,6 @@
 
 #include <trace/events/ext4.h>
 
-int __ext4_journal_get_undo_access(const char *where, unsigned int line,
-				   handle_t *handle, struct buffer_head *bh)
-{
-	int err = 0;
-
-	if (ext4_handle_valid(handle)) {
-		err = jbd2_journal_get_undo_access(handle, bh);
-		if (err)
-			ext4_journal_abort_handle(where, line, __func__, bh,
-						  handle, err);
-	}
-	return err;
-}
-
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
 				    handle_t *handle, struct buffer_head *bh)
 {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d0f53538a57f..bb85757689b6 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -126,9 +126,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
 			       const char *err_fn,
 			       struct buffer_head *bh, handle_t *handle, int err);
 
-int __ext4_journal_get_undo_access(const char *where, unsigned int line,
-				   handle_t *handle, struct buffer_head *bh);
-
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
 				    handle_t *handle, struct buffer_head *bh);
 
@@ -146,8 +143,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 int __ext4_handle_dirty_super(const char *where, unsigned int line,
 			      handle_t *handle, struct super_block *sb);
 
-#define ext4_journal_get_undo_access(handle, bh) \
-	__ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
 #define ext4_journal_get_write_access(handle, bh) \
 	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
 #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4890d6f3ad15..5199bac7fc62 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -46,6 +46,13 @@
 
 #include <trace/events/ext4.h>
 
+static int ext4_split_extent(handle_t *handle,
+				struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_map_blocks *map,
+				int split_flag,
+				int flags);
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -192,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 static ext4_fsblk_t
 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path,
-			struct ext4_extent *ex, int *err)
+			struct ext4_extent *ex, int *err, unsigned int flags)
 {
 	ext4_fsblk_t goal, newblock;
 
 	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-	newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
+	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
+					NULL, err);
 	return newblock;
 }
 
@@ -474,9 +482,43 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 	}
 	ext_debug("\n");
 }
+
+static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
+			ext4_fsblk_t newblock, int level)
+{
+	int depth = ext_depth(inode);
+	struct ext4_extent *ex;
+
+	if (depth != level) {
+		struct ext4_extent_idx *idx;
+		idx = path[level].p_idx;
+		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
+			ext_debug("%d: move %d:%llu in new index %llu\n", level,
+					le32_to_cpu(idx->ei_block),
+					ext4_idx_pblock(idx),
+					newblock);
+			idx++;
+		}
+
+		return;
+	}
+
+	ex = path[depth].p_ext;
+	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
+		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
+				le32_to_cpu(ex->ee_block),
+				ext4_ext_pblock(ex),
+				ext4_ext_is_uninitialized(ex),
+				ext4_ext_get_actual_len(ex),
+				newblock);
+		ex++;
+	}
+}
+
 #else
 #define ext4_ext_show_path(inode, path)
 #define ext4_ext_show_leaf(inode, path)
+#define ext4_ext_show_move(inode, path, newblock, level)
 #endif
 
 void ext4_ext_drop_refs(struct ext4_ext_path *path)
@@ -792,14 +834,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
792 * - initializes subtree 834 * - initializes subtree
793 */ 835 */
794static int ext4_ext_split(handle_t *handle, struct inode *inode, 836static int ext4_ext_split(handle_t *handle, struct inode *inode,
795 struct ext4_ext_path *path, 837 unsigned int flags,
796 struct ext4_extent *newext, int at) 838 struct ext4_ext_path *path,
839 struct ext4_extent *newext, int at)
797{ 840{
798 struct buffer_head *bh = NULL; 841 struct buffer_head *bh = NULL;
799 int depth = ext_depth(inode); 842 int depth = ext_depth(inode);
800 struct ext4_extent_header *neh; 843 struct ext4_extent_header *neh;
801 struct ext4_extent_idx *fidx; 844 struct ext4_extent_idx *fidx;
802 struct ext4_extent *ex;
803 int i = at, k, m, a; 845 int i = at, k, m, a;
804 ext4_fsblk_t newblock, oldblock; 846 ext4_fsblk_t newblock, oldblock;
805 __le32 border; 847 __le32 border;
@@ -847,7 +889,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
847 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); 889 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
848 for (a = 0; a < depth - at; a++) { 890 for (a = 0; a < depth - at; a++) {
849 newblock = ext4_ext_new_meta_block(handle, inode, path, 891 newblock = ext4_ext_new_meta_block(handle, inode, path,
850 newext, &err); 892 newext, &err, flags);
851 if (newblock == 0) 893 if (newblock == 0)
852 goto cleanup; 894 goto cleanup;
853 ablocks[a] = newblock; 895 ablocks[a] = newblock;
@@ -876,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
876 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 918 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
877 neh->eh_magic = EXT4_EXT_MAGIC; 919 neh->eh_magic = EXT4_EXT_MAGIC;
878 neh->eh_depth = 0; 920 neh->eh_depth = 0;
879 ex = EXT_FIRST_EXTENT(neh);
880 921
881 /* move remainder of path[depth] to the new leaf */ 922 /* move remainder of path[depth] to the new leaf */
882 if (unlikely(path[depth].p_hdr->eh_entries != 923 if (unlikely(path[depth].p_hdr->eh_entries !=
@@ -888,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
888 goto cleanup; 929 goto cleanup;
889 } 930 }
890 /* start copying from the next extent */ 931 /* start copying from the next extent */
891 /* TODO: we could do it by single memmove */ 932 m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
892 m = 0; 933 ext4_ext_show_move(inode, path, newblock, depth);
893 path[depth].p_ext++;
894 while (path[depth].p_ext <=
895 EXT_MAX_EXTENT(path[depth].p_hdr)) {
896 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
897 le32_to_cpu(path[depth].p_ext->ee_block),
898 ext4_ext_pblock(path[depth].p_ext),
899 ext4_ext_is_uninitialized(path[depth].p_ext),
900 ext4_ext_get_actual_len(path[depth].p_ext),
901 newblock);
902 /*memmove(ex++, path[depth].p_ext++,
903 sizeof(struct ext4_extent));
904 neh->eh_entries++;*/
905 path[depth].p_ext++;
906 m++;
907 }
908 if (m) { 934 if (m) {
909 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 935 struct ext4_extent *ex;
936 ex = EXT_FIRST_EXTENT(neh);
937 memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
910 le16_add_cpu(&neh->eh_entries, m); 938 le16_add_cpu(&neh->eh_entries, m);
911 } 939 }
912 940
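The rewritten copy above replaces the old counting loop with one pointer subtraction: m = EXT_MAX_EXTENT(hdr) - p_ext is the number of extents after the current one, the post-increment advances p_ext to the first entry to move, and a single memmove() transfers them (the per-entry debug output now lives in ext4_ext_show_move()). A user-space sketch of that idiom, with a plain array standing in for the extent leaf:

#include <stdio.h>
#include <string.h>

struct ent { int key; };

/* move every element after *cur into dst with one memmove(), counting
 * by pointer arithmetic the way the new ext4_ext_split() code does */
static int move_tail(struct ent *cur, struct ent *last, struct ent *dst)
{
        int m = (int)(last - cur);      /* elements strictly after cur */

        cur++;                          /* start copying from the next one */
        if (m)
                memmove(dst, cur, sizeof(struct ent) * m);
        return m;
}

int main(void)
{
        struct ent src[4] = { {1}, {2}, {3}, {4} }, dst[4];
        int m = move_tail(&src[0], &src[3], dst);

        printf("moved %d entries, first moved key = %d\n", m, dst[0].key);
        return 0;
}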
@@ -968,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
968 996
969 ext_debug("int.index at %d (block %llu): %u -> %llu\n", 997 ext_debug("int.index at %d (block %llu): %u -> %llu\n",
970 i, newblock, le32_to_cpu(border), oldblock); 998 i, newblock, le32_to_cpu(border), oldblock);
971 /* copy indexes */
972 m = 0;
973 path[i].p_idx++;
974 999
975 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 1000 /* move remainder of path[i] to the new index block */
976 EXT_MAX_INDEX(path[i].p_hdr));
977 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != 1001 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
978 EXT_LAST_INDEX(path[i].p_hdr))) { 1002 EXT_LAST_INDEX(path[i].p_hdr))) {
979 EXT4_ERROR_INODE(inode, 1003 EXT4_ERROR_INODE(inode,
@@ -982,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
982 err = -EIO; 1006 err = -EIO;
983 goto cleanup; 1007 goto cleanup;
984 } 1008 }
985 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 1009 /* start copying indexes */
986 ext_debug("%d: move %d:%llu in new index %llu\n", i, 1010 m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
987 le32_to_cpu(path[i].p_idx->ei_block), 1011 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
988 ext4_idx_pblock(path[i].p_idx), 1012 EXT_MAX_INDEX(path[i].p_hdr));
989 newblock); 1013 ext4_ext_show_move(inode, path, newblock, i);
990 /*memmove(++fidx, path[i].p_idx++,
991 sizeof(struct ext4_extent_idx));
992 neh->eh_entries++;
993 BUG_ON(neh->eh_entries > neh->eh_max);*/
994 path[i].p_idx++;
995 m++;
996 }
997 if (m) { 1014 if (m) {
998 memmove(++fidx, path[i].p_idx - m, 1015 memmove(++fidx, path[i].p_idx,
999 sizeof(struct ext4_extent_idx) * m); 1016 sizeof(struct ext4_extent_idx) * m);
1000 le16_add_cpu(&neh->eh_entries, m); 1017 le16_add_cpu(&neh->eh_entries, m);
1001 } 1018 }
@@ -1056,8 +1073,9 @@ cleanup:
1056 * just created block 1073 * just created block
1057 */ 1074 */
1058static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1075static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1059 struct ext4_ext_path *path, 1076 unsigned int flags,
1060 struct ext4_extent *newext) 1077 struct ext4_ext_path *path,
1078 struct ext4_extent *newext)
1061{ 1079{
1062 struct ext4_ext_path *curp = path; 1080 struct ext4_ext_path *curp = path;
1063 struct ext4_extent_header *neh; 1081 struct ext4_extent_header *neh;
@@ -1065,7 +1083,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1065 ext4_fsblk_t newblock; 1083 ext4_fsblk_t newblock;
1066 int err = 0; 1084 int err = 0;
1067 1085
1068 newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); 1086 newblock = ext4_ext_new_meta_block(handle, inode, path,
1087 newext, &err, flags);
1069 if (newblock == 0) 1088 if (newblock == 0)
1070 return err; 1089 return err;
1071 1090
@@ -1140,8 +1159,9 @@ out:
1140 * if no free index is found, then it requests in-depth growing. 1159 * if no free index is found, then it requests in-depth growing.
1141 */ 1160 */
1142static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1161static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1143 struct ext4_ext_path *path, 1162 unsigned int flags,
1144 struct ext4_extent *newext) 1163 struct ext4_ext_path *path,
1164 struct ext4_extent *newext)
1145{ 1165{
1146 struct ext4_ext_path *curp; 1166 struct ext4_ext_path *curp;
1147 int depth, i, err = 0; 1167 int depth, i, err = 0;
@@ -1161,7 +1181,7 @@ repeat:
1161 if (EXT_HAS_FREE_INDEX(curp)) { 1181 if (EXT_HAS_FREE_INDEX(curp)) {
1162 /* if we found index with free entry, then use that 1182 /* if we found index with free entry, then use that
1163 * entry: create all needed subtree and add new leaf */ 1183 * entry: create all needed subtree and add new leaf */
1164 err = ext4_ext_split(handle, inode, path, newext, i); 1184 err = ext4_ext_split(handle, inode, flags, path, newext, i);
1165 if (err) 1185 if (err)
1166 goto out; 1186 goto out;
1167 1187
@@ -1174,7 +1194,8 @@ repeat:
1174 err = PTR_ERR(path); 1194 err = PTR_ERR(path);
1175 } else { 1195 } else {
1176 /* tree is full, time to grow in depth */ 1196 /* tree is full, time to grow in depth */
1177 err = ext4_ext_grow_indepth(handle, inode, path, newext); 1197 err = ext4_ext_grow_indepth(handle, inode, flags,
1198 path, newext);
1178 if (err) 1199 if (err)
1179 goto out; 1200 goto out;
1180 1201
@@ -1563,7 +1584,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1563 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1584 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1564 * 1 if they got merged. 1585 * 1 if they got merged.
1565 */ 1586 */
1566static int ext4_ext_try_to_merge(struct inode *inode, 1587static int ext4_ext_try_to_merge_right(struct inode *inode,
1567 struct ext4_ext_path *path, 1588 struct ext4_ext_path *path,
1568 struct ext4_extent *ex) 1589 struct ext4_extent *ex)
1569{ 1590{
@@ -1603,6 +1624,31 @@ static int ext4_ext_try_to_merge(struct inode *inode,
1603} 1624}
1604 1625
1605/* 1626/*
1627 * This function tries to merge the @ex extent with its neighbours in the tree,
1628 * trying the left neighbour first. Returns 1 if @ex was merged with its right neighbour, else 0.
1629 */
1630static int ext4_ext_try_to_merge(struct inode *inode,
1631 struct ext4_ext_path *path,
1632 struct ext4_extent *ex) {
1633 struct ext4_extent_header *eh;
1634 unsigned int depth;
1635 int merge_done = 0;
1636 int ret = 0;
1637
1638 depth = ext_depth(inode);
1639 BUG_ON(path[depth].p_hdr == NULL);
1640 eh = path[depth].p_hdr;
1641
1642 if (ex > EXT_FIRST_EXTENT(eh))
1643 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1644
1645 if (!merge_done)
1646 ret = ext4_ext_try_to_merge_right(inode, path, ex);
1647
1648 return ret;
1649}
1650
1651/*
1606 * check if a portion of the "newext" extent overlaps with an 1652 * check if a portion of the "newext" extent overlaps with an
1607 * existing extent. 1653 * existing extent.
1608 * 1654 *
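ext4_ext_try_to_merge_right() only ever merges an extent with the one after it, so the new wrapper gets a left merge by handing it ex - 1; only if that fails does it attempt the right merge on ex itself. A toy sketch of the trick over a plain int array (merge_right() is a hypothetical stand-in with a made-up merge condition):

/* assumed helper: merges a[i] with a[i + 1] when equal, returns 1 on merge */
static int merge_right(const int *a, int n, int i)
{
        return i + 1 < n && a[i] == a[i + 1];
}

static int try_to_merge(const int *a, int n, int i)
{
        int merged_left = 0;

        if (i > 0)                              /* not the first entry */
                merged_left = merge_right(a, n, i - 1); /* merge leftwards */
        if (!merged_left)
                return merge_right(a, n, i);    /* otherwise try rightwards */
        return 0;
}

int main(void)
{
        int a[3] = { 5, 5, 7 };

        return try_to_merge(a, 3, 1);   /* returns 0: entry merged leftwards */
}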
@@ -1668,6 +1714,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1668 int depth, len, err; 1714 int depth, len, err;
1669 ext4_lblk_t next; 1715 ext4_lblk_t next;
1670 unsigned uninitialized = 0; 1716 unsigned uninitialized = 0;
1717 int flags = 0;
1671 1718
1672 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1719 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1673 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1720 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1742,7 +1789,9 @@ repeat:
1742 * There is no free space in the found leaf. 1789 * There is no free space in the found leaf.
1743 * We're gonna add a new leaf in the tree. 1790 * We're gonna add a new leaf in the tree.
1744 */ 1791 */
1745 err = ext4_ext_create_new_leaf(handle, inode, path, newext); 1792 if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
1793 flags = EXT4_MB_USE_ROOT_BLOCKS;
1794 err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
1746 if (err) 1795 if (err)
1747 goto cleanup; 1796 goto cleanup;
1748 depth = ext_depth(inode); 1797 depth = ext_depth(inode);
@@ -2003,13 +2052,25 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2003} 2052}
2004 2053
2005/* 2054/*
2055 * ext4_ext_check_cache()
2056 * Checks to see if the given block is in the cache.
2057 * If it is, the cached extent is stored in the given
2058 * cache extent pointer. If the cached extent is a hole,
2059 * this routine should be used instead of
2060 * ext4_ext_in_cache if the calling function needs to
2061 * know the size of the hole.
2062 *
2063 * @inode: The file's inode
2064 * @block: The block to look for in the cache
2065 * @ex: Pointer where the cached extent will be stored
2066 * if it contains block
2067 *
2006 * Return 0 if cache is invalid; 1 if the cache is valid 2068 * Return 0 if cache is invalid; 1 if the cache is valid
2007 */ 2069 */
2008static int 2070static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
2009ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, 2071 struct ext4_ext_cache *ex) {
2010 struct ext4_extent *ex)
2011{
2012 struct ext4_ext_cache *cex; 2072 struct ext4_ext_cache *cex;
2073 struct ext4_sb_info *sbi;
2013 int ret = 0; 2074 int ret = 0;
2014 2075
2015 /* 2076 /*
@@ -2017,26 +2078,60 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2017 */ 2078 */
2018 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 2079 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2019 cex = &EXT4_I(inode)->i_cached_extent; 2080 cex = &EXT4_I(inode)->i_cached_extent;
2081 sbi = EXT4_SB(inode->i_sb);
2020 2082
2021 /* has cache valid data? */ 2083 /* has cache valid data? */
2022 if (cex->ec_len == 0) 2084 if (cex->ec_len == 0)
2023 goto errout; 2085 goto errout;
2024 2086
2025 if (in_range(block, cex->ec_block, cex->ec_len)) { 2087 if (in_range(block, cex->ec_block, cex->ec_len)) {
2026 ex->ee_block = cpu_to_le32(cex->ec_block); 2088 memcpy(ex, cex, sizeof(struct ext4_ext_cache));
2027 ext4_ext_store_pblock(ex, cex->ec_start);
2028 ex->ee_len = cpu_to_le16(cex->ec_len);
2029 ext_debug("%u cached by %u:%u:%llu\n", 2089 ext_debug("%u cached by %u:%u:%llu\n",
2030 block, 2090 block,
2031 cex->ec_block, cex->ec_len, cex->ec_start); 2091 cex->ec_block, cex->ec_len, cex->ec_start);
2032 ret = 1; 2092 ret = 1;
2033 } 2093 }
2034errout: 2094errout:
2095 if (!ret)
2096 sbi->extent_cache_misses++;
2097 else
2098 sbi->extent_cache_hits++;
2035 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2099 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2036 return ret; 2100 return ret;
2037} 2101}
2038 2102
2039/* 2103/*
2104 * ext4_ext_in_cache()
2105 * Checks to see if the given block is in the cache.
2106 * If it is, the cached extent is stored in the given
2107 * extent pointer.
2108 *
2109 * @inode: The file's inode
2110 * @block: The block to look for in the cache
2111 * @ex: Pointer where the cached extent will be stored
2112 * if it contains block
2113 *
2114 * Return 0 if cache is invalid; 1 if the cache is valid
2115 */
2116static int
2117ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2118 struct ext4_extent *ex)
2119{
2120 struct ext4_ext_cache cex;
2121 int ret = 0;
2122
2123 if (ext4_ext_check_cache(inode, block, &cex)) {
2124 ex->ee_block = cpu_to_le32(cex.ec_block);
2125 ext4_ext_store_pblock(ex, cex.ec_start);
2126 ex->ee_len = cpu_to_le16(cex.ec_len);
2127 ret = 1;
2128 }
2129
2130 return ret;
2131}
2132
2133
2134/*
2040 * ext4_ext_rm_idx: 2135 * ext4_ext_rm_idx:
2041 * removes index from the index block. 2136 * removes index from the index block.
2042 * It's used in truncate case only, thus all requests are for 2137 * It's used in truncate case only, thus all requests are for
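After this split, ext4_ext_in_cache() is a thin wrapper and the probe itself lives in ext4_ext_check_cache(), which copies out the whole struct ext4_ext_cache; a caller can therefore see the length of a cached hole (a gap is cached with a zero physical start in this scheme), and the new hit/miss counters are bumped in exactly one place. A self-contained user-space analogue, with stand-in types and names:

#include <stdio.h>

/* stand-in for struct ext4_ext_cache; start == 0 marks a cached hole */
struct ext_cache { unsigned block, len; unsigned long long start; };

static int check_cache(const struct ext_cache *cex, unsigned lblk,
                       struct ext_cache *out)
{
        if (cex->len && lblk >= cex->block && lblk < cex->block + cex->len) {
                *out = *cex;    /* copy the whole entry, like the memcpy above */
                return 1;
        }
        return 0;
}

int main(void)
{
        struct ext_cache cache = { 100, 50, 0 }, out;   /* a cached hole */

        if (check_cache(&cache, 120, &out) && out.start == 0)
                printf("hole of %u blocks cached at %u\n", out.len, out.block);
        return 0;
}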
@@ -2163,8 +2258,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2163 ext4_free_blocks(handle, inode, NULL, start, num, flags); 2258 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2164 } else if (from == le32_to_cpu(ex->ee_block) 2259 } else if (from == le32_to_cpu(ex->ee_block)
2165 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2260 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2166 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2261 /* head removal */
2167 from, to, le32_to_cpu(ex->ee_block), ee_len); 2262 ext4_lblk_t num;
2263 ext4_fsblk_t start;
2264
2265 num = to - from;
2266 start = ext4_ext_pblock(ex);
2267
2268 ext_debug("free first %u blocks starting %llu\n", num, start);
2269 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2270
2168 } else { 2271 } else {
2169 printk(KERN_INFO "strange request: removal(2) " 2272 printk(KERN_INFO "strange request: removal(2) "
2170 "%u-%u from %u:%u\n", 2273 "%u-%u from %u:%u\n",
@@ -2173,9 +2276,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2173 return 0; 2276 return 0;
2174} 2277}
2175 2278
2279
2280/*
2281 * ext4_ext_rm_leaf() removes the extents associated with the
2282 * blocks appearing between "start" and "end", and splits the extents
2283 * if "start" and "end" appear in the same extent.
2284 *
2285 * @handle: The journal handle
2286 * @inode: The file's inode
2287 * @path: The path to the leaf
2288 * @start: The first block to remove
2289 * @end: The last block to remove
2290 */
2176static int 2291static int
2177ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2292ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2178 struct ext4_ext_path *path, ext4_lblk_t start) 2293 struct ext4_ext_path *path, ext4_lblk_t start,
2294 ext4_lblk_t end)
2179{ 2295{
2180 int err = 0, correct_index = 0; 2296 int err = 0, correct_index = 0;
2181 int depth = ext_depth(inode), credits; 2297 int depth = ext_depth(inode), credits;
@@ -2186,6 +2302,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2186 unsigned short ex_ee_len; 2302 unsigned short ex_ee_len;
2187 unsigned uninitialized = 0; 2303 unsigned uninitialized = 0;
2188 struct ext4_extent *ex; 2304 struct ext4_extent *ex;
2305 struct ext4_map_blocks map;
2189 2306
2190 /* the header must be checked already in ext4_ext_remove_space() */ 2307 /* the header must be checked already in ext4_ext_remove_space() */
2191 ext_debug("truncate since %u in leaf\n", start); 2308 ext_debug("truncate since %u in leaf\n", start);
@@ -2215,31 +2332,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2215 path[depth].p_ext = ex; 2332 path[depth].p_ext = ex;
2216 2333
2217 a = ex_ee_block > start ? ex_ee_block : start; 2334 a = ex_ee_block > start ? ex_ee_block : start;
2218 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? 2335 b = ex_ee_block + ex_ee_len - 1 < end ?
2219 ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; 2336 ex_ee_block + ex_ee_len - 1 : end;
2220 2337
2221 ext_debug(" border %u:%u\n", a, b); 2338 ext_debug(" border %u:%u\n", a, b);
2222 2339
2223 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { 2340 /* If this extent is beyond the end of the hole, skip it */
2224 block = 0; 2341 if (end <= ex_ee_block) {
2225 num = 0; 2342 ex--;
2226 BUG(); 2343 ex_ee_block = le32_to_cpu(ex->ee_block);
2344 ex_ee_len = ext4_ext_get_actual_len(ex);
2345 continue;
2346 } else if (a != ex_ee_block &&
2347 b != ex_ee_block + ex_ee_len - 1) {
2348 /*
2349 * If this is a truncate, then this condition should
2350 * never happen because at least one of the end points
2351 * needs to be on the edge of the extent.
2352 */
2353 if (end == EXT_MAX_BLOCK) {
2354 ext_debug(" bad truncate %u:%u\n",
2355 start, end);
2356 block = 0;
2357 num = 0;
2358 err = -EIO;
2359 goto out;
2360 }
2361 /*
2362 * else this is a hole punch, so the extent needs to
2363 * be split since neither edge of the hole is on the
2364 * extent edge
2365 */
2366 else {
2367 map.m_pblk = ext4_ext_pblock(ex);
2368 map.m_lblk = ex_ee_block;
2369 map.m_len = b - ex_ee_block;
2370
2371 err = ext4_split_extent(handle,
2372 inode, path, &map, 0,
2373 EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
2374 EXT4_GET_BLOCKS_PRE_IO);
2375
2376 if (err < 0)
2377 goto out;
2378
2379 ex_ee_len = ext4_ext_get_actual_len(ex);
2380
2381 b = ex_ee_block + ex_ee_len - 1 < end ?
2382 ex_ee_block + ex_ee_len - 1 : end;
2383
2384 /* Then remove tail of this extent */
2385 block = ex_ee_block;
2386 num = a - block;
2387 }
2227 } else if (a != ex_ee_block) { 2388 } else if (a != ex_ee_block) {
2228 /* remove tail of the extent */ 2389 /* remove tail of the extent */
2229 block = ex_ee_block; 2390 block = ex_ee_block;
2230 num = a - block; 2391 num = a - block;
2231 } else if (b != ex_ee_block + ex_ee_len - 1) { 2392 } else if (b != ex_ee_block + ex_ee_len - 1) {
2232 /* remove head of the extent */ 2393 /* remove head of the extent */
2233 block = a; 2394 block = b;
2234 num = b - a; 2395 num = ex_ee_block + ex_ee_len - b;
2235 /* there is no "make a hole" API yet */ 2396
2236 BUG(); 2397 /*
2398 * If this is a truncate, this condition
2399 * should never happen
2400 */
2401 if (end == EXT_MAX_BLOCK) {
2402 ext_debug(" bad truncate %u:%u\n",
2403 start, end);
2404 err = -EIO;
2405 goto out;
2406 }
2237 } else { 2407 } else {
2238 /* remove whole extent: excellent! */ 2408 /* remove whole extent: excellent! */
2239 block = ex_ee_block; 2409 block = ex_ee_block;
2240 num = 0; 2410 num = 0;
2241 BUG_ON(a != ex_ee_block); 2411 if (a != ex_ee_block) {
2242 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 2412 ext_debug(" bad truncate %u:%u\n",
2413 start, end);
2414 err = -EIO;
2415 goto out;
2416 }
2417
2418 if (b != ex_ee_block + ex_ee_len - 1) {
2419 ext_debug(" bad truncate %u:%u\n",
2420 start, end);
2421 err = -EIO;
2422 goto out;
2423 }
2243 } 2424 }
2244 2425
2245 /* 2426 /*
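The border arithmetic at the top of the loop is easiest to verify with numbers. Suppose an extent covers logical blocks 100..199 and a hole punch asks for start = 150, end = 170: then a = 150 and b = 170, neither of which lies on an extent edge, so per the comments above the extent must first be split before its tail can be trimmed. A worked sketch (values illustrative):

#include <stdio.h>

int main(void)
{
        unsigned ex_ee_block = 100, ex_ee_len = 100;    /* extent 100..199 */
        unsigned start = 150, end = 170;                /* punched range */

        unsigned a = ex_ee_block > start ? ex_ee_block : start;
        unsigned b = ex_ee_block + ex_ee_len - 1 < end ?
                        ex_ee_block + ex_ee_len - 1 : end;

        printf("a = %u, b = %u\n", a, b);               /* a = 150, b = 170 */
        if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1)
                printf("neither edge aligned: split first, then trim\n");
        return 0;
}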
@@ -2270,7 +2451,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2270 if (num == 0) { 2451 if (num == 0) {
2271 /* this extent is removed; mark slot entirely unused */ 2452 /* this extent is removed; mark slot entirely unused */
2272 ext4_ext_store_pblock(ex, 0); 2453 ext4_ext_store_pblock(ex, 0);
2273 le16_add_cpu(&eh->eh_entries, -1); 2454 } else if (block != ex_ee_block) {
2455 /*
2456 * If this was a head removal, then we need to update
2457 * the physical block since it is now at a different
2458 * location
2459 */
2460 ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
2274 } 2461 }
2275 2462
2276 ex->ee_block = cpu_to_le32(block); 2463 ex->ee_block = cpu_to_le32(block);
@@ -2286,6 +2473,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2286 if (err) 2473 if (err)
2287 goto out; 2474 goto out;
2288 2475
2476 /*
2477 * If the extent was completely released,
2478 * we need to remove it from the leaf
2479 */
2480 if (num == 0) {
2481 if (end != EXT_MAX_BLOCK) {
2482 /*
2483 * For hole punching, we need to shift all the
2484 * extents up when an extent is removed so that
2485 * we don't have blank extents in the middle
2486 */
2487 memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
2488 sizeof(struct ext4_extent));
2489
2490 /* Now get rid of the one at the end */
2491 memset(EXT_LAST_EXTENT(eh), 0,
2492 sizeof(struct ext4_extent));
2493 }
2494 le16_add_cpu(&eh->eh_entries, -1);
2495 }
2496
2289 ext_debug("new extent: %u:%u:%llu\n", block, num, 2497 ext_debug("new extent: %u:%u:%llu\n", block, num,
2290 ext4_ext_pblock(ex)); 2498 ext4_ext_pblock(ex));
2291 ex--; 2499 ex--;
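When hole punching fully releases an extent, the block above closes the resulting gap in the leaf: one memmove() shifts the tail of the array left by a slot, the now-duplicated last slot is cleared, and only then is eh_entries decremented. The same compaction step over a plain array (stand-in type; ext4 finds the last pointer with EXT_LAST_EXTENT()):

#include <stdio.h>
#include <string.h>

struct ent { int key; };

/* remove *ex from a packed array whose final element is *last */
static void remove_slot(struct ent *ex, struct ent *last)
{
        memmove(ex, ex + 1, (last - ex) * sizeof(*ex));
        memset(last, 0, sizeof(*last));         /* clear the stale tail slot */
}

int main(void)
{
        struct ent leaf[4] = { {1}, {2}, {3}, {4} };

        remove_slot(&leaf[1], &leaf[3]);        /* drop the entry with key 2 */
        printf("%d %d %d %d\n", leaf[0].key, leaf[1].key,
               leaf[2].key, leaf[3].key);       /* prints: 1 3 4 0 */
        return 0;
}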
@@ -2326,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
2326 return 1; 2534 return 1;
2327} 2535}
2328 2536
2329static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2537static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2538 ext4_lblk_t end)
2330{ 2539{
2331 struct super_block *sb = inode->i_sb; 2540 struct super_block *sb = inode->i_sb;
2332 int depth = ext_depth(inode); 2541 int depth = ext_depth(inode);
@@ -2365,7 +2574,8 @@ again:
2365 while (i >= 0 && err == 0) { 2574 while (i >= 0 && err == 0) {
2366 if (i == depth) { 2575 if (i == depth) {
2367 /* this is leaf block */ 2576 /* this is leaf block */
2368 err = ext4_ext_rm_leaf(handle, inode, path, start); 2577 err = ext4_ext_rm_leaf(handle, inode, path,
2578 start, end);
2369 /* root level has p_bh == NULL, brelse() eats this */ 2579 /* root level has p_bh == NULL, brelse() eats this */
2370 brelse(path[i].p_bh); 2580 brelse(path[i].p_bh);
2371 path[i].p_bh = NULL; 2581 path[i].p_bh = NULL;
@@ -2529,6 +2739,195 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2529 return ret; 2739 return ret;
2530} 2740}
2531 2741
2742/*
2743 * Flags used by extent splitting.
2744 */
2745#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
2746 due to ENOSPC */
2747#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
2748#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
2749
2750/*
2751 * ext4_split_extent_at() splits an extent at given block.
2752 *
2753 * @handle: the journal handle
2754 * @inode: the file inode
2755 * @path: the path to the extent
2756 * @split: the logical block where the extent is split.
2757 * @split_flag: indicates whether the extent can be zeroed out if the split
2758 * fails, and the states (init or uninit) of the new extents.
2759 * @flags: flags used to insert the new extent into the extent tree.
2760 *
2761 *
2762 * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
2763 * states are determined by @split_flag.
2764 *
2765 * There are two cases:
2766 * a> the extent is split into two extents.
2767 * b> no split is needed, and the extent is just marked.
2768 *
2769 * Returns 0 on success.
2770 */
2771static int ext4_split_extent_at(handle_t *handle,
2772 struct inode *inode,
2773 struct ext4_ext_path *path,
2774 ext4_lblk_t split,
2775 int split_flag,
2776 int flags)
2777{
2778 ext4_fsblk_t newblock;
2779 ext4_lblk_t ee_block;
2780 struct ext4_extent *ex, newex, orig_ex;
2781 struct ext4_extent *ex2 = NULL;
2782 unsigned int ee_len, depth;
2783 int err = 0;
2784
2785 ext_debug("ext4_split_extents_at: inode %lu, logical"
2786 "block %llu\n", inode->i_ino, (unsigned long long)split);
2787
2788 ext4_ext_show_leaf(inode, path);
2789
2790 depth = ext_depth(inode);
2791 ex = path[depth].p_ext;
2792 ee_block = le32_to_cpu(ex->ee_block);
2793 ee_len = ext4_ext_get_actual_len(ex);
2794 newblock = split - ee_block + ext4_ext_pblock(ex);
2795
2796 BUG_ON(split < ee_block || split >= (ee_block + ee_len));
2797
2798 err = ext4_ext_get_access(handle, inode, path + depth);
2799 if (err)
2800 goto out;
2801
2802 if (split == ee_block) {
2803 /*
2804 * case b: block @split is the block that the extent begins with,
2805 * so we just change the state of the extent, and splitting
2806 * is not needed.
2807 */
2808 if (split_flag & EXT4_EXT_MARK_UNINIT2)
2809 ext4_ext_mark_uninitialized(ex);
2810 else
2811 ext4_ext_mark_initialized(ex);
2812
2813 if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
2814 ext4_ext_try_to_merge(inode, path, ex);
2815
2816 err = ext4_ext_dirty(handle, inode, path + depth);
2817 goto out;
2818 }
2819
2820 /* case a */
2821 memcpy(&orig_ex, ex, sizeof(orig_ex));
2822 ex->ee_len = cpu_to_le16(split - ee_block);
2823 if (split_flag & EXT4_EXT_MARK_UNINIT1)
2824 ext4_ext_mark_uninitialized(ex);
2825
2826 /*
2827 * the path may lead to a new leaf, not to the original leaf
2828 * any more, after ext4_ext_insert_extent() returns
2829 */
2830 err = ext4_ext_dirty(handle, inode, path + depth);
2831 if (err)
2832 goto fix_extent_len;
2833
2834 ex2 = &newex;
2835 ex2->ee_block = cpu_to_le32(split);
2836 ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
2837 ext4_ext_store_pblock(ex2, newblock);
2838 if (split_flag & EXT4_EXT_MARK_UNINIT2)
2839 ext4_ext_mark_uninitialized(ex2);
2840
2841 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2842 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2843 err = ext4_ext_zeroout(inode, &orig_ex);
2844 if (err)
2845 goto fix_extent_len;
2846 /* update the extent length and mark as initialized */
2847 ex->ee_len = cpu_to_le16(ee_len); /* ee_len is a __le16 field */
2848 ext4_ext_try_to_merge(inode, path, ex);
2849 err = ext4_ext_dirty(handle, inode, path + depth);
2850 goto out;
2851 } else if (err)
2852 goto fix_extent_len;
2853
2854out:
2855 ext4_ext_show_leaf(inode, path);
2856 return err;
2857
2858fix_extent_len:
2859 ex->ee_len = orig_ex.ee_len;
2860 ext4_ext_dirty(handle, inode, path + depth);
2861 return err;
2862}
2863
2864/*
2865 * ext4_split_extent() splits an extent and marks the extent which is covered
2866 * by @map as @split_flag indicates.
2867 *
2868 * It may result in splitting the extent into multiple extents (up to three).
2869 * There are three possibilities:
2870 * a> There is no split required.
2871 * b> Split into two extents: the split happens at either end of the extent.
2872 * c> Split into three extents: someone is splitting in the middle of the extent.
2873 *
2874 */
2875static int ext4_split_extent(handle_t *handle,
2876 struct inode *inode,
2877 struct ext4_ext_path *path,
2878 struct ext4_map_blocks *map,
2879 int split_flag,
2880 int flags)
2881{
2882 ext4_lblk_t ee_block;
2883 struct ext4_extent *ex;
2884 unsigned int ee_len, depth;
2885 int err = 0;
2886 int uninitialized;
2887 int split_flag1, flags1;
2888
2889 depth = ext_depth(inode);
2890 ex = path[depth].p_ext;
2891 ee_block = le32_to_cpu(ex->ee_block);
2892 ee_len = ext4_ext_get_actual_len(ex);
2893 uninitialized = ext4_ext_is_uninitialized(ex);
2894
2895 if (map->m_lblk + map->m_len < ee_block + ee_len) {
2896 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
2897 EXT4_EXT_MAY_ZEROOUT : 0;
2898 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
2899 if (uninitialized)
2900 split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
2901 EXT4_EXT_MARK_UNINIT2;
2902 err = ext4_split_extent_at(handle, inode, path,
2903 map->m_lblk + map->m_len, split_flag1, flags1);
2904 if (err)
2905 goto out;
2906 }
2907
2908 ext4_ext_drop_refs(path);
2909 path = ext4_ext_find_extent(inode, map->m_lblk, path);
2910 if (IS_ERR(path))
2911 return PTR_ERR(path);
2912
2913 if (map->m_lblk >= ee_block) {
2914 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
2915 EXT4_EXT_MAY_ZEROOUT : 0;
2916 if (uninitialized)
2917 split_flag1 |= EXT4_EXT_MARK_UNINIT1;
2918 if (split_flag & EXT4_EXT_MARK_UNINIT2)
2919 split_flag1 |= EXT4_EXT_MARK_UNINIT2;
2920 err = ext4_split_extent_at(handle, inode, path,
2921 map->m_lblk, split_flag1, flags);
2922 if (err)
2923 goto out;
2924 }
2925
2926 ext4_ext_show_leaf(inode, path);
2927out:
2928 return err ? err : map->m_len;
2929}
2930
2532#define EXT4_EXT_ZERO_LEN 7 2931#define EXT4_EXT_ZERO_LEN 7
2533/* 2932/*
2534 * This function is called by ext4_ext_map_blocks() if someone tries to write 2933 * This function is called by ext4_ext_map_blocks() if someone tries to write
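A hypothetical caller of the new primitive, showing how the split_flag bits compose: split an uninitialized extent at block split, keep both halves uninitialized, and allow the ENOSPC zeroout fallback. This mirrors what ext4_split_extent() does for its first sub-split; the fragment assumes kernel context and is not compilable on its own:

        /* sketch of a caller inside fs/ext4/extents.c */
        int split_flag = EXT4_EXT_MAY_ZEROOUT |
                         EXT4_EXT_MARK_UNINIT1 |
                         EXT4_EXT_MARK_UNINIT2;

        err = ext4_split_extent_at(handle, inode, path, split,
                                   split_flag, flags | EXT4_GET_BLOCKS_PRE_IO);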
@@ -2545,17 +2944,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2545 struct ext4_map_blocks *map, 2944 struct ext4_map_blocks *map,
2546 struct ext4_ext_path *path) 2945 struct ext4_ext_path *path)
2547{ 2946{
2548 struct ext4_extent *ex, newex, orig_ex; 2947 struct ext4_map_blocks split_map;
2549 struct ext4_extent *ex1 = NULL; 2948 struct ext4_extent zero_ex;
2550 struct ext4_extent *ex2 = NULL; 2949 struct ext4_extent *ex;
2551 struct ext4_extent *ex3 = NULL;
2552 struct ext4_extent_header *eh;
2553 ext4_lblk_t ee_block, eof_block; 2950 ext4_lblk_t ee_block, eof_block;
2554 unsigned int allocated, ee_len, depth; 2951 unsigned int allocated, ee_len, depth;
2555 ext4_fsblk_t newblock;
2556 int err = 0; 2952 int err = 0;
2557 int ret = 0; 2953 int split_flag = 0;
2558 int may_zeroout;
2559 2954
2560 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" 2955 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
2561 "block %llu, max_blocks %u\n", inode->i_ino, 2956 "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2567,280 +2962,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2567 eof_block = map->m_lblk + map->m_len; 2962 eof_block = map->m_lblk + map->m_len;
2568 2963
2569 depth = ext_depth(inode); 2964 depth = ext_depth(inode);
2570 eh = path[depth].p_hdr;
2571 ex = path[depth].p_ext; 2965 ex = path[depth].p_ext;
2572 ee_block = le32_to_cpu(ex->ee_block); 2966 ee_block = le32_to_cpu(ex->ee_block);
2573 ee_len = ext4_ext_get_actual_len(ex); 2967 ee_len = ext4_ext_get_actual_len(ex);
2574 allocated = ee_len - (map->m_lblk - ee_block); 2968 allocated = ee_len - (map->m_lblk - ee_block);
2575 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2576
2577 ex2 = ex;
2578 orig_ex.ee_block = ex->ee_block;
2579 orig_ex.ee_len = cpu_to_le16(ee_len);
2580 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2581 2969
2970 WARN_ON(map->m_lblk < ee_block);
2582 /* 2971 /*
2583 * It is safe to convert extent to initialized via explicit 2972 * It is safe to convert extent to initialized via explicit
2584 * zeroout only if extent is fully insde i_size or new_size. 2973 * zeroout only if extent is fully insde i_size or new_size.
2585 */ 2974 */
2586 may_zeroout = ee_block + ee_len <= eof_block; 2975 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
2587 2976
2588 err = ext4_ext_get_access(handle, inode, path + depth);
2589 if (err)
2590 goto out;
2591 /* If extent has less than 2*EXT4_EXT_ZERO_LEN blocks, zero out directly */ 2977 /* If extent has less than 2*EXT4_EXT_ZERO_LEN blocks, zero out directly */
2592 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { 2978 if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
2593 err = ext4_ext_zeroout(inode, &orig_ex); 2979 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2980 err = ext4_ext_zeroout(inode, ex);
2594 if (err) 2981 if (err)
2595 goto fix_extent_len;
2596 /* update the extent length and mark as initialized */
2597 ex->ee_block = orig_ex.ee_block;
2598 ex->ee_len = orig_ex.ee_len;
2599 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2600 ext4_ext_dirty(handle, inode, path + depth);
2601 /* zeroed the full extent */
2602 return allocated;
2603 }
2604
2605 /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
2606 if (map->m_lblk > ee_block) {
2607 ex1 = ex;
2608 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
2609 ext4_ext_mark_uninitialized(ex1);
2610 ex2 = &newex;
2611 }
2612 /*
2613 * for sanity, update the length of the ex2 extent before
2614 * we insert ex3, if ex1 is NULL. This is to avoid temporary
2615 * overlap of blocks.
2616 */
2617 if (!ex1 && allocated > map->m_len)
2618 ex2->ee_len = cpu_to_le16(map->m_len);
2619 /* ex3: to ee_block + ee_len : uninitialised */
2620 if (allocated > map->m_len) {
2621 unsigned int newdepth;
2622 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2623 if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
2624 /*
2625 * map->m_lblk == ee_block is handled by the zerouout
2626 * at the beginning.
2627 * Mark first half uninitialized.
2628 * Mark second half initialized and zero out the
2629 * initialized extent
2630 */
2631 ex->ee_block = orig_ex.ee_block;
2632 ex->ee_len = cpu_to_le16(ee_len - allocated);
2633 ext4_ext_mark_uninitialized(ex);
2634 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2635 ext4_ext_dirty(handle, inode, path + depth);
2636
2637 ex3 = &newex;
2638 ex3->ee_block = cpu_to_le32(map->m_lblk);
2639 ext4_ext_store_pblock(ex3, newblock);
2640 ex3->ee_len = cpu_to_le16(allocated);
2641 err = ext4_ext_insert_extent(handle, inode, path,
2642 ex3, 0);
2643 if (err == -ENOSPC) {
2644 err = ext4_ext_zeroout(inode, &orig_ex);
2645 if (err)
2646 goto fix_extent_len;
2647 ex->ee_block = orig_ex.ee_block;
2648 ex->ee_len = orig_ex.ee_len;
2649 ext4_ext_store_pblock(ex,
2650 ext4_ext_pblock(&orig_ex));
2651 ext4_ext_dirty(handle, inode, path + depth);
2652 /* blocks available from map->m_lblk */
2653 return allocated;
2654
2655 } else if (err)
2656 goto fix_extent_len;
2657
2658 /*
2659 * We need to zero out the second half because
2660 * an fallocate request can update file size and
2661 * converting the second half to initialized extent
2662 * implies that we can leak some junk data to user
2663 * space.
2664 */
2665 err = ext4_ext_zeroout(inode, ex3);
2666 if (err) {
2667 /*
2668 * We should actually mark the
2669 * second half as uninit and return error
2670 * Insert would have changed the extent
2671 */
2672 depth = ext_depth(inode);
2673 ext4_ext_drop_refs(path);
2674 path = ext4_ext_find_extent(inode, map->m_lblk,
2675 path);
2676 if (IS_ERR(path)) {
2677 err = PTR_ERR(path);
2678 return err;
2679 }
2680 /* get the second half extent details */
2681 ex = path[depth].p_ext;
2682 err = ext4_ext_get_access(handle, inode,
2683 path + depth);
2684 if (err)
2685 return err;
2686 ext4_ext_mark_uninitialized(ex);
2687 ext4_ext_dirty(handle, inode, path + depth);
2688 return err;
2689 }
2690
2691 /* zeroed the second half */
2692 return allocated;
2693 }
2694 ex3 = &newex;
2695 ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
2696 ext4_ext_store_pblock(ex3, newblock + map->m_len);
2697 ex3->ee_len = cpu_to_le16(allocated - map->m_len);
2698 ext4_ext_mark_uninitialized(ex3);
2699 err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
2700 if (err == -ENOSPC && may_zeroout) {
2701 err = ext4_ext_zeroout(inode, &orig_ex);
2702 if (err)
2703 goto fix_extent_len;
2704 /* update the extent length and mark as initialized */
2705 ex->ee_block = orig_ex.ee_block;
2706 ex->ee_len = orig_ex.ee_len;
2707 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2708 ext4_ext_dirty(handle, inode, path + depth);
2709 /* zeroed the full extent */
2710 /* blocks available from map->m_lblk */
2711 return allocated;
2712
2713 } else if (err)
2714 goto fix_extent_len;
2715 /*
2716 * The depth, and hence eh & ex might change
2717 * as part of the insert above.
2718 */
2719 newdepth = ext_depth(inode);
2720 /*
2721 * update the extent length after successful insert of the
2722 * split extent
2723 */
2724 ee_len -= ext4_ext_get_actual_len(ex3);
2725 orig_ex.ee_len = cpu_to_le16(ee_len);
2726 may_zeroout = ee_block + ee_len <= eof_block;
2727
2728 depth = newdepth;
2729 ext4_ext_drop_refs(path);
2730 path = ext4_ext_find_extent(inode, map->m_lblk, path);
2731 if (IS_ERR(path)) {
2732 err = PTR_ERR(path);
2733 goto out; 2982 goto out;
2734 }
2735 eh = path[depth].p_hdr;
2736 ex = path[depth].p_ext;
2737 if (ex2 != &newex)
2738 ex2 = ex;
2739 2983
2740 err = ext4_ext_get_access(handle, inode, path + depth); 2984 err = ext4_ext_get_access(handle, inode, path + depth);
2741 if (err) 2985 if (err)
2742 goto out; 2986 goto out;
2743 2987 ext4_ext_mark_initialized(ex);
2744 allocated = map->m_len; 2988 ext4_ext_try_to_merge(inode, path, ex);
2745 2989 err = ext4_ext_dirty(handle, inode, path + depth);
2746 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2990 goto out;
2747 * to insert a extent in the middle zerout directly
2748 * otherwise give the extent a chance to merge to left
2749 */
2750 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2751 map->m_lblk != ee_block && may_zeroout) {
2752 err = ext4_ext_zeroout(inode, &orig_ex);
2753 if (err)
2754 goto fix_extent_len;
2755 /* update the extent length and mark as initialized */
2756 ex->ee_block = orig_ex.ee_block;
2757 ex->ee_len = orig_ex.ee_len;
2758 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2759 ext4_ext_dirty(handle, inode, path + depth);
2760 /* zero out the first half */
2761 /* blocks available from map->m_lblk */
2762 return allocated;
2763 }
2764 }
2765 /*
2766 * If there was a change of depth as part of the
2767 * insertion of ex3 above, we need to update the length
2768 * of the ex1 extent again here
2769 */
2770 if (ex1 && ex1 != ex) {
2771 ex1 = ex;
2772 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
2773 ext4_ext_mark_uninitialized(ex1);
2774 ex2 = &newex;
2775 }
2776 /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
2777 ex2->ee_block = cpu_to_le32(map->m_lblk);
2778 ext4_ext_store_pblock(ex2, newblock);
2779 ex2->ee_len = cpu_to_le16(allocated);
2780 if (ex2 != ex)
2781 goto insert;
2782 /*
2783 * New (initialized) extent starts from the first block
2784 * in the current extent. i.e., ex2 == ex
2785 * We have to see if it can be merged with the extent
2786 * on the left.
2787 */
2788 if (ex2 > EXT_FIRST_EXTENT(eh)) {
2789 /*
2790 * To merge left, pass "ex2 - 1" to try_to_merge(),
2791 * since it merges towards right _only_.
2792 */
2793 ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
2794 if (ret) {
2795 err = ext4_ext_correct_indexes(handle, inode, path);
2796 if (err)
2797 goto out;
2798 depth = ext_depth(inode);
2799 ex2--;
2800 }
2801 } 2991 }
2992
2802 /* 2993 /*
2803 * Try to Merge towards right. This might be required 2994 * four cases:
2804 * only when the whole extent is being written to. 2995 * 1. split the extent into three extents.
2805 * i.e. ex2 == ex and ex3 == NULL. 2996 * 2. split the extent into two extents, zero out the first half.
2997 * 3. split the extent into two extents, zero out the second half.
2998 * 4. split the extent into two extents without zeroout.
2806 */ 2999 */
2807 if (!ex3) { 3000 split_map.m_lblk = map->m_lblk;
2808 ret = ext4_ext_try_to_merge(inode, path, ex2); 3001 split_map.m_len = map->m_len;
2809 if (ret) { 3002
2810 err = ext4_ext_correct_indexes(handle, inode, path); 3003 if (allocated > map->m_len) {
3004 if (allocated <= EXT4_EXT_ZERO_LEN &&
3005 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3006 /* case 3 */
3007 zero_ex.ee_block =
3008 cpu_to_le32(map->m_lblk);
3009 zero_ex.ee_len = cpu_to_le16(allocated);
3010 ext4_ext_store_pblock(&zero_ex,
3011 ext4_ext_pblock(ex) + map->m_lblk - ee_block);
3012 err = ext4_ext_zeroout(inode, &zero_ex);
2811 if (err) 3013 if (err)
2812 goto out; 3014 goto out;
3015 split_map.m_lblk = map->m_lblk;
3016 split_map.m_len = allocated;
3017 } else if ((map->m_lblk - ee_block + map->m_len <
3018 EXT4_EXT_ZERO_LEN) &&
3019 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3020 /* case 2 */
3021 if (map->m_lblk != ee_block) {
3022 zero_ex.ee_block = ex->ee_block;
3023 zero_ex.ee_len = cpu_to_le16(map->m_lblk -
3024 ee_block);
3025 ext4_ext_store_pblock(&zero_ex,
3026 ext4_ext_pblock(ex));
3027 err = ext4_ext_zeroout(inode, &zero_ex);
3028 if (err)
3029 goto out;
3030 }
3031
3032 split_map.m_lblk = ee_block;
3033 split_map.m_len = map->m_lblk - ee_block + map->m_len;
3034 allocated = map->m_len;
2813 } 3035 }
2814 } 3036 }
2815 /* Mark modified extent as dirty */ 3037
2816 err = ext4_ext_dirty(handle, inode, path + depth); 3038 allocated = ext4_split_extent(handle, inode, path,
2817 goto out; 3039 &split_map, split_flag, 0);
2818insert: 3040 if (allocated < 0)
2819 err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); 3041 err = allocated;
2820 if (err == -ENOSPC && may_zeroout) { 3042
2821 err = ext4_ext_zeroout(inode, &orig_ex);
2822 if (err)
2823 goto fix_extent_len;
2824 /* update the extent length and mark as initialized */
2825 ex->ee_block = orig_ex.ee_block;
2826 ex->ee_len = orig_ex.ee_len;
2827 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2828 ext4_ext_dirty(handle, inode, path + depth);
2829 /* zero out the first half */
2830 return allocated;
2831 } else if (err)
2832 goto fix_extent_len;
2833out: 3043out:
2834 ext4_ext_show_leaf(inode, path);
2835 return err ? err : allocated; 3044 return err ? err : allocated;
2836
2837fix_extent_len:
2838 ex->ee_block = orig_ex.ee_block;
2839 ex->ee_len = orig_ex.ee_len;
2840 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2841 ext4_ext_mark_uninitialized(ex);
2842 ext4_ext_dirty(handle, inode, path + depth);
2843 return err;
2844} 3045}
2845 3046
2846/* 3047/*
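The rewrite above reduces ext4_ext_convert_to_initialized() to a case selection plus one ext4_split_extent() call. A rough user-space reduction of that selection, assuming EXT4_EXT_ZERO_LEN = 7 as defined just below (the real code also folds the chosen zeroout range into split_map before splitting):

#include <stdio.h>

#define ZERO_LEN 7      /* stand-in for EXT4_EXT_ZERO_LEN */

enum conv { ZERO_WHOLE, ZERO_TAIL_CASE3, ZERO_HEAD_CASE2, SPLIT_ONLY };

/* head_len stands for map->m_lblk - ee_block + map->m_len */
static enum conv pick_case(unsigned ee_len, unsigned allocated,
                           unsigned m_len, unsigned head_len, int may_zeroout)
{
        if (may_zeroout && ee_len <= 2 * ZERO_LEN)
                return ZERO_WHOLE;              /* zero the lot, no split */
        if (allocated > m_len && may_zeroout && allocated <= ZERO_LEN)
                return ZERO_TAIL_CASE3;         /* "case 3" above */
        if (allocated > m_len && may_zeroout && head_len < ZERO_LEN)
                return ZERO_HEAD_CASE2;         /* "case 2" above */
        return SPLIT_ONLY;                      /* cases 1 and 4 */
}

int main(void)
{
        /* prints 1, i.e. ZERO_TAIL_CASE3 */
        printf("%d\n", pick_case(100, 5, 3, 8, 1));
        return 0;
}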
@@ -2871,15 +3072,11 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2871 struct ext4_ext_path *path, 3072 struct ext4_ext_path *path,
2872 int flags) 3073 int flags)
2873{ 3074{
2874 struct ext4_extent *ex, newex, orig_ex; 3075 ext4_lblk_t eof_block;
2875 struct ext4_extent *ex1 = NULL; 3076 ext4_lblk_t ee_block;
2876 struct ext4_extent *ex2 = NULL; 3077 struct ext4_extent *ex;
2877 struct ext4_extent *ex3 = NULL; 3078 unsigned int ee_len;
2878 ext4_lblk_t ee_block, eof_block; 3079 int split_flag = 0, depth;
2879 unsigned int allocated, ee_len, depth;
2880 ext4_fsblk_t newblock;
2881 int err = 0;
2882 int may_zeroout;
2883 3080
2884 ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 3081 ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
2885 "block %llu, max_blocks %u\n", inode->i_ino, 3082 "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2889,156 +3086,22 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2889 inode->i_sb->s_blocksize_bits; 3086 inode->i_sb->s_blocksize_bits;
2890 if (eof_block < map->m_lblk + map->m_len) 3087 if (eof_block < map->m_lblk + map->m_len)
2891 eof_block = map->m_lblk + map->m_len; 3088 eof_block = map->m_lblk + map->m_len;
2892
2893 depth = ext_depth(inode);
2894 ex = path[depth].p_ext;
2895 ee_block = le32_to_cpu(ex->ee_block);
2896 ee_len = ext4_ext_get_actual_len(ex);
2897 allocated = ee_len - (map->m_lblk - ee_block);
2898 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
2899
2900 ex2 = ex;
2901 orig_ex.ee_block = ex->ee_block;
2902 orig_ex.ee_len = cpu_to_le16(ee_len);
2903 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
2904
2905 /* 3089 /*
2906 * It is safe to convert extent to initialized via explicit 3090 * It is safe to convert extent to initialized via explicit
2907 * zeroout only if extent is fully inside i_size or new_size. 3091 * zeroout only if extent is fully inside i_size or new_size.
2908 */ 3092 */
2909 may_zeroout = ee_block + ee_len <= eof_block; 3093 depth = ext_depth(inode);
2910 3094 ex = path[depth].p_ext;
2911 /* 3095 ee_block = le32_to_cpu(ex->ee_block);
2912 * If the uninitialized extent begins at the same logical 3096 ee_len = ext4_ext_get_actual_len(ex);
2913 * block where the write begins, and the write completely
2914 * covers the extent, then we don't need to split it.
2915 */
2916 if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
2917 return allocated;
2918
2919 err = ext4_ext_get_access(handle, inode, path + depth);
2920 if (err)
2921 goto out;
2922 /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
2923 if (map->m_lblk > ee_block) {
2924 ex1 = ex;
2925 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
2926 ext4_ext_mark_uninitialized(ex1);
2927 ex2 = &newex;
2928 }
2929 /*
2930 * for sanity, update the length of the ex2 extent before
2931 * we insert ex3, if ex1 is NULL. This is to avoid temporary
2932 * overlap of blocks.
2933 */
2934 if (!ex1 && allocated > map->m_len)
2935 ex2->ee_len = cpu_to_le16(map->m_len);
2936 /* ex3: to ee_block + ee_len : uninitialised */
2937 if (allocated > map->m_len) {
2938 unsigned int newdepth;
2939 ex3 = &newex;
2940 ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
2941 ext4_ext_store_pblock(ex3, newblock + map->m_len);
2942 ex3->ee_len = cpu_to_le16(allocated - map->m_len);
2943 ext4_ext_mark_uninitialized(ex3);
2944 err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
2945 if (err == -ENOSPC && may_zeroout) {
2946 err = ext4_ext_zeroout(inode, &orig_ex);
2947 if (err)
2948 goto fix_extent_len;
2949 /* update the extent length and mark as initialized */
2950 ex->ee_block = orig_ex.ee_block;
2951 ex->ee_len = orig_ex.ee_len;
2952 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
2953 ext4_ext_dirty(handle, inode, path + depth);
2954 /* zeroed the full extent */
2955 /* blocks available from map->m_lblk */
2956 return allocated;
2957
2958 } else if (err)
2959 goto fix_extent_len;
2960 /*
2961 * The depth, and hence eh & ex might change
2962 * as part of the insert above.
2963 */
2964 newdepth = ext_depth(inode);
2965 /*
2966 * update the extent length after successful insert of the
2967 * split extent
2968 */
2969 ee_len -= ext4_ext_get_actual_len(ex3);
2970 orig_ex.ee_len = cpu_to_le16(ee_len);
2971 may_zeroout = ee_block + ee_len <= eof_block;
2972
2973 depth = newdepth;
2974 ext4_ext_drop_refs(path);
2975 path = ext4_ext_find_extent(inode, map->m_lblk, path);
2976 if (IS_ERR(path)) {
2977 err = PTR_ERR(path);
2978 goto out;
2979 }
2980 ex = path[depth].p_ext;
2981 if (ex2 != &newex)
2982 ex2 = ex;
2983 3097
2984 err = ext4_ext_get_access(handle, inode, path + depth); 3098 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
2985 if (err) 3099 split_flag |= EXT4_EXT_MARK_UNINIT2;
2986 goto out;
2987 3100
2988 allocated = map->m_len; 3101 flags |= EXT4_GET_BLOCKS_PRE_IO;
2989 } 3102 return ext4_split_extent(handle, inode, path, map, split_flag, flags);
2990 /*
2991 * If there was a change of depth as part of the
2992 * insertion of ex3 above, we need to update the length
2993 * of the ex1 extent again here
2994 */
2995 if (ex1 && ex1 != ex) {
2996 ex1 = ex;
2997 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
2998 ext4_ext_mark_uninitialized(ex1);
2999 ex2 = &newex;
3000 }
3001 /*
3002 * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
3003 * using direct I/O, uninitialised still.
3004 */
3005 ex2->ee_block = cpu_to_le32(map->m_lblk);
3006 ext4_ext_store_pblock(ex2, newblock);
3007 ex2->ee_len = cpu_to_le16(allocated);
3008 ext4_ext_mark_uninitialized(ex2);
3009 if (ex2 != ex)
3010 goto insert;
3011 /* Mark modified extent as dirty */
3012 err = ext4_ext_dirty(handle, inode, path + depth);
3013 ext_debug("out here\n");
3014 goto out;
3015insert:
3016 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3017 if (err == -ENOSPC && may_zeroout) {
3018 err = ext4_ext_zeroout(inode, &orig_ex);
3019 if (err)
3020 goto fix_extent_len;
3021 /* update the extent length and mark as initialized */
3022 ex->ee_block = orig_ex.ee_block;
3023 ex->ee_len = orig_ex.ee_len;
3024 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3025 ext4_ext_dirty(handle, inode, path + depth);
3026 /* zero out the first half */
3027 return allocated;
3028 } else if (err)
3029 goto fix_extent_len;
3030out:
3031 ext4_ext_show_leaf(inode, path);
3032 return err ? err : allocated;
3033
3034fix_extent_len:
3035 ex->ee_block = orig_ex.ee_block;
3036 ex->ee_len = orig_ex.ee_len;
3037 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
3038 ext4_ext_mark_uninitialized(ex);
3039 ext4_ext_dirty(handle, inode, path + depth);
3040 return err;
3041} 3103}
3104
3042static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3105static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3043 struct inode *inode, 3106 struct inode *inode,
3044 struct ext4_ext_path *path) 3107 struct ext4_ext_path *path)
@@ -3047,46 +3110,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3047 struct ext4_extent_header *eh; 3110 struct ext4_extent_header *eh;
3048 int depth; 3111 int depth;
3049 int err = 0; 3112 int err = 0;
3050 int ret = 0;
3051 3113
3052 depth = ext_depth(inode); 3114 depth = ext_depth(inode);
3053 eh = path[depth].p_hdr; 3115 eh = path[depth].p_hdr;
3054 ex = path[depth].p_ext; 3116 ex = path[depth].p_ext;
3055 3117
3118 ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
3119 "block %llu, max_blocks %u\n", inode->i_ino,
3120 (unsigned long long)le32_to_cpu(ex->ee_block),
3121 ext4_ext_get_actual_len(ex));
3122
3056 err = ext4_ext_get_access(handle, inode, path + depth); 3123 err = ext4_ext_get_access(handle, inode, path + depth);
3057 if (err) 3124 if (err)
3058 goto out; 3125 goto out;
3059 /* first mark the extent as initialized */ 3126 /* first mark the extent as initialized */
3060 ext4_ext_mark_initialized(ex); 3127 ext4_ext_mark_initialized(ex);
3061 3128
3062 /* 3129 /* note: ext4_ext_correct_indexes() isn't needed here because
3063 * We have to see if it can be merged with the extent 3130 * borders are not changed
3064 * on the left.
3065 */
3066 if (ex > EXT_FIRST_EXTENT(eh)) {
3067 /*
3068 * To merge left, pass "ex - 1" to try_to_merge(),
3069 * since it merges towards right _only_.
3070 */
3071 ret = ext4_ext_try_to_merge(inode, path, ex - 1);
3072 if (ret) {
3073 err = ext4_ext_correct_indexes(handle, inode, path);
3074 if (err)
3075 goto out;
3076 depth = ext_depth(inode);
3077 ex--;
3078 }
3079 }
3080 /*
3081 * Try to Merge towards right.
3082 */ 3131 */
3083 ret = ext4_ext_try_to_merge(inode, path, ex); 3132 ext4_ext_try_to_merge(inode, path, ex);
3084 if (ret) { 3133
3085 err = ext4_ext_correct_indexes(handle, inode, path);
3086 if (err)
3087 goto out;
3088 depth = ext_depth(inode);
3089 }
3090 /* Mark modified extent as dirty */ 3134 /* Mark modified extent as dirty */
3091 err = ext4_ext_dirty(handle, inode, path + depth); 3135 err = ext4_ext_dirty(handle, inode, path + depth);
3092out: 3136out:
@@ -3302,15 +3346,19 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3302 ext4_fsblk_t newblock = 0; 3346 ext4_fsblk_t newblock = 0;
3303 int err = 0, depth, ret; 3347 int err = 0, depth, ret;
3304 unsigned int allocated = 0; 3348 unsigned int allocated = 0;
3349 unsigned int punched_out = 0;
3350 unsigned int result = 0;
3305 struct ext4_allocation_request ar; 3351 struct ext4_allocation_request ar;
3306 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3352 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3353 struct ext4_map_blocks punch_map;
3307 3354
3308 ext_debug("blocks %u/%u requested for inode %lu\n", 3355 ext_debug("blocks %u/%u requested for inode %lu\n",
3309 map->m_lblk, map->m_len, inode->i_ino); 3356 map->m_lblk, map->m_len, inode->i_ino);
3310 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3357 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3311 3358
3312 /* check in cache */ 3359 /* check in cache */
3313 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3360 if (ext4_ext_in_cache(inode, map->m_lblk, &newex) &&
3361 ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) {
3314 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3362 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3315 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3363 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3316 /* 3364 /*
@@ -3375,16 +3423,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3375 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3423 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
3376 ee_block, ee_len, newblock); 3424 ee_block, ee_len, newblock);
3377 3425
3378 /* Do not put uninitialized extent in the cache */ 3426 if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) {
3379 if (!ext4_ext_is_uninitialized(ex)) { 3427 /*
3380 ext4_ext_put_in_cache(inode, ee_block, 3428 * Do not put uninitialized extent
3381 ee_len, ee_start); 3429 * in the cache
3382 goto out; 3430 */
3431 if (!ext4_ext_is_uninitialized(ex)) {
3432 ext4_ext_put_in_cache(inode, ee_block,
3433 ee_len, ee_start);
3434 goto out;
3435 }
3436 ret = ext4_ext_handle_uninitialized_extents(
3437 handle, inode, map, path, flags,
3438 allocated, newblock);
3439 return ret;
3383 } 3440 }
3384 ret = ext4_ext_handle_uninitialized_extents(handle, 3441
3385 inode, map, path, flags, allocated, 3442 /*
3386 newblock); 3443 * Punch out the map length, but only to the
3387 return ret; 3444 * end of the extent
3445 */
3446 punched_out = allocated < map->m_len ?
3447 allocated : map->m_len;
3448
3449 /*
3450 * Since extents need to be converted to
3451 * uninitialized, they must fit in an
3452 * uninitialized extent
3453 */
3454 if (punched_out > EXT_UNINIT_MAX_LEN)
3455 punched_out = EXT_UNINIT_MAX_LEN;
3456
3457 punch_map.m_lblk = map->m_lblk;
3458 punch_map.m_pblk = newblock;
3459 punch_map.m_len = punched_out;
3460 punch_map.m_flags = 0;
3461
3462 /* Check to see if the extent needs to be split */
3463 if (punch_map.m_len != ee_len ||
3464 punch_map.m_lblk != ee_block) {
3465
3466 ret = ext4_split_extent(handle, inode,
3467 path, &punch_map, 0,
3468 EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
3469 EXT4_GET_BLOCKS_PRE_IO);
3470
3471 if (ret < 0) {
3472 err = ret;
3473 goto out2;
3474 }
3475 /*
3476 * find extent for the block at
3477 * the start of the hole
3478 */
3479 ext4_ext_drop_refs(path);
3480 kfree(path);
3481
3482 path = ext4_ext_find_extent(inode,
3483 map->m_lblk, NULL);
3484 if (IS_ERR(path)) {
3485 err = PTR_ERR(path);
3486 path = NULL;
3487 goto out2;
3488 }
3489
3490 depth = ext_depth(inode);
3491 ex = path[depth].p_ext;
3492 ee_len = ext4_ext_get_actual_len(ex);
3493 ee_block = le32_to_cpu(ex->ee_block);
3494 ee_start = ext4_ext_pblock(ex);
3495
3496 }
3497
3498 ext4_ext_mark_uninitialized(ex);
3499
3500 err = ext4_ext_remove_space(inode, map->m_lblk,
3501 map->m_lblk + punched_out);
3502
3503 goto out2;
3388 } 3504 }
3389 } 3505 }
3390 3506
@@ -3446,6 +3562,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3446 else 3562 else
3447 /* disable in-core preallocation for non-regular files */ 3563 /* disable in-core preallocation for non-regular files */
3448 ar.flags = 0; 3564 ar.flags = 0;
3565 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
3566 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
3449 newblock = ext4_mb_new_blocks(handle, &ar, &err); 3567 newblock = ext4_mb_new_blocks(handle, &ar, &err);
3450 if (!newblock) 3568 if (!newblock)
3451 goto out2; 3569 goto out2;
@@ -3529,7 +3647,11 @@ out2:
3529 } 3647 }
3530 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, 3648 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
3531 newblock, map->m_len, err ? err : allocated); 3649 newblock, map->m_len, err ? err : allocated);
3532 return err ? err : allocated; 3650
3651 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
3652 punched_out : allocated;
3653
3654 return err ? err : result;
3533} 3655}
3534 3656
3535void ext4_ext_truncate(struct inode *inode) 3657void ext4_ext_truncate(struct inode *inode)
@@ -3577,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode)
3577 3699
3578 last_block = (inode->i_size + sb->s_blocksize - 1) 3700 last_block = (inode->i_size + sb->s_blocksize - 1)
3579 >> EXT4_BLOCK_SIZE_BITS(sb); 3701 >> EXT4_BLOCK_SIZE_BITS(sb);
3580 err = ext4_ext_remove_space(inode, last_block); 3702 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK);
3581 3703
3582 /* In a multi-transaction truncate, we only make the final 3704 /* In a multi-transaction truncate, we only make the final
3583 * transaction synchronous. 3705 * transaction synchronous.
@@ -3585,8 +3707,9 @@ void ext4_ext_truncate(struct inode *inode)
3585 if (IS_SYNC(inode)) 3707 if (IS_SYNC(inode))
3586 ext4_handle_sync(handle); 3708 ext4_handle_sync(handle);
3587 3709
3588out_stop:
3589 up_write(&EXT4_I(inode)->i_data_sem); 3710 up_write(&EXT4_I(inode)->i_data_sem);
3711
3712out_stop:
3590 /* 3713 /*
3591 * If this was a simple ftruncate() and the file will remain alive, 3714 * If this was a simple ftruncate() and the file will remain alive,
3592 * then we need to clear up the orphan record which we created above. 3715 * then we need to clear up the orphan record which we created above.
@@ -3651,10 +3774,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3651 struct ext4_map_blocks map; 3774 struct ext4_map_blocks map;
3652 unsigned int credits, blkbits = inode->i_blkbits; 3775 unsigned int credits, blkbits = inode->i_blkbits;
3653 3776
3654 /* We only support the FALLOC_FL_KEEP_SIZE mode */
3655 if (mode & ~FALLOC_FL_KEEP_SIZE)
3656 return -EOPNOTSUPP;
3657
3658 /* 3777 /*
3659 * currently supporting (pre)allocate mode for extent-based 3778 * currently supporting (pre)allocate mode for extent-based
3660 * files _only_ 3779 * files _only_
@@ -3662,6 +3781,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3662 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3781 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
3663 return -EOPNOTSUPP; 3782 return -EOPNOTSUPP;
3664 3783
3784 /* Return error if mode is not supported */
3785 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
3786 return -EOPNOTSUPP;
3787
3788 if (mode & FALLOC_FL_PUNCH_HOLE)
3789 return ext4_punch_hole(file, offset, len);
3790
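/*
 * Editor's sketch, not part of the patch: with the dispatch above in
 * place, hole punching becomes reachable from userspace through the
 * ordinary fallocate(2) call. A minimal illustration -- the path and
 * offsets are invented, and FALLOC_FL_KEEP_SIZE is ORed in as the
 * fallocate API expects for punch requests:
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int punch_demo(void)
{
	int fd = open("/mnt/ext4/testfile", O_WRONLY);

	if (fd < 0)
		return -1;
	/* Deallocate 1 MiB at offset 4 MiB; i_size stays unchanged. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      4 << 20, 1 << 20) < 0)
		perror("fallocate");	/* e.g. not an extent-based file */
	close(fd);
	return 0;
}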
3665 trace_ext4_fallocate_enter(inode, offset, len, mode); 3791 trace_ext4_fallocate_enter(inode, offset, len, mode);
3666 map.m_lblk = offset >> blkbits; 3792 map.m_lblk = offset >> blkbits;
3667 /* 3793 /*
@@ -3691,7 +3817,8 @@ retry:
3691 break; 3817 break;
3692 } 3818 }
3693 ret = ext4_map_blocks(handle, inode, &map, 3819 ret = ext4_map_blocks(handle, inode, &map,
3694 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); 3820 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
3821 EXT4_GET_BLOCKS_NO_NORMALIZE);
3695 if (ret <= 0) { 3822 if (ret <= 0) {
3696#ifdef EXT4FS_DEBUG 3823#ifdef EXT4FS_DEBUG
3697 WARN_ON(ret <= 0); 3824 WARN_ON(ret <= 0);
@@ -3822,6 +3949,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3822 pgoff_t last_offset; 3949 pgoff_t last_offset;
3823 pgoff_t offset; 3950 pgoff_t offset;
3824 pgoff_t index; 3951 pgoff_t index;
3952 pgoff_t start_index = 0;
3825 struct page **pages = NULL; 3953 struct page **pages = NULL;
3826 struct buffer_head *bh = NULL; 3954 struct buffer_head *bh = NULL;
3827 struct buffer_head *head = NULL; 3955 struct buffer_head *head = NULL;
@@ -3848,39 +3976,57 @@ out:
3848 kfree(pages); 3976 kfree(pages);
3849 return EXT_CONTINUE; 3977 return EXT_CONTINUE;
3850 } 3978 }
3979 index = 0;
3851 3980
3981next_page:
3852 /* Try to find the 1st mapped buffer. */ 3982 /* Try to find the 1st mapped buffer. */
3853 end = ((__u64)pages[0]->index << PAGE_SHIFT) >> 3983 end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
3854 blksize_bits; 3984 blksize_bits;
3855 if (!page_has_buffers(pages[0])) 3985 if (!page_has_buffers(pages[index]))
3856 goto out; 3986 goto out;
3857 head = page_buffers(pages[0]); 3987 head = page_buffers(pages[index]);
3858 if (!head) 3988 if (!head)
3859 goto out; 3989 goto out;
3860 3990
3991 index++;
3861 bh = head; 3992 bh = head;
3862 do { 3993 do {
3863 if (buffer_mapped(bh)) { 3994 if (end >= newex->ec_block +
3995 newex->ec_len)
3996 /* The buffer is out of
3997 * the request range.
3998 */
3999 goto out;
4000
4001 if (buffer_mapped(bh) &&
4002 end >= newex->ec_block) {
4003 start_index = index - 1;
3864 /* get the 1st mapped buffer. */ 4004 /* get the 1st mapped buffer. */
3865 if (end > newex->ec_block +
3866 newex->ec_len)
3867 /* The buffer is out of
3868 * the request range.
3869 */
3870 goto out;
3871 goto found_mapped_buffer; 4005 goto found_mapped_buffer;
3872 } 4006 }
4007
3873 bh = bh->b_this_page; 4008 bh = bh->b_this_page;
3874 end++; 4009 end++;
3875 } while (bh != head); 4010 } while (bh != head);
3876 4011
3877 /* No mapped buffer found. */ 4012 /* No mapped buffer in the range was found
3878 goto out; 4013 * in this page; look at the next page.
 4014 */
4015 if (index >= ret) {
4016 /* There is no page left, but we need to limit
4017 * newex->ec_len.
4018 */
4019 newex->ec_len = end - newex->ec_block;
4020 goto out;
4021 }
4022 goto next_page;
3879 } else { 4023 } else {
3880 /* Find contiguous delayed buffers. */ 4024 /* Find contiguous delayed buffers. */
3881 if (ret > 0 && pages[0]->index == last_offset) 4025 if (ret > 0 && pages[0]->index == last_offset)
3882 head = page_buffers(pages[0]); 4026 head = page_buffers(pages[0]);
3883 bh = head; 4027 bh = head;
4028 index = 1;
4029 start_index = 0;
3884 } 4030 }
3885 4031
3886found_mapped_buffer: 4032found_mapped_buffer:
@@ -3903,7 +4049,7 @@ found_mapped_buffer:
3903 end++; 4049 end++;
3904 } while (bh != head); 4050 } while (bh != head);
3905 4051
3906 for (index = 1; index < ret; index++) { 4052 for (; index < ret; index++) {
3907 if (!page_has_buffers(pages[index])) { 4053 if (!page_has_buffers(pages[index])) {
3908 bh = NULL; 4054 bh = NULL;
3909 break; 4055 break;
@@ -3913,8 +4059,10 @@ found_mapped_buffer:
3913 bh = NULL; 4059 bh = NULL;
3914 break; 4060 break;
3915 } 4061 }
4062
3916 if (pages[index]->index != 4063 if (pages[index]->index !=
3917 pages[0]->index + index) { 4064 pages[start_index]->index + index
4065 - start_index) {
3918 /* Blocks are not contiguous. */ 4066 /* Blocks are not contiguous. */
3919 bh = NULL; 4067 bh = NULL;
3920 break; 4068 break;
@@ -4006,6 +4154,177 @@ static int ext4_xattr_fiemap(struct inode *inode,
4006 return (error < 0 ? error : 0); 4154 return (error < 0 ? error : 0);
4007} 4155}
4008 4156
4157/*
4158 * ext4_ext_punch_hole
4159 *
4160 * Punches a hole of "length" bytes in a file starting
4161 * at byte "offset"
4162 *
4163 * @file: The file to punch a hole in
4164 * @offset: The starting byte offset of the hole
4165 * @length: The length of the hole
4166 *
4167 * Returns the number of blocks removed or negative on error
4168 */
4169int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4170{
4171 struct inode *inode = file->f_path.dentry->d_inode;
4172 struct super_block *sb = inode->i_sb;
4173 struct ext4_ext_cache cache_ex;
4174 ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
4175 struct address_space *mapping = inode->i_mapping;
4176 struct ext4_map_blocks map;
4177 handle_t *handle;
4178 loff_t first_block_offset, last_block_offset, block_len;
4179 loff_t first_page, last_page, first_page_offset, last_page_offset;
4180 int ret, credits, blocks_released, err = 0;
4181
4182 first_block = (offset + sb->s_blocksize - 1) >>
4183 EXT4_BLOCK_SIZE_BITS(sb);
4184 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4185
4186 first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
4187 last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
4188
4189 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
4190 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
4191
4192 first_page_offset = first_page << PAGE_CACHE_SHIFT;
4193 last_page_offset = last_page << PAGE_CACHE_SHIFT;
4194
4195 /*
4196 * Write out all dirty pages to avoid race conditions,
4197 * then release them.
4198 */
4199 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4200 err = filemap_write_and_wait_range(mapping,
4201 first_page_offset == 0 ? 0 : first_page_offset-1,
4202 last_page_offset);
4203
4204 if (err)
4205 return err;
4206 }
4207
4208 /* Now release the pages */
4209 if (last_page_offset > first_page_offset) {
4210 truncate_inode_pages_range(mapping, first_page_offset,
4211 last_page_offset-1);
4212 }
4213
4214 /* finish any pending end_io work */
4215 ext4_flush_completed_IO(inode);
4216
4217 credits = ext4_writepage_trans_blocks(inode);
4218 handle = ext4_journal_start(inode, credits);
4219 if (IS_ERR(handle))
4220 return PTR_ERR(handle);
4221
4222 err = ext4_orphan_add(handle, inode);
4223 if (err)
4224 goto out;
4225
4226 /*
4227 * Now we need to zero out the non-block-aligned data.
4228 * If the hole lies within a single block, just
4229 * zero out the middle of that block
4230 */
4231 if (first_block > last_block)
4232 ext4_block_zero_page_range(handle, mapping, offset, length);
4233 else {
4234 /* zero out the head of the hole before the first block */
4235 block_len = first_block_offset - offset;
4236 if (block_len > 0)
4237 ext4_block_zero_page_range(handle, mapping,
4238 offset, block_len);
4239
4240 /* zero out the tail of the hole after the last block */
4241 block_len = offset + length - last_block_offset;
4242 if (block_len > 0) {
4243 ext4_block_zero_page_range(handle, mapping,
4244 last_block_offset, block_len);
4245 }
4246 }
4247
4248 /* If there are no blocks to remove, return now */
4249 if (first_block >= last_block)
4250 goto out;
4251
4252 down_write(&EXT4_I(inode)->i_data_sem);
4253 ext4_ext_invalidate_cache(inode);
4254 ext4_discard_preallocations(inode);
4255
4256 /*
4257 * Loop over all the blocks and identify blocks
4258 * that need to be punched out
4259 */
4260 iblock = first_block;
4261 blocks_released = 0;
4262 while (iblock < last_block) {
4263 max_blocks = last_block - iblock;
4264 num_blocks = 1;
4265 memset(&map, 0, sizeof(map));
4266 map.m_lblk = iblock;
4267 map.m_len = max_blocks;
4268 ret = ext4_ext_map_blocks(handle, inode, &map,
4269 EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
4270
4271 if (ret > 0) {
4272 blocks_released += ret;
4273 num_blocks = ret;
4274 } else if (ret == 0) {
4275 /*
4276 * If map blocks could not find the block,
4277 * then it is in a hole. If the hole was
4278 * not already cached, map blocks should have
4279 * put it in the cache, so we can read the
4280 * hole's extent out of the cache
4281 */
4282 memset(&cache_ex, 0, sizeof(cache_ex));
4283 if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
4284 !cache_ex.ec_start) {
4285
4286 /* The hole is cached */
4287 num_blocks = cache_ex.ec_block +
4288 cache_ex.ec_len - iblock;
4289
4290 } else {
4291 /* The block could not be identified */
4292 err = -EIO;
4293 break;
4294 }
4295 } else {
4296 /* Map blocks error */
4297 err = ret;
4298 break;
4299 }
4300
4301 if (num_blocks == 0) {
4302 /* This condition should never happen */
4303 ext_debug("Block lookup failed");
4304 err = -EIO;
4305 break;
4306 }
4307
4308 iblock += num_blocks;
4309 }
4310
4311 if (blocks_released > 0) {
4312 ext4_ext_invalidate_cache(inode);
4313 ext4_discard_preallocations(inode);
4314 }
4315
4316 if (IS_SYNC(inode))
4317 ext4_handle_sync(handle);
4318
4319 up_write(&EXT4_I(inode)->i_data_sem);
4320
4321out:
4322 ext4_orphan_del(handle, inode);
4323 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4324 ext4_mark_inode_dirty(handle, inode);
4325 ext4_journal_stop(handle);
4326 return err;
4327}
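/*
 * Editor's worked example for the alignment logic above (assuming a
 * 4 KiB block size): punching offset = 5000, length = 10000 gives
 *
 *	first_block = (5000 + 4095) >> 12 = 2	 first full block in the hole
 *	last_block  = (5000 + 10000) >> 12 = 3	 first block past the hole
 *
 * so only block 2 is actually deallocated, while the partial head
 * [5000, 8192) and tail [12288, 15000) are zeroed in place by
 * ext4_block_zero_page_range() instead of being punched out.
 */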
4009int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4328int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4010 __u64 start, __u64 len) 4329 __u64 start, __u64 len)
4011{ 4330{
@@ -4042,4 +4361,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4042 4361
4043 return error; 4362 return error;
4044} 4363}
4045
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 7b80d543b89e..2c0972322009 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -272,7 +272,6 @@ const struct file_operations ext4_file_operations = {
272}; 272};
273 273
274const struct inode_operations ext4_file_inode_operations = { 274const struct inode_operations ext4_file_inode_operations = {
275 .truncate = ext4_truncate,
276 .setattr = ext4_setattr, 275 .setattr = ext4_setattr,
277 .getattr = ext4_getattr, 276 .getattr = ext4_getattr,
278#ifdef CONFIG_EXT4_FS_XATTR 277#ifdef CONFIG_EXT4_FS_XATTR
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e9473cbe80df..ce66d2fe826c 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -36,7 +36,7 @@
36 36
37static void dump_completed_IO(struct inode * inode) 37static void dump_completed_IO(struct inode * inode)
38{ 38{
39#ifdef EXT4_DEBUG 39#ifdef EXT4FS_DEBUG
40 struct list_head *cur, *before, *after; 40 struct list_head *cur, *before, *after;
41 ext4_io_end_t *io, *io0, *io1; 41 ext4_io_end_t *io, *io0, *io1;
42 unsigned long flags; 42 unsigned long flags;
@@ -172,6 +172,7 @@ int ext4_sync_file(struct file *file, int datasync)
172 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 172 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
173 int ret; 173 int ret;
174 tid_t commit_tid; 174 tid_t commit_tid;
175 bool needs_barrier = false;
175 176
176 J_ASSERT(ext4_journal_current_handle() == NULL); 177 J_ASSERT(ext4_journal_current_handle() == NULL);
177 178
@@ -211,22 +212,12 @@ int ext4_sync_file(struct file *file, int datasync)
211 } 212 }
212 213
213 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 214 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
214 if (jbd2_log_start_commit(journal, commit_tid)) { 215 if (journal->j_flags & JBD2_BARRIER &&
215 /* 216 !jbd2_trans_will_send_data_barrier(journal, commit_tid))
216 * When the journal is on a different device than the 217 needs_barrier = true;
217 * fs data disk, we need to issue the barrier in 218 jbd2_log_start_commit(journal, commit_tid);
218 * writeback mode. (In ordered mode, the jbd2 layer 219 ret = jbd2_log_wait_commit(journal, commit_tid);
219 * will take care of issuing the barrier. In 220 if (needs_barrier)
220 * data=journal, all of the data blocks are written to
221 * the journal device.)
222 */
223 if (ext4_should_writeback_data(inode) &&
224 (journal->j_fs_dev != journal->j_dev) &&
225 (journal->j_flags & JBD2_BARRIER))
226 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
227 NULL);
228 ret = jbd2_log_wait_commit(journal, commit_tid);
229 } else if (journal->j_flags & JBD2_BARRIER)
230 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 221 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
231 out: 222 out:
232 trace_ext4_sync_file_exit(inode, ret); 223 trace_ext4_sync_file_exit(inode, ret);
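/*
 * Editor's sketch (an assumption mirroring the hunk above, not code from
 * the patch): the flush is now issued from fsync only when jbd2 itself
 * will not send a barrier for this commit, replacing the old guesswork
 * based on journal mode and device topology. The helper name is invented:
 */
static bool fsync_needs_own_flush(journal_t *journal, tid_t commit_tid)
{
	/* jbd2 flushes the disk cache itself when the commit carries a
	 * barrier; issuing another one here would only cost performance. */
	return (journal->j_flags & JBD2_BARRIER) &&
	       !jbd2_trans_will_send_data_barrier(journal, commit_tid);
}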
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f2fa5e8a582c..50d0e9c64584 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
639 while (target > 0) { 639 while (target > 0) {
640 count = target; 640 count = target;
641 /* allocating blocks for indirect blocks and direct blocks */ 641 /* allocating blocks for indirect blocks and direct blocks */
642 current_block = ext4_new_meta_blocks(handle, inode, 642 current_block = ext4_new_meta_blocks(handle, inode, goal,
643 goal, &count, err); 643 0, &count, err);
644 if (*err) 644 if (*err)
645 goto failed_out; 645 goto failed_out;
646 646
@@ -1930,7 +1930,7 @@ repeat:
1930 * We do still charge estimated metadata to the sb though; 1930 * We do still charge estimated metadata to the sb though;
1931 * we cannot afford to run out of free blocks. 1931 * we cannot afford to run out of free blocks.
1932 */ 1932 */
1933 if (ext4_claim_free_blocks(sbi, md_needed + 1)) { 1933 if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
1934 dquot_release_reservation_block(inode, 1); 1934 dquot_release_reservation_block(inode, 1);
1935 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1935 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1936 yield(); 1936 yield();
@@ -2796,9 +2796,7 @@ static int write_cache_pages_da(struct address_space *mapping,
2796 continue; 2796 continue;
2797 } 2797 }
2798 2798
2799 if (PageWriteback(page)) 2799 wait_on_page_writeback(page);
2800 wait_on_page_writeback(page);
2801
2802 BUG_ON(PageWriteback(page)); 2800 BUG_ON(PageWriteback(page));
2803 2801
2804 if (mpd->next_page != page->index) 2802 if (mpd->next_page != page->index)
@@ -3513,7 +3511,7 @@ retry:
3513 loff_t end = offset + iov_length(iov, nr_segs); 3511 loff_t end = offset + iov_length(iov, nr_segs);
3514 3512
3515 if (end > isize) 3513 if (end > isize)
3516 vmtruncate(inode, isize); 3514 ext4_truncate_failed_write(inode);
3517 } 3515 }
3518 } 3516 }
3519 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3517 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3916,9 +3914,30 @@ void ext4_set_aops(struct inode *inode)
3916int ext4_block_truncate_page(handle_t *handle, 3914int ext4_block_truncate_page(handle_t *handle,
3917 struct address_space *mapping, loff_t from) 3915 struct address_space *mapping, loff_t from)
3918{ 3916{
3917 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3918 unsigned length;
3919 unsigned blocksize;
3920 struct inode *inode = mapping->host;
3921
3922 blocksize = inode->i_sb->s_blocksize;
3923 length = blocksize - (offset & (blocksize - 1));
3924
3925 return ext4_block_zero_page_range(handle, mapping, from, length);
3926}
3927
3928/*
3929 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3930 * starting from file offset 'from'. The range to be zeroed must
3931 * be contained within one block. If the specified range exceeds
3932 * the end of the block it will be shortened to the end of the block
3933 * that corresponds to 'from'
3934 */
3935int ext4_block_zero_page_range(handle_t *handle,
3936 struct address_space *mapping, loff_t from, loff_t length)
3937{
3919 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 3938 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3920 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3939 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3921 unsigned blocksize, length, pos; 3940 unsigned blocksize, max, pos;
3922 ext4_lblk_t iblock; 3941 ext4_lblk_t iblock;
3923 struct inode *inode = mapping->host; 3942 struct inode *inode = mapping->host;
3924 struct buffer_head *bh; 3943 struct buffer_head *bh;
@@ -3931,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle,
3931 return -EINVAL; 3950 return -EINVAL;
3932 3951
3933 blocksize = inode->i_sb->s_blocksize; 3952 blocksize = inode->i_sb->s_blocksize;
3934 length = blocksize - (offset & (blocksize - 1)); 3953 max = blocksize - (offset & (blocksize - 1));
3954
3955 /*
3956 * clamp 'length' so the zeroed range stays between
3957 * 'from' and the end of the block
3958 */
3959 if (length > max || length < 0)
3960 length = max;
3961
3935 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 3962 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3936 3963
3937 if (!page_has_buffers(page)) 3964 if (!page_has_buffers(page))
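/*
 * Editor's worked example for the wrapper above (assuming a 1 KiB block
 * size and 4 KiB pages): for from = 5300,
 *
 *	offset = 5300 & 4095 = 1204
 *	length = 1024 - (1204 & 1023) = 1024 - 180 = 844
 *
 * so ext4_block_truncate_page() zeroes bytes [5300, 6144), i.e. from
 * 'from' to the end of its block -- exactly the tail a truncate to 5300
 * must clear -- while ext4_block_zero_page_range() lets punch hole pass
 * any shorter in-block length explicitly.
 */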
@@ -4380,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4380 4407
4381int ext4_can_truncate(struct inode *inode) 4408int ext4_can_truncate(struct inode *inode)
4382{ 4409{
4383 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4384 return 0;
4385 if (S_ISREG(inode->i_mode)) 4410 if (S_ISREG(inode->i_mode))
4386 return 1; 4411 return 1;
4387 if (S_ISDIR(inode->i_mode)) 4412 if (S_ISDIR(inode->i_mode))
@@ -4392,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode)
4392} 4417}
4393 4418
4394/* 4419/*
4420 * ext4_punch_hole: punches a hole in a file by releasing the blocks
4421 * associated with the given offset and length
4422 *
4423 * @file: The file to punch a hole in
4424 * @offset: The offset where the hole will begin
4425 * @length: The length of the hole
4426 *
4427 * Returns: 0 on success or negative on failure
4428 */
4429
4430int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
4431{
4432 struct inode *inode = file->f_path.dentry->d_inode;
4433 if (!S_ISREG(inode->i_mode))
4434 return -EOPNOTSUPP;
4435
4436 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
4437 /* TODO: Add support for non-extent hole punching */
4438 return -EOPNOTSUPP;
4439 }
4440
4441 return ext4_ext_punch_hole(file, offset, length);
4442}
4443
4444/*
4395 * ext4_truncate() 4445 * ext4_truncate()
4396 * 4446 *
4397 * We block out ext4_get_block() block instantiations across the entire 4447 * We block out ext4_get_block() block instantiations across the entire
@@ -4617,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
4617 /* 4667 /*
4618 * Figure out the offset within the block group inode table 4668 * Figure out the offset within the block group inode table
4619 */ 4669 */
4620 inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); 4670 inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
4621 inode_offset = ((inode->i_ino - 1) % 4671 inode_offset = ((inode->i_ino - 1) %
4622 EXT4_INODES_PER_GROUP(sb)); 4672 EXT4_INODES_PER_GROUP(sb));
4623 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); 4673 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
@@ -5311,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5311 5361
5312 if (S_ISREG(inode->i_mode) && 5362 if (S_ISREG(inode->i_mode) &&
5313 attr->ia_valid & ATTR_SIZE && 5363 attr->ia_valid & ATTR_SIZE &&
5314 (attr->ia_size < inode->i_size || 5364 (attr->ia_size < inode->i_size)) {
5315 (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
5316 handle_t *handle; 5365 handle_t *handle;
5317 5366
5318 handle = ext4_journal_start(inode, 3); 5367 handle = ext4_journal_start(inode, 3);
@@ -5346,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5346 goto err_out; 5395 goto err_out;
5347 } 5396 }
5348 } 5397 }
5349 /* ext4_truncate will clear the flag */
5350 if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
5351 ext4_truncate(inode);
5352 } 5398 }
5353 5399
5354 if ((attr->ia_valid & ATTR_SIZE) && 5400 if (attr->ia_valid & ATTR_SIZE) {
5355 attr->ia_size != i_size_read(inode)) 5401 if (attr->ia_size != i_size_read(inode)) {
5356 rc = vmtruncate(inode, attr->ia_size); 5402 truncate_setsize(inode, attr->ia_size);
5403 ext4_truncate(inode);
5404 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
5405 ext4_truncate(inode);
5406 }
5357 5407
5358 if (!rc) { 5408 if (!rc) {
5359 setattr_copy(inode, attr); 5409 setattr_copy(inode, attr);
@@ -5811,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5811 goto out_unlock; 5861 goto out_unlock;
5812 } 5862 }
5813 ret = 0; 5863 ret = 0;
5814 if (PageMappedToDisk(page)) 5864
5815 goto out_unlock; 5865 lock_page(page);
5866 wait_on_page_writeback(page);
5867 if (PageMappedToDisk(page)) {
5868 up_read(&inode->i_alloc_sem);
5869 return VM_FAULT_LOCKED;
5870 }
5816 5871
5817 if (page->index == size >> PAGE_CACHE_SHIFT) 5872 if (page->index == size >> PAGE_CACHE_SHIFT)
5818 len = size & ~PAGE_CACHE_MASK; 5873 len = size & ~PAGE_CACHE_MASK;
5819 else 5874 else
5820 len = PAGE_CACHE_SIZE; 5875 len = PAGE_CACHE_SIZE;
5821 5876
5822 lock_page(page);
5823 /* 5877 /*
5824 * return if we have all the buffers mapped. This avoids 5878
5825 * the need to call write_begin/write_end which does a 5879 * the need to call write_begin/write_end which does a
@@ -5829,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5829 if (page_has_buffers(page)) { 5883 if (page_has_buffers(page)) {
5830 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5884 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5831 ext4_bh_unmapped)) { 5885 ext4_bh_unmapped)) {
5832 unlock_page(page); 5886 up_read(&inode->i_alloc_sem);
5833 goto out_unlock; 5887 return VM_FAULT_LOCKED;
5834 } 5888 }
5835 } 5889 }
5836 unlock_page(page); 5890 unlock_page(page);
@@ -5850,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5850 if (ret < 0) 5904 if (ret < 0)
5851 goto out_unlock; 5905 goto out_unlock;
5852 ret = 0; 5906 ret = 0;
5907
5908 /*
5909 * write_begin/end might have created a dirty page and someone
5910 * could wander in and start the IO. Make sure that hasn't
5911 * happened.
5912 */
5913 lock_page(page);
5914 wait_on_page_writeback(page);
5915 up_read(&inode->i_alloc_sem);
5916 return VM_FAULT_LOCKED;
5853out_unlock: 5917out_unlock:
5854 if (ret) 5918 if (ret)
5855 ret = VM_FAULT_SIGBUS; 5919 ret = VM_FAULT_SIGBUS;
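/*
 * Editor's sketch (assumption): the rewritten returns above all converge
 * on one pattern -- hand the page back still locked, so writeback cannot
 * start between dirtying the page and the fault handler installing the
 * pte. The helper name is invented for illustration:
 */
static int mkwrite_return_stable(struct page *page, struct inode *inode)
{
	lock_page(page);
	wait_on_page_writeback(page);	/* no IO in flight on this page */
	up_read(&inode->i_alloc_sem);
	return VM_FAULT_LOCKED;		/* the fault path unlocks the page */
}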
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d8a16eecf1d5..859f2ae8864e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
787 struct inode *inode; 787 struct inode *inode;
788 char *data; 788 char *data;
789 char *bitmap; 789 char *bitmap;
790 struct ext4_group_info *grinfo;
790 791
791 mb_debug(1, "init page %lu\n", page->index); 792 mb_debug(1, "init page %lu\n", page->index);
792 793
@@ -819,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
819 if (first_group + i >= ngroups) 820 if (first_group + i >= ngroups)
820 break; 821 break;
821 822
823 grinfo = ext4_get_group_info(sb, first_group + i);
824 /*
825 * If page is uptodate then we came here after online resize
826 * which added some new uninitialized group info structs, so
827 * we must skip all initialized uptodate buddies on the page,
828 * which may be currently in use by an allocating task.
829 */
830 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
831 bh[i] = NULL;
832 continue;
833 }
834
822 err = -EIO; 835 err = -EIO;
823 desc = ext4_get_group_desc(sb, first_group + i, NULL); 836 desc = ext4_get_group_desc(sb, first_group + i, NULL);
824 if (desc == NULL) 837 if (desc == NULL)
@@ -871,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
871 } 884 }
872 885
873 /* wait for I/O completion */ 886 /* wait for I/O completion */
874 for (i = 0; i < groups_per_page && bh[i]; i++) 887 for (i = 0; i < groups_per_page; i++)
875 wait_on_buffer(bh[i]); 888 if (bh[i])
889 wait_on_buffer(bh[i]);
876 890
877 err = -EIO; 891 err = -EIO;
878 for (i = 0; i < groups_per_page && bh[i]; i++) 892 for (i = 0; i < groups_per_page; i++)
879 if (!buffer_uptodate(bh[i])) 893 if (bh[i] && !buffer_uptodate(bh[i]))
880 goto out; 894 goto out;
881 895
882 err = 0; 896 err = 0;
883 first_block = page->index * blocks_per_page; 897 first_block = page->index * blocks_per_page;
884 /* init the page */
885 memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
886 for (i = 0; i < blocks_per_page; i++) { 898 for (i = 0; i < blocks_per_page; i++) {
887 int group; 899 int group;
888 struct ext4_group_info *grinfo;
889 900
890 group = (first_block + i) >> 1; 901 group = (first_block + i) >> 1;
891 if (group >= ngroups) 902 if (group >= ngroups)
892 break; 903 break;
893 904
905 if (!bh[group - first_group])
906 /* skip initialized uptodate buddy */
907 continue;
908
894 /* 909 /*
895 * data carry information regarding this 910 * data carry information regarding this
896 * particular group in the format specified 911 * particular group in the format specified
@@ -919,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
919 * incore got set to the group block bitmap below 934 * incore got set to the group block bitmap below
920 */ 935 */
921 ext4_lock_group(sb, group); 936 ext4_lock_group(sb, group);
937 /* init the buddy */
938 memset(data, 0xff, blocksize);
922 ext4_mb_generate_buddy(sb, data, incore, group); 939 ext4_mb_generate_buddy(sb, data, incore, group);
923 ext4_unlock_group(sb, group); 940 ext4_unlock_group(sb, group);
924 incore = NULL; 941 incore = NULL;
@@ -948,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
948 965
949out: 966out:
950 if (bh) { 967 if (bh) {
951 for (i = 0; i < groups_per_page && bh[i]; i++) 968 for (i = 0; i < groups_per_page; i++)
952 brelse(bh[i]); 969 brelse(bh[i]);
953 if (bh != &bhs) 970 if (bh != &bhs)
954 kfree(bh); 971 kfree(bh);
@@ -957,22 +974,21 @@ out:
957} 974}
958 975
959/* 976/*
960 * lock the group_info alloc_sem of all the groups 977 * Lock the buddy and bitmap pages. This makes sure a parallel init_group
961 * belonging to the same buddy cache page. This 978 * on the same buddy page cannot run while we hold the buddy page lock.
962 * make sure other parallel operation on the buddy 979 * Return the locked buddy and bitmap pages in the e4b struct. If buddy and
963 * cache doesn't happen whild holding the buddy cache 980 * bitmap are on the same page, e4b->bd_buddy_page is NULL and 0 is returned.
964 * lock
965 */ 981 */
966static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, 982static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
967 ext4_group_t group) 983 ext4_group_t group, struct ext4_buddy *e4b)
968{ 984{
969 int i; 985 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
970 int block, pnum; 986 int block, pnum, poff;
971 int blocks_per_page; 987 int blocks_per_page;
972 int groups_per_page; 988 struct page *page;
973 ext4_group_t ngroups = ext4_get_groups_count(sb); 989
974 ext4_group_t first_group; 990 e4b->bd_buddy_page = NULL;
975 struct ext4_group_info *grp; 991 e4b->bd_bitmap_page = NULL;
976 992
977 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 993 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
978 /* 994 /*
@@ -982,57 +998,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
982 */ 998 */
983 block = group * 2; 999 block = group * 2;
984 pnum = block / blocks_per_page; 1000 pnum = block / blocks_per_page;
985 first_group = pnum * blocks_per_page / 2; 1001 poff = block % blocks_per_page;
986 1002 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
987 groups_per_page = blocks_per_page >> 1; 1003 if (!page)
988 if (groups_per_page == 0) 1004 return -EIO;
989 groups_per_page = 1; 1005 BUG_ON(page->mapping != inode->i_mapping);
990 /* read all groups the page covers into the cache */ 1006 e4b->bd_bitmap_page = page;
991 for (i = 0; i < groups_per_page; i++) { 1007 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
992 1008
993 if ((first_group + i) >= ngroups) 1009 if (blocks_per_page >= 2) {
994 break; 1010 /* buddy and bitmap are on the same page */
995 grp = ext4_get_group_info(sb, first_group + i); 1011 return 0;
996 /* take all groups write allocation
997 * semaphore. This make sure there is
998 * no block allocation going on in any
999 * of that groups
1000 */
1001 down_write_nested(&grp->alloc_sem, i);
1002 } 1012 }
1003 return i; 1013
1014 block++;
1015 pnum = block / blocks_per_page;
1016 poff = block % blocks_per_page;
1017 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1018 if (!page)
1019 return -EIO;
1020 BUG_ON(page->mapping != inode->i_mapping);
1021 e4b->bd_buddy_page = page;
1022 return 0;
1004} 1023}
1005 1024
1006static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, 1025static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1007 ext4_group_t group, int locked_group)
1008{ 1026{
1009 int i; 1027 if (e4b->bd_bitmap_page) {
1010 int block, pnum; 1028 unlock_page(e4b->bd_bitmap_page);
1011 int blocks_per_page; 1029 page_cache_release(e4b->bd_bitmap_page);
1012 ext4_group_t first_group; 1030 }
1013 struct ext4_group_info *grp; 1031 if (e4b->bd_buddy_page) {
1014 1032 unlock_page(e4b->bd_buddy_page);
1015 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 1033 page_cache_release(e4b->bd_buddy_page);
1016 /*
1017 * the buddy cache inode stores the block bitmap
1018 * and buddy information in consecutive blocks.
1019 * So for each group we need two blocks.
1020 */
1021 block = group * 2;
1022 pnum = block / blocks_per_page;
1023 first_group = pnum * blocks_per_page / 2;
1024 /* release locks on all the groups */
1025 for (i = 0; i < locked_group; i++) {
1026
1027 grp = ext4_get_group_info(sb, first_group + i);
1028 /* take all groups write allocation
1029 * semaphore. This make sure there is
1030 * no block allocation going on in any
1031 * of that groups
1032 */
1033 up_write(&grp->alloc_sem);
1034 } 1034 }
1035
1036} 1035}
1037 1036
1038/* 1037/*
@@ -1044,93 +1043,60 @@ static noinline_for_stack
1044int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) 1043int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1045{ 1044{
1046 1045
1047 int ret = 0;
1048 void *bitmap;
1049 int blocks_per_page;
1050 int block, pnum, poff;
1051 int num_grp_locked = 0;
1052 struct ext4_group_info *this_grp; 1046 struct ext4_group_info *this_grp;
1053 struct ext4_sb_info *sbi = EXT4_SB(sb); 1047 struct ext4_buddy e4b;
1054 struct inode *inode = sbi->s_buddy_cache; 1048 struct page *page;
1055 struct page *page = NULL, *bitmap_page = NULL; 1049 int ret = 0;
1056 1050
1057 mb_debug(1, "init group %u\n", group); 1051 mb_debug(1, "init group %u\n", group);
1058 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1059 this_grp = ext4_get_group_info(sb, group); 1052 this_grp = ext4_get_group_info(sb, group);
1060 /* 1053 /*
1061 * This ensures that we don't reinit the buddy cache 1054 * This ensures that we don't reinit the buddy cache
1062 * page which maps to the group from which we are already 1055 * page which maps to the group from which we are already
1063 * allocating. If we are looking at the buddy cache we would 1056 * allocating. If we are looking at the buddy cache we would
1064 * have taken a reference using ext4_mb_load_buddy and that 1057 * have taken a reference using ext4_mb_load_buddy and that
1065 * would have taken the alloc_sem lock. 1058 * would have pinned the buddy page in the page cache.
1066 */ 1059 */
1067 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); 1060 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
1068 if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { 1061 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1069 /* 1062 /*
1070 * somebody initialized the group 1063 * somebody initialized the group
1071 * return without doing anything 1064 * return without doing anything
1072 */ 1065 */
1073 ret = 0;
1074 goto err; 1066 goto err;
1075 } 1067 }
1076 /* 1068
1077 * the buddy cache inode stores the block bitmap 1069 page = e4b.bd_bitmap_page;
1078 * and buddy information in consecutive blocks. 1070 ret = ext4_mb_init_cache(page, NULL);
1079 * So for each group we need two blocks. 1071 if (ret)
1080 */ 1072 goto err;
1081 block = group * 2; 1073 if (!PageUptodate(page)) {
1082 pnum = block / blocks_per_page;
1083 poff = block % blocks_per_page;
1084 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1085 if (page) {
1086 BUG_ON(page->mapping != inode->i_mapping);
1087 ret = ext4_mb_init_cache(page, NULL);
1088 if (ret) {
1089 unlock_page(page);
1090 goto err;
1091 }
1092 unlock_page(page);
1093 }
1094 if (page == NULL || !PageUptodate(page)) {
1095 ret = -EIO; 1074 ret = -EIO;
1096 goto err; 1075 goto err;
1097 } 1076 }
1098 mark_page_accessed(page); 1077 mark_page_accessed(page);
1099 bitmap_page = page;
1100 bitmap = page_address(page) + (poff * sb->s_blocksize);
1101 1078
1102 /* init buddy cache */ 1079 if (e4b.bd_buddy_page == NULL) {
1103 block++;
1104 pnum = block / blocks_per_page;
1105 poff = block % blocks_per_page;
1106 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1107 if (page == bitmap_page) {
1108 /* 1080 /*
1109 * If both the bitmap and buddy are in 1081 * If both the bitmap and buddy are in
1110 * the same page we don't need to force 1082 * the same page we don't need to force
1111 * init the buddy 1083 * init the buddy
1112 */ 1084 */
1113 unlock_page(page); 1085 ret = 0;
1114 } else if (page) { 1086 goto err;
1115 BUG_ON(page->mapping != inode->i_mapping);
1116 ret = ext4_mb_init_cache(page, bitmap);
1117 if (ret) {
1118 unlock_page(page);
1119 goto err;
1120 }
1121 unlock_page(page);
1122 } 1087 }
1123 if (page == NULL || !PageUptodate(page)) { 1088 /* init buddy cache */
1089 page = e4b.bd_buddy_page;
1090 ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
1091 if (ret)
1092 goto err;
1093 if (!PageUptodate(page)) {
1124 ret = -EIO; 1094 ret = -EIO;
1125 goto err; 1095 goto err;
1126 } 1096 }
1127 mark_page_accessed(page); 1097 mark_page_accessed(page);
1128err: 1098err:
1129 ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); 1099 ext4_mb_put_buddy_page_lock(&e4b);
1130 if (bitmap_page)
1131 page_cache_release(bitmap_page);
1132 if (page)
1133 page_cache_release(page);
1134 return ret; 1100 return ret;
1135} 1101}
1136 1102
@@ -1164,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1164 e4b->bd_group = group; 1130 e4b->bd_group = group;
1165 e4b->bd_buddy_page = NULL; 1131 e4b->bd_buddy_page = NULL;
1166 e4b->bd_bitmap_page = NULL; 1132 e4b->bd_bitmap_page = NULL;
1167 e4b->alloc_semp = &grp->alloc_sem;
1168
1169 /* Take the read lock on the group alloc
1170 * sem. This would make sure a parallel
1171 * ext4_mb_init_group happening on other
1172 * groups mapped by the page is blocked
1173 * till we are done with allocation
1174 */
1175repeat_load_buddy:
1176 down_read(e4b->alloc_semp);
1177 1133
1178 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { 1134 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1179 /* we need to check for group need init flag
1180 * with alloc_semp held so that we can be sure
1181 * that new blocks didn't get added to the group
1182 * when we are loading the buddy cache
1183 */
1184 up_read(e4b->alloc_semp);
1185 /* 1135 /*
1186 * we need full data about the group 1136 * we need full data about the group
1187 * to make a good selection 1137 * to make a good selection
@@ -1189,7 +1139,6 @@ repeat_load_buddy:
1189 ret = ext4_mb_init_group(sb, group); 1139 ret = ext4_mb_init_group(sb, group);
1190 if (ret) 1140 if (ret)
1191 return ret; 1141 return ret;
1192 goto repeat_load_buddy;
1193 } 1142 }
1194 1143
1195 /* 1144 /*
@@ -1273,15 +1222,14 @@ repeat_load_buddy:
1273 return 0; 1222 return 0;
1274 1223
1275err: 1224err:
1225 if (page)
1226 page_cache_release(page);
1276 if (e4b->bd_bitmap_page) 1227 if (e4b->bd_bitmap_page)
1277 page_cache_release(e4b->bd_bitmap_page); 1228 page_cache_release(e4b->bd_bitmap_page);
1278 if (e4b->bd_buddy_page) 1229 if (e4b->bd_buddy_page)
1279 page_cache_release(e4b->bd_buddy_page); 1230 page_cache_release(e4b->bd_buddy_page);
1280 e4b->bd_buddy = NULL; 1231 e4b->bd_buddy = NULL;
1281 e4b->bd_bitmap = NULL; 1232 e4b->bd_bitmap = NULL;
1282
1283 /* Done with the buddy cache */
1284 up_read(e4b->alloc_semp);
1285 return ret; 1233 return ret;
1286} 1234}
1287 1235
@@ -1291,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1291 page_cache_release(e4b->bd_bitmap_page); 1239 page_cache_release(e4b->bd_bitmap_page);
1292 if (e4b->bd_buddy_page) 1240 if (e4b->bd_buddy_page)
1293 page_cache_release(e4b->bd_buddy_page); 1241 page_cache_release(e4b->bd_buddy_page);
1294 /* Done with the buddy cache */
1295 if (e4b->alloc_semp)
1296 up_read(e4b->alloc_semp);
1297} 1242}
1298 1243
1299 1244
@@ -1606,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1606 get_page(ac->ac_bitmap_page); 1551 get_page(ac->ac_bitmap_page);
1607 ac->ac_buddy_page = e4b->bd_buddy_page; 1552 ac->ac_buddy_page = e4b->bd_buddy_page;
1608 get_page(ac->ac_buddy_page); 1553 get_page(ac->ac_buddy_page);
1609 /* on allocation we use ac to track the held semaphore */
1610 ac->alloc_semp = e4b->alloc_semp;
1611 e4b->alloc_semp = NULL;
1612 /* store last allocated for subsequent stream allocation */ 1554 /* store last allocated for subsequent stream allocation */
1613 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { 1555 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1614 spin_lock(&sbi->s_md_lock); 1556 spin_lock(&sbi->s_md_lock);
@@ -2659,7 +2601,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2659 struct super_block *sb = journal->j_private; 2601 struct super_block *sb = journal->j_private;
2660 struct ext4_buddy e4b; 2602 struct ext4_buddy e4b;
2661 struct ext4_group_info *db; 2603 struct ext4_group_info *db;
2662 int err, ret, count = 0, count2 = 0; 2604 int err, count = 0, count2 = 0;
2663 struct ext4_free_data *entry; 2605 struct ext4_free_data *entry;
2664 struct list_head *l, *ltmp; 2606 struct list_head *l, *ltmp;
2665 2607
@@ -2669,15 +2611,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2669 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2611 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2670 entry->count, entry->group, entry); 2612 entry->count, entry->group, entry);
2671 2613
2672 if (test_opt(sb, DISCARD)) { 2614 if (test_opt(sb, DISCARD))
2673 ret = ext4_issue_discard(sb, entry->group, 2615 ext4_issue_discard(sb, entry->group,
2674 entry->start_blk, entry->count); 2616 entry->start_blk, entry->count);
2675 if (unlikely(ret == -EOPNOTSUPP)) {
2676 ext4_warning(sb, "discard not supported, "
2677 "disabling");
2678 clear_opt(sb, DISCARD);
2679 }
2680 }
2681 2617
2682 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2618 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2683 /* we expect to find existing buddy because it's pinned */ 2619 /* we expect to find existing buddy because it's pinned */
@@ -4226,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4226 spin_unlock(&pa->pa_lock); 4162 spin_unlock(&pa->pa_lock);
4227 } 4163 }
4228 } 4164 }
4229 if (ac->alloc_semp)
4230 up_read(ac->alloc_semp);
4231 if (pa) { 4165 if (pa) {
4232 /* 4166 /*
4233 * We want to add the pa to the right bucket. 4167 * We want to add the pa to the right bucket.
4234 * Remove it from the list and while adding 4168 * Remove it from the list and while adding
4235 * make sure the list to which we are adding 4169 * make sure the list to which we are adding
4236 * doesn't grow big. We need to release 4170 * doesn't grow big.
4237 * alloc_semp before calling ext4_mb_add_n_trim()
4238 */ 4171 */
4239 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { 4172 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4240 spin_lock(pa->pa_obj_lock); 4173 spin_lock(pa->pa_obj_lock);
@@ -4303,7 +4236,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4303 * there is enough free blocks to do block allocation 4236 * there is enough free blocks to do block allocation
4304 * and verify allocation doesn't exceed the quota limits. 4237 * and verify allocation doesn't exceed the quota limits.
4305 */ 4238 */
4306 while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { 4239 while (ar->len &&
4240 ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
4241
4307 /* let others to free the space */ 4242 /* let others to free the space */
4308 yield(); 4243 yield();
4309 ar->len = ar->len >> 1; 4244 ar->len = ar->len >> 1;
@@ -4313,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4313 return 0; 4248 return 0;
4314 } 4249 }
4315 reserv_blks = ar->len; 4250 reserv_blks = ar->len;
4316 while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { 4251 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4317 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4252 dquot_alloc_block_nofail(ar->inode, ar->len);
4318 ar->len--; 4253 } else {
4254 while (ar->len &&
4255 dquot_alloc_block(ar->inode, ar->len)) {
4256
4257 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4258 ar->len--;
4259 }
4319 } 4260 }
4320 inquota = ar->len; 4261 inquota = ar->len;
4321 if (ar->len == 0) { 4262 if (ar->len == 0) {
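/*
 * Editor's note (a reading of the hunk above, not text from the patch):
 * EXT4_MB_USE_ROOT_BLOCKS marks allocations that must not be shrunk or
 * failed on quota pressure -- ext4_claim_free_blocks() is now passed
 * ar->flags so such requests may dip into the reserved pool, and the
 * quota charge goes through dquot_alloc_block_nofail() instead of the
 * usual retry-and-halve loop.
 */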
@@ -4704,6 +4645,127 @@ error_return:
4704} 4645}
4705 4646
4706/** 4647/**
4648 * ext4_add_groupblocks() -- Add given blocks to an existing group
4649 * @handle: handle to this transaction
4650 * @sb: super block
4651 * @block: start physical block to add to the block group
4652 * @count: number of blocks to add
4653 *
4654 * This marks the blocks as free in the bitmap and buddy.
4655 */
4656void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
4657 ext4_fsblk_t block, unsigned long count)
4658{
4659 struct buffer_head *bitmap_bh = NULL;
4660 struct buffer_head *gd_bh;
4661 ext4_group_t block_group;
4662 ext4_grpblk_t bit;
4663 unsigned int i;
4664 struct ext4_group_desc *desc;
4665 struct ext4_sb_info *sbi = EXT4_SB(sb);
4666 struct ext4_buddy e4b;
4667 int err = 0, ret, blk_free_count;
4668 ext4_grpblk_t blocks_freed;
4669 struct ext4_group_info *grp;
4670
4671 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4672
4673 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4674 grp = ext4_get_group_info(sb, block_group);
4675 /*
4676 * Check to see if we are freeing blocks across a group
4677 * boundary.
4678 */
4679 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
4680 goto error_return;
4681
4682 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4683 if (!bitmap_bh)
4684 goto error_return;
4685 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4686 if (!desc)
4687 goto error_return;
4688
4689 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4690 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
4691 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
4692 in_range(block + count - 1, ext4_inode_table(sb, desc),
4693 sbi->s_itb_per_group)) {
4694 ext4_error(sb, "Adding blocks in system zones - "
4695 "Block = %llu, count = %lu",
4696 block, count);
4697 goto error_return;
4698 }
4699
4700 BUFFER_TRACE(bitmap_bh, "getting write access");
4701 err = ext4_journal_get_write_access(handle, bitmap_bh);
4702 if (err)
4703 goto error_return;
4704
4705 /*
4706 * We are about to modify some metadata. Call the journal APIs
4707 * to unshare ->b_data if a currently-committing transaction is
4708 * using it
4709 */
4710 BUFFER_TRACE(gd_bh, "get_write_access");
4711 err = ext4_journal_get_write_access(handle, gd_bh);
4712 if (err)
4713 goto error_return;
4714
4715 for (i = 0, blocks_freed = 0; i < count; i++) {
4716 BUFFER_TRACE(bitmap_bh, "clear bit");
4717 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
4718 ext4_error(sb, "bit already cleared for block %llu",
4719 (ext4_fsblk_t)(block + i));
4720 BUFFER_TRACE(bitmap_bh, "bit already cleared");
4721 } else {
4722 blocks_freed++;
4723 }
4724 }
4725
4726 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4727 if (err)
4728 goto error_return;
4729
4730 /*
4731 * need to update group_info->bb_free and the bitmap
4732 * with the group lock held; generate_buddy looks at
4733 * them with the group lock held
4734 */
4735 ext4_lock_group(sb, block_group);
4736 mb_clear_bits(bitmap_bh->b_data, bit, count);
4737 mb_free_blocks(NULL, &e4b, bit, count);
4738 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
4739 ext4_free_blks_set(sb, desc, blk_free_count);
4740 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
4741 ext4_unlock_group(sb, block_group);
4742 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
4743
4744 if (sbi->s_log_groups_per_flex) {
4745 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4746 atomic_add(blocks_freed,
4747 &sbi->s_flex_groups[flex_group].free_blocks);
4748 }
4749
4750 ext4_mb_unload_buddy(&e4b);
4751
4752 /* We dirtied the bitmap block */
4753 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4754 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4755
4756 /* And the group descriptor block */
4757 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4758 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4759 if (!err)
4760 err = ret;
4761
4762error_return:
4763 brelse(bitmap_bh);
4764 ext4_std_error(sb, err);
4765 return;
4766}
4767
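/*
 * Editor's note (assumption, based on the function's shape rather than
 * the patch text): this is the sort of helper an online resize wants --
 * once the superblock's block count has grown, the new tail of the last
 * group can be handed back to mballoc in one call, e.g.:
 *
 *	ext4_add_groupblocks(handle, sb, o_blocks_count, add);
 *
 * where o_blocks_count is the old filesystem size in blocks and add is
 * the number of appended blocks (both names invented here). mballoc then
 * clears the bits in the bitmap and buddy under the group lock.
 */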
4768/**
4707 * ext4_trim_extent -- function to TRIM one single free extent in the group 4769 * ext4_trim_extent -- function to TRIM one single free extent in the group
4708 * @sb: super block for the file system 4770 * @sb: super block for the file system
4709 * @start: starting block of the free extent in the alloc. group 4771 * @start: starting block of the free extent in the alloc. group
@@ -4715,11 +4777,10 @@ error_return:
4715 * one will allocate those blocks, mark it as used in buddy bitmap. This must 4777 * one will allocate those blocks, mark it as used in buddy bitmap. This must
4716 * be called under the group lock. 4778
4717 */ 4779 */
4718static int ext4_trim_extent(struct super_block *sb, int start, int count, 4780static void ext4_trim_extent(struct super_block *sb, int start, int count,
4719 ext4_group_t group, struct ext4_buddy *e4b) 4781 ext4_group_t group, struct ext4_buddy *e4b)
4720{ 4782{
4721 struct ext4_free_extent ex; 4783 struct ext4_free_extent ex;
4722 int ret = 0;
4723 4784
4724 assert_spin_locked(ext4_group_lock_ptr(sb, group)); 4785 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4725 4786
@@ -4733,12 +4794,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
4733 */ 4794 */
4734 mb_mark_used(e4b, &ex); 4795 mb_mark_used(e4b, &ex);
4735 ext4_unlock_group(sb, group); 4796 ext4_unlock_group(sb, group);
4736 4797 ext4_issue_discard(sb, group, start, count);
4737 ret = ext4_issue_discard(sb, group, start, count);
4738
4739 ext4_lock_group(sb, group); 4798 ext4_lock_group(sb, group);
4740 mb_free_blocks(NULL, e4b, start, ex.fe_len); 4799 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4741 return ret;
4742} 4800}
4743 4801
4744/** 4802/**
@@ -4760,21 +4818,26 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
4760 * the group buddy bitmap. This is done until whole group is scanned. 4818 * the group buddy bitmap. This is done until whole group is scanned.
4761 */ 4819 */
4762static ext4_grpblk_t 4820static ext4_grpblk_t
4763ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, 4821ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4764 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) 4822 ext4_grpblk_t start, ext4_grpblk_t max,
4823 ext4_grpblk_t minblocks)
4765{ 4824{
4766 void *bitmap; 4825 void *bitmap;
4767 ext4_grpblk_t next, count = 0; 4826 ext4_grpblk_t next, count = 0;
4768 ext4_group_t group; 4827 struct ext4_buddy e4b;
4769 int ret = 0; 4828 int ret;
4770 4829
4771 BUG_ON(e4b == NULL); 4830 ret = ext4_mb_load_buddy(sb, group, &e4b);
4831 if (ret) {
4832 ext4_error(sb, "Error in loading buddy "
4833 "information for %u", group);
4834 return ret;
4835 }
4836 bitmap = e4b.bd_bitmap;
4772 4837
4773 bitmap = e4b->bd_bitmap;
4774 group = e4b->bd_group;
4775 start = (e4b->bd_info->bb_first_free > start) ?
4776 e4b->bd_info->bb_first_free : start;
4777 ext4_lock_group(sb, group); 4838 ext4_lock_group(sb, group);
4839 start = (e4b.bd_info->bb_first_free > start) ?
4840 e4b.bd_info->bb_first_free : start;
4778 4841
4779 while (start < max) { 4842 while (start < max) {
4780 start = mb_find_next_zero_bit(bitmap, max, start); 4843 start = mb_find_next_zero_bit(bitmap, max, start);
@@ -4783,10 +4846,8 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4783 next = mb_find_next_bit(bitmap, max, start); 4846 next = mb_find_next_bit(bitmap, max, start);
4784 4847
4785 if ((next - start) >= minblocks) { 4848 if ((next - start) >= minblocks) {
4786 ret = ext4_trim_extent(sb, start, 4849 ext4_trim_extent(sb, start,
4787 next - start, group, e4b); 4850 next - start, group, &e4b);
4788 if (ret < 0)
4789 break;
4790 count += next - start; 4851 count += next - start;
4791 } 4852 }
4792 start = next + 1; 4853 start = next + 1;
@@ -4802,17 +4863,15 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4802 ext4_lock_group(sb, group); 4863 ext4_lock_group(sb, group);
4803 } 4864 }
4804 4865
4805 if ((e4b->bd_info->bb_free - count) < minblocks) 4866 if ((e4b.bd_info->bb_free - count) < minblocks)
4806 break; 4867 break;
4807 } 4868 }
4808 ext4_unlock_group(sb, group); 4869 ext4_unlock_group(sb, group);
4870 ext4_mb_unload_buddy(&e4b);
4809 4871
4810 ext4_debug("trimmed %d blocks in the group %d\n", 4872 ext4_debug("trimmed %d blocks in the group %d\n",
4811 count, group); 4873 count, group);
4812 4874
4813 if (ret < 0)
4814 count = ret;
4815
4816 return count; 4875 return count;
4817} 4876}
4818 4877
@@ -4830,11 +4889,11 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4830 */ 4889 */
4831int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) 4890int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4832{ 4891{
4833 struct ext4_buddy e4b; 4892 struct ext4_group_info *grp;
4834 ext4_group_t first_group, last_group; 4893 ext4_group_t first_group, last_group;
4835 ext4_group_t group, ngroups = ext4_get_groups_count(sb); 4894 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4836 ext4_grpblk_t cnt = 0, first_block, last_block; 4895 ext4_grpblk_t cnt = 0, first_block, last_block;
4837 uint64_t start, len, minlen, trimmed; 4896 uint64_t start, len, minlen, trimmed = 0;
4838 ext4_fsblk_t first_data_blk = 4897 ext4_fsblk_t first_data_blk =
4839 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 4898 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4840 int ret = 0; 4899 int ret = 0;
@@ -4842,7 +4901,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4842 start = range->start >> sb->s_blocksize_bits; 4901 start = range->start >> sb->s_blocksize_bits;
4843 len = range->len >> sb->s_blocksize_bits; 4902 len = range->len >> sb->s_blocksize_bits;
4844 minlen = range->minlen >> sb->s_blocksize_bits; 4903 minlen = range->minlen >> sb->s_blocksize_bits;
4845 trimmed = 0;
4846 4904
4847 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) 4905 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4848 return -EINVAL; 4906 return -EINVAL;
@@ -4863,11 +4921,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4863 return -EINVAL; 4921 return -EINVAL;
4864 4922
4865 for (group = first_group; group <= last_group; group++) { 4923 for (group = first_group; group <= last_group; group++) {
4866 ret = ext4_mb_load_buddy(sb, group, &e4b); 4924 grp = ext4_get_group_info(sb, group);
4867 if (ret) { 4925 /* We only do this if the grp has never been initialized */
4868 ext4_error(sb, "Error in loading buddy " 4926 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
4869 "information for %u", group); 4927 ret = ext4_mb_init_group(sb, group);
4870 break; 4928 if (ret)
4929 break;
4871 } 4930 }
4872 4931
4873 /* 4932 /*
@@ -4880,16 +4939,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4880 last_block = first_block + len; 4939 last_block = first_block + len;
4881 len -= last_block - first_block; 4940 len -= last_block - first_block;
4882 4941
4883 if (e4b.bd_info->bb_free >= minlen) { 4942 if (grp->bb_free >= minlen) {
4884 cnt = ext4_trim_all_free(sb, &e4b, first_block, 4943 cnt = ext4_trim_all_free(sb, group, first_block,
4885 last_block, minlen); 4944 last_block, minlen);
4886 if (cnt < 0) { 4945 if (cnt < 0) {
4887 ret = cnt; 4946 ret = cnt;
4888 ext4_mb_unload_buddy(&e4b);
4889 break; 4947 break;
4890 } 4948 }
4891 } 4949 }
4892 ext4_mb_unload_buddy(&e4b);
4893 trimmed += cnt; 4950 trimmed += cnt;
4894 first_block = 0; 4951 first_block = 0;
4895 } 4952 }
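/*
 * Editor's sketch, not part of the patch: ext4_trim_fs() is driven by
 * the FITRIM ioctl. A minimal userspace caller -- the mount point is
 * invented for the example:
 */
#include <fcntl.h>
#include <limits.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int trim_demo(void)
{
	struct fstrim_range range = {
		.start = 0,
		.len = ULLONG_MAX,	/* whole filesystem */
		.minlen = 0,		/* trim free extents of any size */
	};
	int fd = open("/mnt/ext4", O_RDONLY);

	if (fd < 0)
		return -1;
	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");
	else	/* on success the kernel reports the trimmed byte count */
		printf("trimmed %llu bytes\n",
		       (unsigned long long)range.len);
	close(fd);
	return 0;
}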
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 22bd4d7f289b..20b5e7bfebd1 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -193,11 +193,6 @@ struct ext4_allocation_context {
193 __u8 ac_op; /* operation, for history only */ 193 __u8 ac_op; /* operation, for history only */
194 struct page *ac_bitmap_page; 194 struct page *ac_bitmap_page;
195 struct page *ac_buddy_page; 195 struct page *ac_buddy_page;
196 /*
197 * pointer to the held semaphore upon successful
198 * block allocation
199 */
200 struct rw_semaphore *alloc_semp;
201 struct ext4_prealloc_space *ac_pa; 196 struct ext4_prealloc_space *ac_pa;
202 struct ext4_locality_group *ac_lg; 197 struct ext4_locality_group *ac_lg;
203}; 198};
@@ -215,7 +210,6 @@ struct ext4_buddy {
215 struct super_block *bd_sb; 210 struct super_block *bd_sb;
216 __u16 bd_blkbits; 211 __u16 bd_blkbits;
217 ext4_group_t bd_group; 212 ext4_group_t bd_group;
218 struct rw_semaphore *alloc_semp;
219}; 213};
220#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 214#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
221#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 215#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 92816b4e0f16..b57b98fb44d1 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
376 * We have the extent map build with the tmp inode. 376 * We have the extent map build with the tmp inode.
377 * Now copy the i_data across 377 * Now copy the i_data across
378 */ 378 */
379 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS); 379 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
380 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 380 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
381 381
382 /* 382 /*
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
new file mode 100644
index 000000000000..9bdef3f537c5
--- /dev/null
+++ b/fs/ext4/mmp.c
@@ -0,0 +1,351 @@
1#include <linux/fs.h>
2#include <linux/random.h>
3#include <linux/buffer_head.h>
4#include <linux/utsname.h>
5#include <linux/kthread.h>
6
7#include "ext4.h"
8
9/*
10 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
11 * faster.
12 */
13static int write_mmp_block(struct buffer_head *bh)
14{
15 mark_buffer_dirty(bh);
16 lock_buffer(bh);
17 bh->b_end_io = end_buffer_write_sync;
18 get_bh(bh);
19 submit_bh(WRITE_SYNC, bh);
20 wait_on_buffer(bh);
21 if (unlikely(!buffer_uptodate(bh)))
22 return 1;
23
24 return 0;
25}
26
27/*
28 * Read the MMP block. It _must_ be read from disk and hence we clear the
29 * uptodate flag on the buffer.
30 */
31static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
32 ext4_fsblk_t mmp_block)
33{
34 struct mmp_struct *mmp;
35
36 if (*bh)
37 clear_buffer_uptodate(*bh);
38
39 /* This would be sb_bread(sb, mmp_block), except we need to be sure
40 * that the MD RAID device cache has been bypassed, and that the read
41 * is not blocked in the elevator. */
42 if (!*bh)
43 *bh = sb_getblk(sb, mmp_block);
44 if (*bh) {
45 get_bh(*bh);
46 lock_buffer(*bh);
47 (*bh)->b_end_io = end_buffer_read_sync;
48 submit_bh(READ_SYNC, *bh);
49 wait_on_buffer(*bh);
50 if (!buffer_uptodate(*bh)) {
51 brelse(*bh);
52 *bh = NULL;
53 }
54 }
55 if (!*bh) {
56 ext4_warning(sb, "Error while reading MMP block %llu",
57 mmp_block);
58 return -EIO;
59 }
60
61 mmp = (struct mmp_struct *)((*bh)->b_data);
62 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
63 return -EINVAL;
64
65 return 0;
66}
67
68/*
69 * Dump as much information as possible to help the admin.
70 */
71void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
72 const char *function, unsigned int line, const char *msg)
73{
74 __ext4_warning(sb, function, line, msg);
75 __ext4_warning(sb, function, line,
76 "MMP failure info: last update time: %llu, last update "
77 "node: %s, last update device: %s\n",
78 (long long unsigned int) le64_to_cpu(mmp->mmp_time),
79 mmp->mmp_nodename, mmp->mmp_bdevname);
80}
81
82/*
83 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
84 */
85static int kmmpd(void *data)
86{
87 struct super_block *sb = ((struct mmpd_data *) data)->sb;
88 struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
89 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
90 struct mmp_struct *mmp;
91 ext4_fsblk_t mmp_block;
92 u32 seq = 0;
93 unsigned long failed_writes = 0;
94 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
95 unsigned mmp_check_interval;
96 unsigned long last_update_time;
97 unsigned long diff;
98 int retval;
99
100 mmp_block = le64_to_cpu(es->s_mmp_block);
101 mmp = (struct mmp_struct *)(bh->b_data);
102 mmp->mmp_time = cpu_to_le64(get_seconds());
103 /*
104 * Start with the higher mmp_check_interval and reduce it if
105 * the MMP block is being updated on time.
106 */
107 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
108 EXT4_MMP_MIN_CHECK_INTERVAL);
109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
110 bdevname(bh->b_bdev, mmp->mmp_bdevname);
111
112 memcpy(mmp->mmp_nodename, init_utsname()->sysname,
113 sizeof(mmp->mmp_nodename));
114
115 while (!kthread_should_stop()) {
116 if (++seq > EXT4_MMP_SEQ_MAX)
117 seq = 1;
118
119 mmp->mmp_seq = cpu_to_le32(seq);
120 mmp->mmp_time = cpu_to_le64(get_seconds());
121 last_update_time = jiffies;
122
123 retval = write_mmp_block(bh);
124 /*
125 * Don't spew too many error messages. Print one every
126 * (s_mmp_update_interval * 60) seconds.
127 */
128 if (retval && (failed_writes % 60) == 0) {
129 ext4_error(sb, "Error writing to MMP block");
130 failed_writes++;
131 }
132
133 if (!(le32_to_cpu(es->s_feature_incompat) &
134 EXT4_FEATURE_INCOMPAT_MMP)) {
135 ext4_warning(sb, "kmmpd being stopped since MMP feature"
136 " has been disabled.");
137 EXT4_SB(sb)->s_mmp_tsk = NULL;
138 goto failed;
139 }
140
141 if (sb->s_flags & MS_RDONLY) {
142 ext4_warning(sb, "kmmpd being stopped since filesystem "
143 "has been remounted as readonly.");
144 EXT4_SB(sb)->s_mmp_tsk = NULL;
145 goto failed;
146 }
147
148 diff = jiffies - last_update_time;
149 if (diff < mmp_update_interval * HZ)
150 schedule_timeout_interruptible(mmp_update_interval *
151 HZ - diff);
152
153 /*
154 * We need to make sure that more than mmp_check_interval
155 * seconds have not passed since writing. If that has happened
156 * we need to check if the MMP block is as we left it.
157 */
158 diff = jiffies - last_update_time;
159 if (diff > mmp_check_interval * HZ) {
160 struct buffer_head *bh_check = NULL;
161 struct mmp_struct *mmp_check;
162
163 retval = read_mmp_block(sb, &bh_check, mmp_block);
164 if (retval) {
165 ext4_error(sb, "error reading MMP data: %d",
166 retval);
167
168 EXT4_SB(sb)->s_mmp_tsk = NULL;
169 goto failed;
170 }
171
172 mmp_check = (struct mmp_struct *)(bh_check->b_data);
173 if (mmp->mmp_seq != mmp_check->mmp_seq ||
174 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
175 sizeof(mmp->mmp_nodename))) {
176 dump_mmp_msg(sb, mmp_check,
177 "Error while updating MMP info. "
178 "The filesystem seems to have been"
179 " multiply mounted.");
180 ext4_error(sb, "abort");
181 goto failed;
182 }
183 put_bh(bh_check);
184 }
185
186 /*
187 * Adjust the mmp_check_interval depending on how much time
188 * it took for the MMP block to be written.
189 */
190 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
191 EXT4_MMP_MAX_CHECK_INTERVAL),
192 EXT4_MMP_MIN_CHECK_INTERVAL);
193 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
194 }
195
196 /*
197 * Unmount seems to be clean.
198 */
199 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
200 mmp->mmp_time = cpu_to_le64(get_seconds());
201
202 retval = write_mmp_block(bh);
203
204failed:
205 kfree(data);
206 brelse(bh);
207 return retval;
208}
209
210/*
211 * Get a random new sequence number but make sure it is not greater than
212 * EXT4_MMP_SEQ_MAX.
213 */
214static unsigned int mmp_new_seq(void)
215{
216 u32 new_seq;
217
218 do {
219 get_random_bytes(&new_seq, sizeof(u32));
220 } while (new_seq > EXT4_MMP_SEQ_MAX);
221
222 return new_seq;
223}
224
225/*
226 * Protect the filesystem from being mounted more than once.
227 */
228int ext4_multi_mount_protect(struct super_block *sb,
229 ext4_fsblk_t mmp_block)
230{
231 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
232 struct buffer_head *bh = NULL;
233 struct mmp_struct *mmp = NULL;
234 struct mmpd_data *mmpd_data;
235 u32 seq;
236 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
237 unsigned int wait_time = 0;
238 int retval;
239
240 if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
241 mmp_block >= ext4_blocks_count(es)) {
242 ext4_warning(sb, "Invalid MMP block in superblock");
243 goto failed;
244 }
245
246 retval = read_mmp_block(sb, &bh, mmp_block);
247 if (retval)
248 goto failed;
249
250 mmp = (struct mmp_struct *)(bh->b_data);
251
252 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
253 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
254
255 /*
256 * If check_interval in MMP block is larger, use that instead of
257 * update_interval from the superblock.
258 */
259 if (mmp->mmp_check_interval > mmp_check_interval)
260 mmp_check_interval = mmp->mmp_check_interval;
261
262 seq = le32_to_cpu(mmp->mmp_seq);
263 if (seq == EXT4_MMP_SEQ_CLEAN)
264 goto skip;
265
266 if (seq == EXT4_MMP_SEQ_FSCK) {
267 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
268 goto failed;
269 }
270
271 wait_time = min(mmp_check_interval * 2 + 1,
272 mmp_check_interval + 60);
273
274 /* Print MMP interval if more than 20 secs. */
275 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
276 ext4_warning(sb, "MMP interval %u higher than expected, please"
277 " wait.\n", wait_time * 2);
278
279 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
280 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
281 goto failed;
282 }
283
284 retval = read_mmp_block(sb, &bh, mmp_block);
285 if (retval)
286 goto failed;
287 mmp = (struct mmp_struct *)(bh->b_data);
288 if (seq != le32_to_cpu(mmp->mmp_seq)) {
289 dump_mmp_msg(sb, mmp,
290 "Device is already active on another node.");
291 goto failed;
292 }
293
294skip:
295 /*
296 * write a new random sequence number.
297 */
298 mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
299
300 retval = write_mmp_block(bh);
301 if (retval)
302 goto failed;
303
304 /*
305 * wait for MMP interval and check mmp_seq.
306 */
307 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
308 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
309 goto failed;
310 }
311
312 retval = read_mmp_block(sb, &bh, mmp_block);
313 if (retval)
314 goto failed;
315 mmp = (struct mmp_struct *)(bh->b_data);
316 if (seq != le32_to_cpu(mmp->mmp_seq)) {
317 dump_mmp_msg(sb, mmp,
318 "Device is already active on another node.");
319 goto failed;
320 }
321
322 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
323 if (!mmpd_data) {
324 ext4_warning(sb, "not enough memory for mmpd_data");
325 goto failed;
326 }
327 mmpd_data->sb = sb;
328 mmpd_data->bh = bh;
329
330 /*
331 * Start a kernel thread to update the MMP block periodically.
332 */
333 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
334 bdevname(bh->b_bdev,
335 mmp->mmp_bdevname));
336 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
337 EXT4_SB(sb)->s_mmp_tsk = NULL;
338 kfree(mmpd_data);
339 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
340 sb->s_id);
341 goto failed;
342 }
343
344 return 0;
345
346failed:
347 brelse(bh);
348 return 1;
349}
350
351
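
Pulled out of the code above, the mount-time handshake in ext4_multi_mount_protect() reduces to the following condensed walkthrough (a restatement of the new file, nothing added):

	/*
	 * seq = read MMP block
	 * seq == EXT4_MMP_SEQ_CLEAN -> last unmount was clean, skip the wait
	 * seq == EXT4_MMP_SEQ_FSCK  -> fsck owns the device, fail the mount
	 * sleep ~one check interval, re-read:
	 *     seq changed           -> another node is live, fail the mount
	 * write our own random seq, sleep again, re-read:
	 *     seq changed           -> we lost the race, fail the mount
	 * start kmmpd, which bumps mmp_seq every s_mmp_update_interval
	 * seconds and aborts if another node touches the block.
	 */
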
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index b9f3e7862f13..2b8304bf3c50 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -876,8 +876,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
876 * It needs to call wait_on_page_writeback() to wait for the 876 * It needs to call wait_on_page_writeback() to wait for the
877 * writeback of the page. 877 * writeback of the page.
878 */ 878 */
879 if (PageWriteback(page)) 879 wait_on_page_writeback(page);
880 wait_on_page_writeback(page);
881 880
882 /* Release old bh and drop refs */ 881 /* Release old bh and drop refs */
883 try_to_release_page(page, 0); 882 try_to_release_page(page, 0);
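
The guard removed here duplicated a test the helper already performs. In the pagemap.h of this era, wait_on_page_writeback() is essentially the following (paraphrased):

	static inline void wait_on_page_writeback(struct page *page)
	{
		if (PageWriteback(page))
			wait_on_page_bit(page, PG_writeback);
	}
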
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 67fd0b025858..b754b7721f51 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1413,10 +1413,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1413 frame->at = entries; 1413 frame->at = entries;
1414 frame->bh = bh; 1414 frame->bh = bh;
1415 bh = bh2; 1415 bh = bh2;
1416
1417 ext4_handle_dirty_metadata(handle, dir, frame->bh);
1418 ext4_handle_dirty_metadata(handle, dir, bh);
1419
1416 de = do_split(handle,dir, &bh, frame, &hinfo, &retval); 1420 de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1417 dx_release (frames); 1421 if (!de) {
1418 if (!(de)) 1422 /*
1423 * Even if the block split failed, we have to properly write
1424 * out all the changes we did so far. Otherwise we can end up
1425 * with a corrupted filesystem.
1426 */
1427 ext4_mark_inode_dirty(handle, dir);
1428 dx_release(frames);
1419 return retval; 1429 return retval;
1430 }
1431 dx_release(frames);
1420 1432
1421 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 1433 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1422 brelse(bh); 1434 brelse(bh);
@@ -2240,6 +2252,7 @@ static int ext4_symlink(struct inode *dir,
2240 handle_t *handle; 2252 handle_t *handle;
2241 struct inode *inode; 2253 struct inode *inode;
2242 int l, err, retries = 0; 2254 int l, err, retries = 0;
2255 int credits;
2243 2256
2244 l = strlen(symname)+1; 2257 l = strlen(symname)+1;
2245 if (l > dir->i_sb->s_blocksize) 2258 if (l > dir->i_sb->s_blocksize)
@@ -2247,10 +2260,26 @@ static int ext4_symlink(struct inode *dir,
2247 2260
2248 dquot_initialize(dir); 2261 dquot_initialize(dir);
2249 2262
2263 if (l > EXT4_N_BLOCKS * 4) {
2264 /*
2265 * For non-fast symlinks, we just allocate inode and put it on
2266 * orphan list in the first transaction => we need bitmap,
2267 * group descriptor, sb, inode block, quota blocks.
2268 */
2269 credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2270 } else {
2271 /*
2272 * Fast symlink. We have to add entry to directory
2273 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
2274 * allocate new inode (bitmap, group descriptor, inode block,
2275 * quota blocks, sb is already counted in previous macros).
2276 */
2277 credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2278 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2279 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2280 }
2250retry: 2281retry:
2251 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2282 handle = ext4_journal_start(dir, credits);
2252 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2253 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2254 if (IS_ERR(handle)) 2283 if (IS_ERR(handle))
2255 return PTR_ERR(handle); 2284 return PTR_ERR(handle);
2256 2285
@@ -2263,21 +2292,44 @@ retry:
2263 if (IS_ERR(inode)) 2292 if (IS_ERR(inode))
2264 goto out_stop; 2293 goto out_stop;
2265 2294
2266 if (l > sizeof(EXT4_I(inode)->i_data)) { 2295 if (l > EXT4_N_BLOCKS * 4) {
2267 inode->i_op = &ext4_symlink_inode_operations; 2296 inode->i_op = &ext4_symlink_inode_operations;
2268 ext4_set_aops(inode); 2297 ext4_set_aops(inode);
2269 /* 2298 /*
2270 * page_symlink() calls into ext4_prepare/commit_write. 2299 * We cannot call page_symlink() with transaction started
2271 * We have a transaction open. All is sweetness. It also sets 2300 * because it calls into ext4_write_begin() which can wait
2272 * i_size in generic_commit_write(). 2301 * for transaction commit if we are running out of space
2302 * and thus we deadlock. So we have to stop the transaction now
2303 * and restart it when the symlink contents are written.
2304 *
2305 * To keep the fs consistent in case of a crash, we have to put
2306 * the inode on the orphan list in the meantime.
2273 */ 2307 */
2308 drop_nlink(inode);
2309 err = ext4_orphan_add(handle, inode);
2310 ext4_journal_stop(handle);
2311 if (err)
2312 goto err_drop_inode;
2274 err = __page_symlink(inode, symname, l, 1); 2313 err = __page_symlink(inode, symname, l, 1);
2314 if (err)
2315 goto err_drop_inode;
2316 /*
2317 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2318 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2319 */
2320 handle = ext4_journal_start(dir,
2321 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2322 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2323 if (IS_ERR(handle)) {
2324 err = PTR_ERR(handle);
2325 goto err_drop_inode;
2326 }
2327 inc_nlink(inode);
2328 err = ext4_orphan_del(handle, inode);
2275 if (err) { 2329 if (err) {
2330 ext4_journal_stop(handle);
2276 clear_nlink(inode); 2331 clear_nlink(inode);
2277 unlock_new_inode(inode); 2332 goto err_drop_inode;
2278 ext4_mark_inode_dirty(handle, inode);
2279 iput(inode);
2280 goto out_stop;
2281 } 2333 }
2282 } else { 2334 } else {
2283 /* clear the extent format for fast symlink */ 2335 /* clear the extent format for fast symlink */
@@ -2293,6 +2345,10 @@ out_stop:
2293 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2345 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2294 goto retry; 2346 goto retry;
2295 return err; 2347 return err;
2348err_drop_inode:
2349 unlock_new_inode(inode);
2350 iput(inode);
2351 return err;
2296} 2352}
2297 2353
2298static int ext4_link(struct dentry *old_dentry, 2354static int ext4_link(struct dentry *old_dentry,
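
Condensed, the long-symlink path above is a stop/restart pattern around the page write; a sketch of the sequence from the hunk, with error paths elided:

	drop_nlink(inode);                    /* hide the half-built inode */
	err = ext4_orphan_add(handle, inode); /* crash -> orphan cleanup reaps it */
	ext4_journal_stop(handle);            /* no handle held across writeback */

	err = __page_symlink(inode, symname, l, 1); /* may wait on a commit */

	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
				    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
	inc_nlink(inode);
	err = ext4_orphan_del(handle, inode); /* fully written, un-orphan it */

Starting the first transaction with smaller, path-specific credits (4 plus quota blocks on the slow path) instead of the old one-size worst case is what makes the split affordable.
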
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b6dbd056fcb1..7bb8f76d470a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -203,46 +203,29 @@ static void ext4_end_bio(struct bio *bio, int error)
203 for (i = 0; i < io_end->num_io_pages; i++) { 203 for (i = 0; i < io_end->num_io_pages; i++) {
204 struct page *page = io_end->pages[i]->p_page; 204 struct page *page = io_end->pages[i]->p_page;
205 struct buffer_head *bh, *head; 205 struct buffer_head *bh, *head;
206 int partial_write = 0; 206 loff_t offset;
207 loff_t io_end_offset;
207 208
208 head = page_buffers(page); 209 if (error) {
209 if (error)
210 SetPageError(page); 210 SetPageError(page);
211 BUG_ON(!head); 211 set_bit(AS_EIO, &page->mapping->flags);
212 if (head->b_size != PAGE_CACHE_SIZE) { 212 head = page_buffers(page);
213 loff_t offset; 213 BUG_ON(!head);
214 loff_t io_end_offset = io_end->offset + io_end->size; 214
215 io_end_offset = io_end->offset + io_end->size;
215 216
216 offset = (sector_t) page->index << PAGE_CACHE_SHIFT; 217 offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
217 bh = head; 218 bh = head;
218 do { 219 do {
219 if ((offset >= io_end->offset) && 220 if ((offset >= io_end->offset) &&
220 (offset+bh->b_size <= io_end_offset)) { 221 (offset+bh->b_size <= io_end_offset))
221 if (error) 222 buffer_io_error(bh);
222 buffer_io_error(bh); 223
223
224 }
225 if (buffer_delay(bh))
226 partial_write = 1;
227 else if (!buffer_mapped(bh))
228 clear_buffer_dirty(bh);
229 else if (buffer_dirty(bh))
230 partial_write = 1;
231 offset += bh->b_size; 224 offset += bh->b_size;
232 bh = bh->b_this_page; 225 bh = bh->b_this_page;
233 } while (bh != head); 226 } while (bh != head);
234 } 227 }
235 228
236 /*
237 * If this is a partial write which happened to make
238 * all buffers uptodate then we can optimize away a
239 * bogus readpage() for the next read(). Here we
240 * 'discover' whether the page went uptodate as a
241 * result of this (potentially partial) write.
242 */
243 if (!partial_write)
244 SetPageUptodate(page);
245
246 put_io_page(io_end->pages[i]); 229 put_io_page(io_end->pages[i]);
247 } 230 }
248 io_end->num_io_pages = 0; 231 io_end->num_io_pages = 0;
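
The new set_bit(AS_EIO, &page->mapping->flags) is what makes a failed writeback visible to userspace: the generic fsync()/fdatawait path tests and clears that mapping flag and converts it into -EIO. Roughly, on the wait side (a sketch of the mm behaviour of this era, not ext4 code):

	/* tail of filemap_fdatawait_range(), paraphrased */
	if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
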
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8553dfb310af..cc5c157aa11d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -38,6 +38,7 @@
38#include <linux/ctype.h> 38#include <linux/ctype.h>
39#include <linux/log2.h> 39#include <linux/log2.h>
40#include <linux/crc16.h> 40#include <linux/crc16.h>
41#include <linux/cleancache.h>
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42 43
43#include <linux/kthread.h> 44#include <linux/kthread.h>
@@ -75,11 +76,27 @@ static void ext4_write_super(struct super_block *sb);
75static int ext4_freeze(struct super_block *sb); 76static int ext4_freeze(struct super_block *sb);
76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 77static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 78 const char *dev_name, void *data);
79static inline int ext2_feature_set_ok(struct super_block *sb);
80static inline int ext3_feature_set_ok(struct super_block *sb);
78static int ext4_feature_set_ok(struct super_block *sb, int readonly); 81static int ext4_feature_set_ok(struct super_block *sb, int readonly);
79static void ext4_destroy_lazyinit_thread(void); 82static void ext4_destroy_lazyinit_thread(void);
80static void ext4_unregister_li_request(struct super_block *sb); 83static void ext4_unregister_li_request(struct super_block *sb);
81static void ext4_clear_request_list(void); 84static void ext4_clear_request_list(void);
82 85
86#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
87static struct file_system_type ext2_fs_type = {
88 .owner = THIS_MODULE,
89 .name = "ext2",
90 .mount = ext4_mount,
91 .kill_sb = kill_block_super,
92 .fs_flags = FS_REQUIRES_DEV,
93};
94#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
95#else
96#define IS_EXT2_SB(sb) (0)
97#endif
98
99
83#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 100#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
84static struct file_system_type ext3_fs_type = { 101static struct file_system_type ext3_fs_type = {
85 .owner = THIS_MODULE, 102 .owner = THIS_MODULE,
@@ -806,6 +823,8 @@ static void ext4_put_super(struct super_block *sb)
806 invalidate_bdev(sbi->journal_bdev); 823 invalidate_bdev(sbi->journal_bdev);
807 ext4_blkdev_remove(sbi); 824 ext4_blkdev_remove(sbi);
808 } 825 }
826 if (sbi->s_mmp_tsk)
827 kthread_stop(sbi->s_mmp_tsk);
809 sb->s_fs_info = NULL; 828 sb->s_fs_info = NULL;
810 /* 829 /*
811 * Now that we are completely done shutting down the 830 * Now that we are completely done shutting down the
@@ -1096,7 +1115,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1096 1115
1097 if (!test_opt(sb, INIT_INODE_TABLE)) 1116 if (!test_opt(sb, INIT_INODE_TABLE))
1098 seq_puts(seq, ",noinit_inode_table"); 1117 seq_puts(seq, ",noinit_inode_table");
1099 else if (sbi->s_li_wait_mult) 1118 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
1100 seq_printf(seq, ",init_inode_table=%u", 1119 seq_printf(seq, ",init_inode_table=%u",
1101 (unsigned) sbi->s_li_wait_mult); 1120 (unsigned) sbi->s_li_wait_mult);
1102 1121
@@ -1187,9 +1206,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
1187 const char *data, size_t len, loff_t off); 1206 const char *data, size_t len, loff_t off);
1188 1207
1189static const struct dquot_operations ext4_quota_operations = { 1208static const struct dquot_operations ext4_quota_operations = {
1190#ifdef CONFIG_QUOTA
1191 .get_reserved_space = ext4_get_reserved_space, 1209 .get_reserved_space = ext4_get_reserved_space,
1192#endif
1193 .write_dquot = ext4_write_dquot, 1210 .write_dquot = ext4_write_dquot,
1194 .acquire_dquot = ext4_acquire_dquot, 1211 .acquire_dquot = ext4_acquire_dquot,
1195 .release_dquot = ext4_release_dquot, 1212 .release_dquot = ext4_release_dquot,
@@ -1900,7 +1917,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1900 ext4_msg(sb, KERN_WARNING, 1917 ext4_msg(sb, KERN_WARNING,
1901 "warning: mounting fs with errors, " 1918 "warning: mounting fs with errors, "
1902 "running e2fsck is recommended"); 1919 "running e2fsck is recommended");
1903 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1920 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1904 le16_to_cpu(es->s_mnt_count) >= 1921 le16_to_cpu(es->s_mnt_count) >=
1905 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1922 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1906 ext4_msg(sb, KERN_WARNING, 1923 ext4_msg(sb, KERN_WARNING,
@@ -1932,6 +1949,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1932 EXT4_INODES_PER_GROUP(sb), 1949 EXT4_INODES_PER_GROUP(sb),
1933 sbi->s_mount_opt, sbi->s_mount_opt2); 1950 sbi->s_mount_opt, sbi->s_mount_opt2);
1934 1951
1952 cleancache_init_fs(sb);
1935 return res; 1953 return res;
1936} 1954}
1937 1955
@@ -2425,6 +2443,18 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2425 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2443 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2426} 2444}
2427 2445
2446static ssize_t extent_cache_hits_show(struct ext4_attr *a,
2447 struct ext4_sb_info *sbi, char *buf)
2448{
2449 return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits);
2450}
2451
2452static ssize_t extent_cache_misses_show(struct ext4_attr *a,
2453 struct ext4_sb_info *sbi, char *buf)
2454{
2455 return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses);
2456}
2457
2428static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2458static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2429 struct ext4_sb_info *sbi, 2459 struct ext4_sb_info *sbi,
2430 const char *buf, size_t count) 2460 const char *buf, size_t count)
@@ -2482,6 +2512,8 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2482EXT4_RO_ATTR(delayed_allocation_blocks); 2512EXT4_RO_ATTR(delayed_allocation_blocks);
2483EXT4_RO_ATTR(session_write_kbytes); 2513EXT4_RO_ATTR(session_write_kbytes);
2484EXT4_RO_ATTR(lifetime_write_kbytes); 2514EXT4_RO_ATTR(lifetime_write_kbytes);
2515EXT4_RO_ATTR(extent_cache_hits);
2516EXT4_RO_ATTR(extent_cache_misses);
2485EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2517EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2486 inode_readahead_blks_store, s_inode_readahead_blks); 2518 inode_readahead_blks_store, s_inode_readahead_blks);
2487EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2519EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2497,6 +2529,8 @@ static struct attribute *ext4_attrs[] = {
2497 ATTR_LIST(delayed_allocation_blocks), 2529 ATTR_LIST(delayed_allocation_blocks),
2498 ATTR_LIST(session_write_kbytes), 2530 ATTR_LIST(session_write_kbytes),
2499 ATTR_LIST(lifetime_write_kbytes), 2531 ATTR_LIST(lifetime_write_kbytes),
2532 ATTR_LIST(extent_cache_hits),
2533 ATTR_LIST(extent_cache_misses),
2500 ATTR_LIST(inode_readahead_blks), 2534 ATTR_LIST(inode_readahead_blks),
2501 ATTR_LIST(inode_goal), 2535 ATTR_LIST(inode_goal),
2502 ATTR_LIST(mb_stats), 2536 ATTR_LIST(mb_stats),
@@ -2659,12 +2693,6 @@ static void print_daily_error_info(unsigned long arg)
2659 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2693 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
2660} 2694}
2661 2695
2662static void ext4_lazyinode_timeout(unsigned long data)
2663{
2664 struct task_struct *p = (struct task_struct *)data;
2665 wake_up_process(p);
2666}
2667
2668/* Find next suitable group and run ext4_init_inode_table */ 2696/* Find next suitable group and run ext4_init_inode_table */
2669static int ext4_run_li_request(struct ext4_li_request *elr) 2697static int ext4_run_li_request(struct ext4_li_request *elr)
2670{ 2698{
@@ -2696,11 +2724,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2696 ret = ext4_init_inode_table(sb, group, 2724 ret = ext4_init_inode_table(sb, group,
2697 elr->lr_timeout ? 0 : 1); 2725 elr->lr_timeout ? 0 : 1);
2698 if (elr->lr_timeout == 0) { 2726 if (elr->lr_timeout == 0) {
2699 timeout = jiffies - timeout; 2727 timeout = (jiffies - timeout) *
2700 if (elr->lr_sbi->s_li_wait_mult) 2728 elr->lr_sbi->s_li_wait_mult;
2701 timeout *= elr->lr_sbi->s_li_wait_mult;
2702 else
2703 timeout *= 20;
2704 elr->lr_timeout = timeout; 2729 elr->lr_timeout = timeout;
2705 } 2730 }
2706 elr->lr_next_sched = jiffies + elr->lr_timeout; 2731 elr->lr_next_sched = jiffies + elr->lr_timeout;
@@ -2712,7 +2737,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2712 2737
2713/* 2738/*
2714 * Remove lr_request from the list_request and free the 2739 * Remove lr_request from the list_request and free the
2715 * request tructure. Should be called with li_list_mtx held 2740 * request structure. Should be called with li_list_mtx held
2716 */ 2741 */
2717static void ext4_remove_li_request(struct ext4_li_request *elr) 2742static void ext4_remove_li_request(struct ext4_li_request *elr)
2718{ 2743{
@@ -2730,14 +2755,16 @@ static void ext4_remove_li_request(struct ext4_li_request *elr)
2730 2755
2731static void ext4_unregister_li_request(struct super_block *sb) 2756static void ext4_unregister_li_request(struct super_block *sb)
2732{ 2757{
2733 struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; 2758 mutex_lock(&ext4_li_mtx);
2734 2759 if (!ext4_li_info) {
2735 if (!ext4_li_info) 2760 mutex_unlock(&ext4_li_mtx);
2736 return; 2761 return;
2762 }
2737 2763
2738 mutex_lock(&ext4_li_info->li_list_mtx); 2764 mutex_lock(&ext4_li_info->li_list_mtx);
2739 ext4_remove_li_request(elr); 2765 ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2740 mutex_unlock(&ext4_li_info->li_list_mtx); 2766 mutex_unlock(&ext4_li_info->li_list_mtx);
2767 mutex_unlock(&ext4_li_mtx);
2741} 2768}
2742 2769
2743static struct task_struct *ext4_lazyinit_task; 2770static struct task_struct *ext4_lazyinit_task;
@@ -2756,17 +2783,10 @@ static int ext4_lazyinit_thread(void *arg)
2756 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; 2783 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2757 struct list_head *pos, *n; 2784 struct list_head *pos, *n;
2758 struct ext4_li_request *elr; 2785 struct ext4_li_request *elr;
2759 unsigned long next_wakeup; 2786 unsigned long next_wakeup, cur;
2760 DEFINE_WAIT(wait);
2761 2787
2762 BUG_ON(NULL == eli); 2788 BUG_ON(NULL == eli);
2763 2789
2764 eli->li_timer.data = (unsigned long)current;
2765 eli->li_timer.function = ext4_lazyinode_timeout;
2766
2767 eli->li_task = current;
2768 wake_up(&eli->li_wait_task);
2769
2770cont_thread: 2790cont_thread:
2771 while (true) { 2791 while (true) {
2772 next_wakeup = MAX_JIFFY_OFFSET; 2792 next_wakeup = MAX_JIFFY_OFFSET;
@@ -2797,19 +2817,15 @@ cont_thread:
2797 if (freezing(current)) 2817 if (freezing(current))
2798 refrigerator(); 2818 refrigerator();
2799 2819
2800 if ((time_after_eq(jiffies, next_wakeup)) || 2820 cur = jiffies;
2821 if ((time_after_eq(cur, next_wakeup)) ||
2801 (MAX_JIFFY_OFFSET == next_wakeup)) { 2822 (MAX_JIFFY_OFFSET == next_wakeup)) {
2802 cond_resched(); 2823 cond_resched();
2803 continue; 2824 continue;
2804 } 2825 }
2805 2826
2806 eli->li_timer.expires = next_wakeup; 2827 schedule_timeout_interruptible(next_wakeup - cur);
2807 add_timer(&eli->li_timer); 2828
2808 prepare_to_wait(&eli->li_wait_daemon, &wait,
2809 TASK_INTERRUPTIBLE);
2810 if (time_before(jiffies, next_wakeup))
2811 schedule();
2812 finish_wait(&eli->li_wait_daemon, &wait);
2813 if (kthread_should_stop()) { 2829 if (kthread_should_stop()) {
2814 ext4_clear_request_list(); 2830 ext4_clear_request_list();
2815 goto exit_thread; 2831 goto exit_thread;
@@ -2833,12 +2849,7 @@ exit_thread:
2833 goto cont_thread; 2849 goto cont_thread;
2834 } 2850 }
2835 mutex_unlock(&eli->li_list_mtx); 2851 mutex_unlock(&eli->li_list_mtx);
2836 del_timer_sync(&ext4_li_info->li_timer);
2837 eli->li_task = NULL;
2838 wake_up(&eli->li_wait_task);
2839
2840 kfree(ext4_li_info); 2852 kfree(ext4_li_info);
2841 ext4_lazyinit_task = NULL;
2842 ext4_li_info = NULL; 2853 ext4_li_info = NULL;
2843 mutex_unlock(&ext4_li_mtx); 2854 mutex_unlock(&ext4_li_mtx);
2844 2855
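
The private timer plus two waitqueues that the lazyinit thread used to juggle collapse into the stock kthread sleep idiom; the shape of the new loop, isolated from the hunk above:

	while (!kthread_should_stop()) {
		unsigned long cur = jiffies;

		if (time_after_eq(cur, next_wakeup)) {
			cond_resched();		/* work is due, loop around */
			continue;
		}
		/* sleeps, but wakes early on a signal or kthread_stop() */
		schedule_timeout_interruptible(next_wakeup - cur);
	}

Sampling jiffies once into cur also guarantees that next_wakeup - cur is still positive at the point of the call.
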
@@ -2866,7 +2877,6 @@ static int ext4_run_lazyinit_thread(void)
2866 if (IS_ERR(ext4_lazyinit_task)) { 2877 if (IS_ERR(ext4_lazyinit_task)) {
2867 int err = PTR_ERR(ext4_lazyinit_task); 2878 int err = PTR_ERR(ext4_lazyinit_task);
2868 ext4_clear_request_list(); 2879 ext4_clear_request_list();
2869 del_timer_sync(&ext4_li_info->li_timer);
2870 kfree(ext4_li_info); 2880 kfree(ext4_li_info);
2871 ext4_li_info = NULL; 2881 ext4_li_info = NULL;
2872 printk(KERN_CRIT "EXT4: error %d creating inode table " 2882 printk(KERN_CRIT "EXT4: error %d creating inode table "
@@ -2875,8 +2885,6 @@ static int ext4_run_lazyinit_thread(void)
2875 return err; 2885 return err;
2876 } 2886 }
2877 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; 2887 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2878
2879 wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
2880 return 0; 2888 return 0;
2881} 2889}
2882 2890
@@ -2911,13 +2919,9 @@ static int ext4_li_info_new(void)
2911 if (!eli) 2919 if (!eli)
2912 return -ENOMEM; 2920 return -ENOMEM;
2913 2921
2914 eli->li_task = NULL;
2915 INIT_LIST_HEAD(&eli->li_request_list); 2922 INIT_LIST_HEAD(&eli->li_request_list);
2916 mutex_init(&eli->li_list_mtx); 2923 mutex_init(&eli->li_list_mtx);
2917 2924
2918 init_waitqueue_head(&eli->li_wait_daemon);
2919 init_waitqueue_head(&eli->li_wait_task);
2920 init_timer(&eli->li_timer);
2921 eli->li_state |= EXT4_LAZYINIT_QUIT; 2925 eli->li_state |= EXT4_LAZYINIT_QUIT;
2922 2926
2923 ext4_li_info = eli; 2927 ext4_li_info = eli;
@@ -2960,20 +2964,19 @@ static int ext4_register_li_request(struct super_block *sb,
2960 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2964 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2961 int ret = 0; 2965 int ret = 0;
2962 2966
2963 if (sbi->s_li_request != NULL) 2967 if (sbi->s_li_request != NULL) {
2968 /*
2969 * Reset timeout so it can be computed again, because
2970 * s_li_wait_mult might have changed.
2971 */
2972 sbi->s_li_request->lr_timeout = 0;
2964 return 0; 2973 return 0;
2974 }
2965 2975
2966 if (first_not_zeroed == ngroups || 2976 if (first_not_zeroed == ngroups ||
2967 (sb->s_flags & MS_RDONLY) || 2977 (sb->s_flags & MS_RDONLY) ||
2968 !test_opt(sb, INIT_INODE_TABLE)) { 2978 !test_opt(sb, INIT_INODE_TABLE))
2969 sbi->s_li_request = NULL;
2970 return 0; 2979 return 0;
2971 }
2972
2973 if (first_not_zeroed == ngroups) {
2974 sbi->s_li_request = NULL;
2975 return 0;
2976 }
2977 2980
2978 elr = ext4_li_request_new(sb, first_not_zeroed); 2981 elr = ext4_li_request_new(sb, first_not_zeroed);
2979 if (!elr) 2982 if (!elr)
@@ -3166,6 +3169,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3166 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3169 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3167 set_opt(sb, DELALLOC); 3170 set_opt(sb, DELALLOC);
3168 3171
3172 /*
3173 * set default s_li_wait_mult for lazyinit, in case no mount
3174 * option is specified.
3175 */
3176 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3177
3169 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3178 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3170 &journal_devnum, &journal_ioprio, NULL, 0)) { 3179 &journal_devnum, &journal_ioprio, NULL, 0)) {
3171 ext4_msg(sb, KERN_WARNING, 3180 ext4_msg(sb, KERN_WARNING,
@@ -3187,6 +3196,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3187 "feature flags set on rev 0 fs, " 3196 "feature flags set on rev 0 fs, "
3188 "running e2fsck is recommended"); 3197 "running e2fsck is recommended");
3189 3198
3199 if (IS_EXT2_SB(sb)) {
3200 if (ext2_feature_set_ok(sb))
3201 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3202 "using the ext4 subsystem");
3203 else {
3204 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3205 "to feature incompatibilities");
3206 goto failed_mount;
3207 }
3208 }
3209
3210 if (IS_EXT3_SB(sb)) {
3211 if (ext3_feature_set_ok(sb))
3212 ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3213 "using the ext4 subsystem");
3214 else {
3215 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3216 "to feature incompatibilities");
3217 goto failed_mount;
3218 }
3219 }
3220
3190 /* 3221 /*
3191 * Check feature flags regardless of the revision level, since we 3222 * Check feature flags regardless of the revision level, since we
3192 * previously didn't change the revision level when setting the flags, 3223 * previously didn't change the revision level when setting the flags,
@@ -3459,6 +3490,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3459 EXT4_HAS_INCOMPAT_FEATURE(sb, 3490 EXT4_HAS_INCOMPAT_FEATURE(sb,
3460 EXT4_FEATURE_INCOMPAT_RECOVER)); 3491 EXT4_FEATURE_INCOMPAT_RECOVER));
3461 3492
3493 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3494 !(sb->s_flags & MS_RDONLY))
3495 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3496 goto failed_mount3;
3497
3462 /* 3498 /*
3463 * The first inode we look at is the journal inode. Don't try 3499 * The first inode we look at is the journal inode. Don't try
3464 * root first: it may be modified in the journal! 3500 * root first: it may be modified in the journal!
@@ -3474,7 +3510,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3474 goto failed_mount_wq; 3510 goto failed_mount_wq;
3475 } else { 3511 } else {
3476 clear_opt(sb, DATA_FLAGS); 3512 clear_opt(sb, DATA_FLAGS);
3477 set_opt(sb, WRITEBACK_DATA);
3478 sbi->s_journal = NULL; 3513 sbi->s_journal = NULL;
3479 needs_recovery = 0; 3514 needs_recovery = 0;
3480 goto no_journal; 3515 goto no_journal;
@@ -3707,6 +3742,8 @@ failed_mount3:
3707 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3742 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3708 percpu_counter_destroy(&sbi->s_dirs_counter); 3743 percpu_counter_destroy(&sbi->s_dirs_counter);
3709 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3744 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
3745 if (sbi->s_mmp_tsk)
3746 kthread_stop(sbi->s_mmp_tsk);
3710failed_mount2: 3747failed_mount2:
3711 for (i = 0; i < db_count; i++) 3748 for (i = 0; i < db_count; i++)
3712 brelse(sbi->s_group_desc[i]); 3749 brelse(sbi->s_group_desc[i]);
@@ -4242,7 +4279,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4242 int enable_quota = 0; 4279 int enable_quota = 0;
4243 ext4_group_t g; 4280 ext4_group_t g;
4244 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4281 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4245 int err; 4282 int err = 0;
4246#ifdef CONFIG_QUOTA 4283#ifdef CONFIG_QUOTA
4247 int i; 4284 int i;
4248#endif 4285#endif
@@ -4368,6 +4405,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4368 goto restore_opts; 4405 goto restore_opts;
4369 if (!ext4_setup_super(sb, es, 0)) 4406 if (!ext4_setup_super(sb, es, 0))
4370 sb->s_flags &= ~MS_RDONLY; 4407 sb->s_flags &= ~MS_RDONLY;
4408 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
4409 EXT4_FEATURE_INCOMPAT_MMP))
4410 if (ext4_multi_mount_protect(sb,
4411 le64_to_cpu(es->s_mmp_block))) {
4412 err = -EROFS;
4413 goto restore_opts;
4414 }
4371 enable_quota = 1; 4415 enable_quota = 1;
4372 } 4416 }
4373 } 4417 }
@@ -4432,6 +4476,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4432 struct ext4_sb_info *sbi = EXT4_SB(sb); 4476 struct ext4_sb_info *sbi = EXT4_SB(sb);
4433 struct ext4_super_block *es = sbi->s_es; 4477 struct ext4_super_block *es = sbi->s_es;
4434 u64 fsid; 4478 u64 fsid;
4479 s64 bfree;
4435 4480
4436 if (test_opt(sb, MINIX_DF)) { 4481 if (test_opt(sb, MINIX_DF)) {
4437 sbi->s_overhead_last = 0; 4482 sbi->s_overhead_last = 0;
@@ -4475,8 +4520,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4475 buf->f_type = EXT4_SUPER_MAGIC; 4520 buf->f_type = EXT4_SUPER_MAGIC;
4476 buf->f_bsize = sb->s_blocksize; 4521 buf->f_bsize = sb->s_blocksize;
4477 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 4522 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
4478 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 4523 bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
4479 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 4524 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
4525 /* prevent underflow when only a little free space is available */
4526 buf->f_bfree = max_t(s64, bfree, 0);
4480 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 4527 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
4481 if (buf->f_bfree < ext4_r_blocks_count(es)) 4528 if (buf->f_bfree < ext4_r_blocks_count(es))
4482 buf->f_bavail = 0; 4529 buf->f_bavail = 0;
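
Why the clamp: the two percpu counter sums are taken at slightly different times and are only approximately consistent, so free minus dirty can transiently go negative under load; assigning that to the unsigned f_bfree would report an enormous bogus free count. The whole fix is two lines:

	bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
		percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
	buf->f_bfree = max_t(s64, bfree, 0);	/* never let it wrap */
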
@@ -4652,6 +4699,9 @@ static int ext4_quota_off(struct super_block *sb, int type)
4652 if (test_opt(sb, DELALLOC)) 4699 if (test_opt(sb, DELALLOC))
4653 sync_filesystem(sb); 4700 sync_filesystem(sb);
4654 4701
4702 if (!inode)
4703 goto out;
4704
4655 /* Update modification times of quota files when userspace can 4705 /* Update modification times of quota files when userspace can
4656 * start looking at them */ 4706 * start looking at them */
4657 handle = ext4_journal_start(inode, 1); 4707 handle = ext4_journal_start(inode, 1);
@@ -4772,14 +4822,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
4772} 4822}
4773 4823
4774#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4824#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4775static struct file_system_type ext2_fs_type = {
4776 .owner = THIS_MODULE,
4777 .name = "ext2",
4778 .mount = ext4_mount,
4779 .kill_sb = kill_block_super,
4780 .fs_flags = FS_REQUIRES_DEV,
4781};
4782
4783static inline void register_as_ext2(void) 4825static inline void register_as_ext2(void)
4784{ 4826{
4785 int err = register_filesystem(&ext2_fs_type); 4827 int err = register_filesystem(&ext2_fs_type);
@@ -4792,10 +4834,22 @@ static inline void unregister_as_ext2(void)
4792{ 4834{
4793 unregister_filesystem(&ext2_fs_type); 4835 unregister_filesystem(&ext2_fs_type);
4794} 4836}
4837
4838static inline int ext2_feature_set_ok(struct super_block *sb)
4839{
4840 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
4841 return 0;
4842 if (sb->s_flags & MS_RDONLY)
4843 return 1;
4844 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
4845 return 0;
4846 return 1;
4847}
4795MODULE_ALIAS("ext2"); 4848MODULE_ALIAS("ext2");
4796#else 4849#else
4797static inline void register_as_ext2(void) { } 4850static inline void register_as_ext2(void) { }
4798static inline void unregister_as_ext2(void) { } 4851static inline void unregister_as_ext2(void) { }
4852static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
4799#endif 4853#endif
4800 4854
4801#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4855#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
@@ -4811,10 +4865,24 @@ static inline void unregister_as_ext3(void)
4811{ 4865{
4812 unregister_filesystem(&ext3_fs_type); 4866 unregister_filesystem(&ext3_fs_type);
4813} 4867}
4868
4869static inline int ext3_feature_set_ok(struct super_block *sb)
4870{
4871 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
4872 return 0;
4873 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
4874 return 0;
4875 if (sb->s_flags & MS_RDONLY)
4876 return 1;
4877 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
4878 return 0;
4879 return 1;
4880}
4814MODULE_ALIAS("ext3"); 4881MODULE_ALIAS("ext3");
4815#else 4882#else
4816static inline void register_as_ext3(void) { } 4883static inline void register_as_ext3(void) { }
4817static inline void unregister_as_ext3(void) { } 4884static inline void unregister_as_ext3(void) { }
4885static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
4818#endif 4886#endif
4819 4887
4820static struct file_system_type ext4_fs_type = { 4888static struct file_system_type ext4_fs_type = {
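
Reading the masks in ext2_feature_set_ok() and ext3_feature_set_ok(): each test asks whether the superblock sets any feature bit outside what the older driver understood. A commented restatement of the ext3 variant above:

	/*
	 * EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)
	 *	any INCOMPAT bit ext3 never knew -> refuse outright
	 * !EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)
	 *	ext3 requires a journal -> no journal, no ext3
	 * sb->s_flags & MS_RDONLY
	 *	RO_COMPAT bits only matter for writing -> read-only gets a pass
	 * EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)
	 *	unknown RO_COMPAT bit on a writable mount -> refuse
	 */
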
@@ -4898,8 +4966,8 @@ static int __init ext4_init_fs(void)
4898 err = init_inodecache(); 4966 err = init_inodecache();
4899 if (err) 4967 if (err)
4900 goto out1; 4968 goto out1;
4901 register_as_ext2();
4902 register_as_ext3(); 4969 register_as_ext3();
4970 register_as_ext2();
4903 err = register_filesystem(&ext4_fs_type); 4971 err = register_filesystem(&ext4_fs_type);
4904 if (err) 4972 if (err)
4905 goto out; 4973 goto out;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b545ca1c459c..c757adc97250 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,8 @@ inserted:
820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
822 822
823 block = ext4_new_meta_blocks(handle, inode, 823 block = ext4_new_meta_blocks(handle, inode, goal, 0,
824 goal, NULL, &error); 824 NULL, &error);
825 if (error) 825 if (error)
826 goto cleanup; 826 goto cleanup;
827 827
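
For context on the reshuffled arguments: ext4_new_meta_blocks() grows a flags word elsewhere in this series, so the prototype this call site is adapting to is presumably the following (an assumption reconstructed from the call, not shown in this diff; 0 means no allocation flags, NULL keeps the single-block default):

	ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
					  ext4_fsblk_t goal, unsigned int flags,
					  unsigned long *count, int *errp);
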
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index ae8200f84e39..1cc7038e273d 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new)
151 spin_unlock(&MSDOS_I(inode)->cache_lru_lock); 151 spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
152 152
153 tmp = fat_cache_alloc(inode); 153 tmp = fat_cache_alloc(inode);
154 if (!tmp) {
155 spin_lock(&MSDOS_I(inode)->cache_lru_lock);
156 MSDOS_I(inode)->nr_caches--;
157 spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
158 return;
159 }
160
154 spin_lock(&MSDOS_I(inode)->cache_lru_lock); 161 spin_lock(&MSDOS_I(inode)->cache_lru_lock);
155 cache = fat_cache_merge(inode, new); 162 cache = fat_cache_merge(inode, new);
156 if (cache != NULL) { 163 if (cache != NULL) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ee42b9e0b16a..4ad64732cbce 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -98,7 +98,7 @@ next:
98 98
99 *bh = sb_bread(sb, phys); 99 *bh = sb_bread(sb, phys);
100 if (*bh == NULL) { 100 if (*bh == NULL) {
101 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 101 fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed",
102 (llu)phys); 102 (llu)phys);
103 /* skip this block */ 103 /* skip this block */
104 *pos = (iblock + 1) << sb->s_blocksize_bits; 104 *pos = (iblock + 1) << sb->s_blocksize_bits;
@@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
136 * but ignore that right now. 136 * but ignore that right now.
137 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 137 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
138 */ 138 */
139static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, 139static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
140 int uni_xlate, struct nls_table *nls) 140 const wchar_t *uni, int len, struct nls_table *nls)
141{ 141{
142 int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate;
142 const wchar_t *ip; 143 const wchar_t *ip;
143 wchar_t ec; 144 wchar_t ec;
144 unsigned char *op; 145 unsigned char *op;
@@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
166 } 167 }
167 168
168 if (unlikely(*ip)) { 169 if (unlikely(*ip)) {
169 printk(KERN_WARNING "FAT: filename was truncated while " 170 fat_msg(sb, KERN_WARNING, "filename was truncated while "
170 "converting."); 171 "converting.");
171 } 172 }
172 173
173 *op = 0; 174 *op = 0;
174 return (op - ascii); 175 return (op - ascii);
175} 176}
176 177
177static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, 178static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
178 unsigned char *buf, int size) 179 unsigned char *buf, int size)
179{ 180{
181 struct msdos_sb_info *sbi = MSDOS_SB(sb);
180 if (sbi->options.utf8) 182 if (sbi->options.utf8)
181 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, 183 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
182 UTF16_HOST_ENDIAN, buf, size); 184 UTF16_HOST_ENDIAN, buf, size);
183 else 185 else
184 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, 186 return uni16_to_x8(sb, buf, uni, size, sbi->nls_io);
185 sbi->nls_io);
186} 187}
187 188
188static inline int 189static inline int
@@ -419,7 +420,7 @@ parse_record:
419 420
420 /* Compare shortname */ 421 /* Compare shortname */
421 bufuname[last_u] = 0x0000; 422 bufuname[last_u] = 0x0000;
422 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); 423 len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
423 if (fat_name_match(sbi, name, name_len, bufname, len)) 424 if (fat_name_match(sbi, name, name_len, bufname, len))
424 goto found; 425 goto found;
425 426
@@ -428,7 +429,7 @@ parse_record:
428 int size = PATH_MAX - FAT_MAX_UNI_SIZE; 429 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
429 430
430 /* Compare longname */ 431 /* Compare longname */
431 len = fat_uni_to_x8(sbi, unicode, longname, size); 432 len = fat_uni_to_x8(sb, unicode, longname, size);
432 if (fat_name_match(sbi, name, name_len, longname, len)) 433 if (fat_name_match(sbi, name, name_len, longname, len))
433 goto found; 434 goto found;
434 } 435 }
@@ -545,7 +546,7 @@ parse_record:
545 if (nr_slots) { 546 if (nr_slots) {
546 void *longname = unicode + FAT_MAX_UNI_CHARS; 547 void *longname = unicode + FAT_MAX_UNI_CHARS;
547 int size = PATH_MAX - FAT_MAX_UNI_SIZE; 548 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
548 int len = fat_uni_to_x8(sbi, unicode, longname, size); 549 int len = fat_uni_to_x8(sb, unicode, longname, size);
549 550
550 fill_name = longname; 551 fill_name = longname;
551 fill_len = len; 552 fill_len = len;
@@ -621,7 +622,7 @@ parse_record:
621 622
622 if (isvfat) { 623 if (isvfat) {
623 bufuname[j] = 0x0000; 624 bufuname[j] = 0x0000;
624 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); 625 i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
625 } 626 }
626 if (nr_slots) { 627 if (nr_slots) {
627 /* hack for fat_ioctl_filldir() */ 628 /* hack for fat_ioctl_filldir() */
@@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
979 980
980int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) 981int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
981{ 982{
983 struct super_block *sb = dir->i_sb;
982 struct msdos_dir_entry *de; 984 struct msdos_dir_entry *de;
983 struct buffer_head *bh; 985 struct buffer_head *bh;
984 int err = 0, nr_slots; 986 int err = 0, nr_slots;
@@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
1013 */ 1015 */
1014 err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots); 1016 err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots);
1015 if (err) { 1017 if (err) {
1016 printk(KERN_WARNING 1018 fat_msg(sb, KERN_WARNING,
1017 "FAT: Couldn't remove the long name slots\n"); 1019 "Couldn't remove the long name slots");
1018 } 1020 }
1019 } 1021 }
1020 1022
@@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
1265 if (sbi->fat_bits != 32) 1267 if (sbi->fat_bits != 32)
1266 goto error; 1268 goto error;
1267 } else if (MSDOS_I(dir)->i_start == 0) { 1269 } else if (MSDOS_I(dir)->i_start == 0) {
1268 printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n", 1270 fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
1269 MSDOS_I(dir)->i_pos); 1271 MSDOS_I(dir)->i_pos);
1270 err = -EIO; 1272 err = -EIO;
1271 goto error; 1273 goto error;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f50408901f7e..8276cc282dec 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb,
319 struct msdos_dir_entry *de, loff_t i_pos); 319 struct msdos_dir_entry *de, loff_t i_pos);
320extern int fat_sync_inode(struct inode *inode); 320extern int fat_sync_inode(struct inode *inode);
321extern int fat_fill_super(struct super_block *sb, void *data, int silent, 321extern int fat_fill_super(struct super_block *sb, void *data, int silent,
322 const struct inode_operations *fs_dir_inode_ops, 322 int isvfat, void (*setup)(struct super_block *));
323 int isvfat, void (*setup)(struct super_block *));
324 323
325extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 324extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
326 struct inode *i2); 325 struct inode *i2);
327/* fat/misc.c */ 326/* fat/misc.c */
328extern void 327extern void
329__fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 328__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
329 __attribute__ ((format (printf, 3, 4))) __cold;
330#define fat_fs_error(sb, fmt, args...) \
331 __fat_fs_error(sb, 1, fmt , ## args)
332#define fat_fs_error_ratelimit(sb, fmt, args...) \
333 __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
334void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
330 __attribute__ ((format (printf, 3, 4))) __cold; 335 __attribute__ ((format (printf, 3, 4))) __cold;
331#define fat_fs_error(s, fmt, args...) \
332 __fat_fs_error(s, 1, fmt , ## args)
333#define fat_fs_error_ratelimit(s, fmt, args...) \
334 __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args)
335extern int fat_clusters_flush(struct super_block *sb); 336extern int fat_clusters_flush(struct super_block *sb);
336extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 337extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
337extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, 338extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
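
fat_msg() itself lives in fs/fat/misc.c and is not shown in this diff; judging by the call sites it is presumably the usual %pV wrapper that prefixes the log level and device name, along these lines (sketch):

	void fat_msg(struct super_block *sb, const char *level,
		     const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
		va_end(args);
	}

This is also why the converted messages drop their trailing \n: the wrapper appends one itself.
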
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9f4fa1..2e81ac0df7e2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
95err_brelse: 95err_brelse:
96 brelse(bhs[0]); 96 brelse(bhs[0]);
97err: 97err:
98 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); 98 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
99 return -EIO; 99 return -EIO;
100} 100}
101 101
@@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode; 108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
109 fatent->bhs[0] = sb_bread(sb, blocknr); 109 fatent->bhs[0] = sb_bread(sb, blocknr);
110 if (!fatent->bhs[0]) { 110 if (!fatent->bhs[0]) {
111 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 111 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
112 (llu)blocknr); 112 (llu)blocknr);
113 return -EIO; 113 return -EIO;
114 } 114 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8d68690bdcf1..cb8d8391ac0b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
581 buf->f_bavail = sbi->free_clusters; 581 buf->f_bavail = sbi->free_clusters;
582 buf->f_fsid.val[0] = (u32)id; 582 buf->f_fsid.val[0] = (u32)id;
583 buf->f_fsid.val[1] = (u32)(id >> 32); 583 buf->f_fsid.val[1] = (u32)(id >> 32);
584 buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12; 584 buf->f_namelen =
585 (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
585 586
586 return 0; 587 return 0;
587} 588}
@@ -619,8 +620,8 @@ retry:
619 620
620 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 621 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
621 if (!bh) { 622 if (!bh) {
622 printk(KERN_ERR "FAT: unable to read inode block " 623 fat_msg(sb, KERN_ERR, "unable to read inode block "
623 "for updating (i_pos %lld)\n", i_pos); 624 "for updating (i_pos %lld)", i_pos);
624 return -EIO; 625 return -EIO;
625 } 626 }
626 spin_lock(&sbi->inode_hash_lock); 627 spin_lock(&sbi->inode_hash_lock);
@@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = {
976 {Opt_err, NULL} 977 {Opt_err, NULL}
977}; 978};
978 979
979static int parse_options(char *options, int is_vfat, int silent, int *debug, 980static int parse_options(struct super_block *sb, char *options, int is_vfat,
980 struct fat_mount_options *opts) 981 int silent, int *debug, struct fat_mount_options *opts)
981{ 982{
982 char *p; 983 char *p;
983 substring_t args[MAX_OPT_ARGS]; 984 substring_t args[MAX_OPT_ARGS];
@@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1168 1169
1169 /* obsolete mount options */ 1170 /* obsolete mount options */
1170 case Opt_obsolate: 1171 case Opt_obsolate:
1171 printk(KERN_INFO "FAT: \"%s\" option is obsolete, " 1172 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
1172 "not supported now\n", p); 1173 "not supported now", p);
1173 break; 1174 break;
1174 /* unknown option */ 1175 /* unknown option */
1175 default: 1176 default:
1176 if (!silent) { 1177 if (!silent) {
1177 printk(KERN_ERR 1178 fat_msg(sb, KERN_ERR,
1178 "FAT: Unrecognized mount option \"%s\" " 1179 "Unrecognized mount option \"%s\" "
1179 "or missing value\n", p); 1180 "or missing value", p);
1180 } 1181 }
1181 return -EINVAL; 1182 return -EINVAL;
1182 } 1183 }
@@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1185out: 1186out:
1186 /* UTF-8 doesn't provide FAT semantics */ 1187 /* UTF-8 doesn't provide FAT semantics */
1187 if (!strcmp(opts->iocharset, "utf8")) { 1188 if (!strcmp(opts->iocharset, "utf8")) {
1188 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1189 fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
1189 " for FAT filesystems, filesystem will be " 1190 " for FAT filesystems, filesystem will be "
1190 "case sensitive!\n"); 1191 "case sensitive!\n");
1191 } 1192 }
@@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode)
1238/* 1239/*
1239 * Read the super block of an MS-DOS FS. 1240 * Read the super block of an MS-DOS FS.
1240 */ 1241 */
1241int fat_fill_super(struct super_block *sb, void *data, int silent, 1242int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1242 const struct inode_operations *fs_dir_inode_ops, int isvfat,
1243 void (*setup)(struct super_block *)) 1243 void (*setup)(struct super_block *))
1244{ 1244{
1245 struct inode *root_inode = NULL, *fat_inode = NULL; 1245 struct inode *root_inode = NULL, *fat_inode = NULL;
@@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1268 sb->s_magic = MSDOS_SUPER_MAGIC; 1268 sb->s_magic = MSDOS_SUPER_MAGIC;
1269 sb->s_op = &fat_sops; 1269 sb->s_op = &fat_sops;
1270 sb->s_export_op = &fat_export_ops; 1270 sb->s_export_op = &fat_export_ops;
1271 sbi->dir_ops = fs_dir_inode_ops;
1272 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, 1271 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
1273 DEFAULT_RATELIMIT_BURST); 1272 DEFAULT_RATELIMIT_BURST);
1274 1273
1275 error = parse_options(data, isvfat, silent, &debug, &sbi->options); 1274 error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
1276 if (error) 1275 if (error)
1277 goto out_fail; 1276 goto out_fail;
1278 1277
@@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1282 sb_min_blocksize(sb, 512); 1281 sb_min_blocksize(sb, 512);
1283 bh = sb_bread(sb, 0); 1282 bh = sb_bread(sb, 0);
1284 if (bh == NULL) { 1283 if (bh == NULL) {
1285 printk(KERN_ERR "FAT: unable to read boot sector\n"); 1284 fat_msg(sb, KERN_ERR, "unable to read boot sector");
1286 goto out_fail; 1285 goto out_fail;
1287 } 1286 }
1288 1287
1289 b = (struct fat_boot_sector *) bh->b_data; 1288 b = (struct fat_boot_sector *) bh->b_data;
1290 if (!b->reserved) { 1289 if (!b->reserved) {
1291 if (!silent) 1290 if (!silent)
1292 printk(KERN_ERR "FAT: bogus number of reserved sectors\n"); 1291 fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
1293 brelse(bh); 1292 brelse(bh);
1294 goto out_invalid; 1293 goto out_invalid;
1295 } 1294 }
1296 if (!b->fats) { 1295 if (!b->fats) {
1297 if (!silent) 1296 if (!silent)
1298 printk(KERN_ERR "FAT: bogus number of FAT structure\n"); 1297 fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
1299 brelse(bh); 1298 brelse(bh);
1300 goto out_invalid; 1299 goto out_invalid;
1301 } 1300 }
@@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1308 media = b->media; 1307 media = b->media;
1309 if (!fat_valid_media(media)) { 1308 if (!fat_valid_media(media)) {
1310 if (!silent) 1309 if (!silent)
1311 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", 1310 fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
1312 media); 1311 media);
1313 brelse(bh); 1312 brelse(bh);
1314 goto out_invalid; 1313 goto out_invalid;
@@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1318 || (logical_sector_size < 512) 1317 || (logical_sector_size < 512)
1319 || (logical_sector_size > 4096)) { 1318 || (logical_sector_size > 4096)) {
1320 if (!silent) 1319 if (!silent)
1321 printk(KERN_ERR "FAT: bogus logical sector size %u\n", 1320 fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
1322 logical_sector_size); 1321 logical_sector_size);
1323 brelse(bh); 1322 brelse(bh);
1324 goto out_invalid; 1323 goto out_invalid;
@@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1326 sbi->sec_per_clus = b->sec_per_clus; 1325 sbi->sec_per_clus = b->sec_per_clus;
1327 if (!is_power_of_2(sbi->sec_per_clus)) { 1326 if (!is_power_of_2(sbi->sec_per_clus)) {
1328 if (!silent) 1327 if (!silent)
1329 printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", 1328 fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
1330 sbi->sec_per_clus); 1329 sbi->sec_per_clus);
1331 brelse(bh); 1330 brelse(bh);
1332 goto out_invalid; 1331 goto out_invalid;
1333 } 1332 }
1334 1333
1335 if (logical_sector_size < sb->s_blocksize) { 1334 if (logical_sector_size < sb->s_blocksize) {
1336 printk(KERN_ERR "FAT: logical sector size too small for device" 1335 fat_msg(sb, KERN_ERR, "logical sector size too small for device"
1337 " (logical sector size = %u)\n", logical_sector_size); 1336 " (logical sector size = %u)", logical_sector_size);
1338 brelse(bh); 1337 brelse(bh);
1339 goto out_fail; 1338 goto out_fail;
1340 } 1339 }
@@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1342 brelse(bh); 1341 brelse(bh);
1343 1342
1344 if (!sb_set_blocksize(sb, logical_sector_size)) { 1343 if (!sb_set_blocksize(sb, logical_sector_size)) {
1345 printk(KERN_ERR "FAT: unable to set blocksize %u\n", 1344 fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
1346 logical_sector_size); 1345 logical_sector_size);
1347 goto out_fail; 1346 goto out_fail;
1348 } 1347 }
1349 bh = sb_bread(sb, 0); 1348 bh = sb_bread(sb, 0);
1350 if (bh == NULL) { 1349 if (bh == NULL) {
1351 printk(KERN_ERR "FAT: unable to read boot sector" 1350 fat_msg(sb, KERN_ERR, "unable to read boot sector"
1352 " (logical sector size = %lu)\n", 1351 " (logical sector size = %lu)",
1353 sb->s_blocksize); 1352 sb->s_blocksize);
1354 goto out_fail; 1353 goto out_fail;
1355 } 1354 }
@@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1385 1384
1386 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector); 1385 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
1387 if (fsinfo_bh == NULL) { 1386 if (fsinfo_bh == NULL) {
1388 printk(KERN_ERR "FAT: bread failed, FSINFO block" 1387 fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
1389 " (sector = %lu)\n", sbi->fsinfo_sector); 1388 " (sector = %lu)", sbi->fsinfo_sector);
1390 brelse(bh); 1389 brelse(bh);
1391 goto out_fail; 1390 goto out_fail;
1392 } 1391 }
1393 1392
1394 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; 1393 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
1395 if (!IS_FSINFO(fsinfo)) { 1394 if (!IS_FSINFO(fsinfo)) {
1396 printk(KERN_WARNING "FAT: Invalid FSINFO signature: " 1395 fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
1397 "0x%08x, 0x%08x (sector = %lu)\n", 1396 "0x%08x, 0x%08x (sector = %lu)",
1398 le32_to_cpu(fsinfo->signature1), 1397 le32_to_cpu(fsinfo->signature1),
1399 le32_to_cpu(fsinfo->signature2), 1398 le32_to_cpu(fsinfo->signature2),
1400 sbi->fsinfo_sector); 1399 sbi->fsinfo_sector);
@@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1415 sbi->dir_entries = get_unaligned_le16(&b->dir_entries); 1414 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1416 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1415 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1417 if (!silent) 1416 if (!silent)
1418 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1417 fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
1419 " (%u)\n", sbi->dir_entries); 1418 " (%u)", sbi->dir_entries);
1420 brelse(bh); 1419 brelse(bh);
1421 goto out_invalid; 1420 goto out_invalid;
1422 } 1421 }
@@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1438 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1437 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
1439 if (total_clusters > MAX_FAT(sb)) { 1438 if (total_clusters > MAX_FAT(sb)) {
1440 if (!silent) 1439 if (!silent)
1441 printk(KERN_ERR "FAT: count of clusters too big (%u)\n", 1440 fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
1442 total_clusters); 1441 total_clusters);
1443 brelse(bh); 1442 brelse(bh);
1444 goto out_invalid; 1443 goto out_invalid;
@@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1471 sprintf(buf, "cp%d", sbi->options.codepage); 1470 sprintf(buf, "cp%d", sbi->options.codepage);
1472 sbi->nls_disk = load_nls(buf); 1471 sbi->nls_disk = load_nls(buf);
1473 if (!sbi->nls_disk) { 1472 if (!sbi->nls_disk) {
1474 printk(KERN_ERR "FAT: codepage %s not found\n", buf); 1473 fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
1475 goto out_fail; 1474 goto out_fail;
1476 } 1475 }
1477 1476
@@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1479 if (sbi->options.isvfat) { 1478 if (sbi->options.isvfat) {
1480 sbi->nls_io = load_nls(sbi->options.iocharset); 1479 sbi->nls_io = load_nls(sbi->options.iocharset);
1481 if (!sbi->nls_io) { 1480 if (!sbi->nls_io) {
1482 printk(KERN_ERR "FAT: IO charset %s not found\n", 1481 fat_msg(sb, KERN_ERR, "IO charset %s not found",
1483 sbi->options.iocharset); 1482 sbi->options.iocharset);
1484 goto out_fail; 1483 goto out_fail;
1485 } 1484 }
@@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1503 insert_inode_hash(root_inode); 1502 insert_inode_hash(root_inode);
1504 sb->s_root = d_alloc_root(root_inode); 1503 sb->s_root = d_alloc_root(root_inode);
1505 if (!sb->s_root) { 1504 if (!sb->s_root) {
1506 printk(KERN_ERR "FAT: get root inode failed\n"); 1505 fat_msg(sb, KERN_ERR, "get root inode failed");
1507 goto out_fail; 1506 goto out_fail;
1508 } 1507 }
1509 1508
@@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1512out_invalid: 1511out_invalid:
1513 error = -EINVAL; 1512 error = -EINVAL;
1514 if (!silent) 1513 if (!silent)
1515 printk(KERN_INFO "VFS: Can't find a valid FAT filesystem" 1514 fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
1516 " on dev %s.\n", sb->s_id);
1517 1515
1518out_fail: 1516out_fail:
1519 if (fat_inode) 1517 if (fat_inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 970e682ea754..6d93360ca0cc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -20,30 +20,46 @@
20 * In case the file system is remounted read-only, it can be made writable 20 * In case the file system is remounted read-only, it can be made writable
21 * again by remounting it. 21 * again by remounting it.
22 */ 22 */
23void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 23void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
24{ 24{
25 struct fat_mount_options *opts = &MSDOS_SB(s)->options; 25 struct fat_mount_options *opts = &MSDOS_SB(sb)->options;
26 va_list args; 26 va_list args;
27 struct va_format vaf;
27 28
28 if (report) { 29 if (report) {
29 printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
30
31 printk(KERN_ERR " ");
32 va_start(args, fmt); 30 va_start(args, fmt);
33 vprintk(fmt, args); 31 vaf.fmt = fmt;
32 vaf.va = &args;
33 printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
34 va_end(args); 34 va_end(args);
35 printk("\n");
36 } 35 }
37 36
38 if (opts->errors == FAT_ERRORS_PANIC) 37 if (opts->errors == FAT_ERRORS_PANIC)
39 panic("FAT: fs panic from previous error\n"); 38 panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
40 else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { 39 else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
41 s->s_flags |= MS_RDONLY; 40 sb->s_flags |= MS_RDONLY;
42 printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); 41 printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
42 "set read-only\n", sb->s_id);
43 } 43 }
44} 44}
45EXPORT_SYMBOL_GPL(__fat_fs_error); 45EXPORT_SYMBOL_GPL(__fat_fs_error);
46 46
47/**
48 * fat_msg() - print preformatted FAT-specific messages. Everything that is
49 * not fat_fs_error() should be fat_msg().
50 */
51void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
52{
53 struct va_format vaf;
54 va_list args;
55
56 va_start(args, fmt);
57 vaf.fmt = fmt;
58 vaf.va = &args;
59 printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
60 va_end(args);
61}
62
47/* Flushes the number of free clusters on FAT32 */ 63/* Flushes the number of free clusters on FAT32 */
48/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 64/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
49int fat_clusters_flush(struct super_block *sb) 65int fat_clusters_flush(struct super_block *sb)
@@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb)
57 73
58 bh = sb_bread(sb, sbi->fsinfo_sector); 74 bh = sb_bread(sb, sbi->fsinfo_sector);
59 if (bh == NULL) { 75 if (bh == NULL) {
60 printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n"); 76 fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush");
61 return -EIO; 77 return -EIO;
62 } 78 }
63 79
64 fsinfo = (struct fat_boot_fsinfo *)bh->b_data; 80 fsinfo = (struct fat_boot_fsinfo *)bh->b_data;
65 /* Sanity check */ 81 /* Sanity check */
66 if (!IS_FSINFO(fsinfo)) { 82 if (!IS_FSINFO(fsinfo)) {
67 printk(KERN_ERR "FAT: Invalid FSINFO signature: " 83 fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: "
68 "0x%08x, 0x%08x (sector = %lu)\n", 84 "0x%08x, 0x%08x (sector = %lu)",
69 le32_to_cpu(fsinfo->signature1), 85 le32_to_cpu(fsinfo->signature1),
70 le32_to_cpu(fsinfo->signature2), 86 le32_to_cpu(fsinfo->signature2),
71 sbi->fsinfo_sector); 87 sbi->fsinfo_sector);
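The helper above relies on the kernel's %pV printk extension: a struct va_format carries the caller's format string and va_list, so the "FAT-fs (<dev>): " prefix and the message body are emitted as one printk record instead of the old multi-call sequence that could interleave with output from other CPUs. Plain C has no %pV, but the calling convention can be approximated in userspace with vfprintf; everything below is an illustrative sketch, not kernel code.

/* Userspace sketch of the fat_msg() convention: one varargs helper
 * prefixes the device id and emits a single line. ("<3>" stands in
 * for the KERN_ERR level prefix of this kernel era.) */
#include <stdarg.h>
#include <stdio.h>

static void fat_msg_demo(const char *level, const char *dev,
			 const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "%sFAT-fs (%s): ", level, dev);
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);
	va_end(args);
}

int main(void)
{
	fat_msg_demo("<3>", "sda1", "bogus logical sector size %u", 1536);
	return 0;
}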
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 711499040eb6..be15437c272e 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -326,6 +326,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
326 struct fat_slot_info sinfo; 326 struct fat_slot_info sinfo;
327 int err; 327 int err;
328 328
329 dentry_unhash(dentry);
330
329 lock_super(sb); 331 lock_super(sb);
330 /* 332 /*
331 * Check whether the directory is not in use, then check 333 * Check whether the directory is not in use, then check
@@ -457,6 +459,9 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
457 old_inode = old_dentry->d_inode; 459 old_inode = old_dentry->d_inode;
458 new_inode = new_dentry->d_inode; 460 new_inode = new_dentry->d_inode;
459 461
462 if (new_inode && S_ISDIR(new_inode->i_mode))
463 dentry_unhash(new_dentry);
464
460 err = fat_scan(old_dir, old_name, &old_sinfo); 465 err = fat_scan(old_dir, old_name, &old_sinfo);
461 if (err) { 466 if (err) {
462 err = -EIO; 467 err = -EIO;
@@ -659,14 +664,14 @@ static const struct inode_operations msdos_dir_inode_operations = {
659 664
660static void setup(struct super_block *sb) 665static void setup(struct super_block *sb)
661{ 666{
667 MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
662 sb->s_d_op = &msdos_dentry_operations; 668 sb->s_d_op = &msdos_dentry_operations;
663 sb->s_flags |= MS_NOATIME; 669 sb->s_flags |= MS_NOATIME;
664} 670}
665 671
666static int msdos_fill_super(struct super_block *sb, void *data, int silent) 672static int msdos_fill_super(struct super_block *sb, void *data, int silent)
667{ 673{
668 return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 674 return fat_fill_super(sb, data, silent, 0, setup);
669 0, setup);
670} 675}
671 676
672static struct dentry *msdos_mount(struct file_system_type *fs_type, 677static struct dentry *msdos_mount(struct file_system_type *fs_type,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index adae3fb7451a..c61a6789f36c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -824,6 +824,8 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
824 struct fat_slot_info sinfo; 824 struct fat_slot_info sinfo;
825 int err; 825 int err;
826 826
827 dentry_unhash(dentry);
828
827 lock_super(sb); 829 lock_super(sb);
828 830
829 err = fat_dir_empty(inode); 831 err = fat_dir_empty(inode);
@@ -931,6 +933,9 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
931 int err, is_dir, update_dotdot, corrupt = 0; 933 int err, is_dir, update_dotdot, corrupt = 0;
932 struct super_block *sb = old_dir->i_sb; 934 struct super_block *sb = old_dir->i_sb;
933 935
936 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
937 dentry_unhash(new_dentry);
938
934 old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; 939 old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
935 old_inode = old_dentry->d_inode; 940 old_inode = old_dentry->d_inode;
936 new_inode = new_dentry->d_inode; 941 new_inode = new_dentry->d_inode;
@@ -1065,6 +1070,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
1065 1070
1066static void setup(struct super_block *sb) 1071static void setup(struct super_block *sb)
1067{ 1072{
1073 MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
1068 if (MSDOS_SB(sb)->options.name_check != 's') 1074 if (MSDOS_SB(sb)->options.name_check != 's')
1069 sb->s_d_op = &vfat_ci_dentry_ops; 1075 sb->s_d_op = &vfat_ci_dentry_ops;
1070 else 1076 else
@@ -1073,8 +1079,7 @@ static void setup(struct super_block *sb)
1073 1079
1074static int vfat_fill_super(struct super_block *sb, void *data, int silent) 1080static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1075{ 1081{
1076 return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1082 return fat_fill_super(sb, data, silent, 1, setup);
1077 1, setup);
1078} 1083}
1079 1084
1080static struct dentry *vfat_mount(struct file_system_type *fs_type, 1085static struct dentry *vfat_mount(struct file_system_type *fs_type,
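The dentry_unhash() calls added to msdos_rmdir(), do_msdos_rename(), vfat_rmdir() and vfat_rename() (fuse gets the same treatment below) track a VFS-wide change in this series: the VFS no longer unhashes the victim dentry before ->rmdir(), or before ->rename() onto an existing directory, so filesystems that depended on that behaviour now do it themselves. A kernel-style sketch of the convention, with hypothetical function names; this shows the call pattern, not a complete filesystem:

#include <linux/fs.h>
#include <linux/dcache.h>

static int example_rmdir(struct inode *dir, struct dentry *dentry)
{
	dentry_unhash(dentry);	/* force later lookups to miss the dcache */

	/* ... verify the directory is empty, then remove it on disk ... */
	return 0;
}

static int example_rename(struct inode *old_dir, struct dentry *old_dentry,
			  struct inode *new_dir, struct dentry *new_dentry)
{
	/* only an overwriting rename whose target is a directory
	 * needs the unhash */
	if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
		dentry_unhash(new_dentry);

	/* ... perform the rename, removing the old target if any ... */
	return 0;
}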
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 2ba6719ac612..1a4311437a8b 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
272 * *ip: VFS inode 272 * *ip: VFS inode
273 * 273 *
274 * Description: 274 * Description:
275 * vxfs_put_fake_inode frees all data asssociated with @ip. 275 * vxfs_put_fake_inode frees all data associated with @ip.
276 */ 276 */
277void 277void
278vxfs_put_fake_inode(struct inode *ip) 278vxfs_put_fake_inode(struct inode *ip)
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f184d50..30afdfa7aec7 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op)
33 _enter("{OBJ%x OP%x,%u}", 33 _enter("{OBJ%x OP%x,%u}",
34 op->object->debug_id, op->debug_id, atomic_read(&op->usage)); 34 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
35 35
36 fscache_set_op_state(op, "EnQ");
37
38 ASSERT(list_empty(&op->pend_link)); 36 ASSERT(list_empty(&op->pend_link));
39 ASSERT(op->processor != NULL); 37 ASSERT(op->processor != NULL);
40 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); 38 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
66static void fscache_run_op(struct fscache_object *object, 64static void fscache_run_op(struct fscache_object *object,
67 struct fscache_operation *op) 65 struct fscache_operation *op)
68{ 66{
69 fscache_set_op_state(op, "Run");
70
71 object->n_in_progress++; 67 object->n_in_progress++;
72 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) 68 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
73 wake_up_bit(&op->flags, FSCACHE_OP_WAITING); 69 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
88 84
89 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); 85 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
90 86
91 fscache_set_op_state(op, "SubmitX");
92
93 spin_lock(&object->lock); 87 spin_lock(&object->lock);
94 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 88 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
95 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 89 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object,
194 188
195 ASSERTCMP(atomic_read(&op->usage), >, 0); 189 ASSERTCMP(atomic_read(&op->usage), >, 0);
196 190
197 fscache_set_op_state(op, "Submit");
198
199 spin_lock(&object->lock); 191 spin_lock(&object->lock);
200 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 192 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
201 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 193 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op)
335 if (!atomic_dec_and_test(&op->usage)) 327 if (!atomic_dec_and_test(&op->usage))
336 return; 328 return;
337 329
338 fscache_set_op_state(op, "Put");
339
340 _debug("PUT OP"); 330 _debug("PUT OP");
341 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) 331 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
342 BUG(); 332 BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c2058d..a2a5d19ece6a 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
155 fscache_stat(&fscache_n_attr_changed_calls); 155 fscache_stat(&fscache_n_attr_changed_calls);
156 156
157 if (fscache_object_is_active(object)) { 157 if (fscache_object_is_active(object)) {
158 fscache_set_op_state(op, "CallFS");
159 fscache_stat(&fscache_n_cop_attr_changed); 158 fscache_stat(&fscache_n_cop_attr_changed);
160 ret = object->cache->ops->attr_changed(object); 159 ret = object->cache->ops->attr_changed(object);
161 fscache_stat_d(&fscache_n_cop_attr_changed); 160 fscache_stat_d(&fscache_n_cop_attr_changed);
162 fscache_set_op_state(op, "Done");
163 if (ret < 0) 161 if (ret < 0)
164 fscache_abort_object(object); 162 fscache_abort_object(object);
165 } 163 }
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
190 188
191 fscache_operation_init(op, fscache_attr_changed_op, NULL); 189 fscache_operation_init(op, fscache_attr_changed_op, NULL);
192 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); 190 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
193 fscache_set_op_name(op, "Attr");
194 191
195 spin_lock(&cookie->lock); 192 spin_lock(&cookie->lock);
196 193
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
257 op->context = context; 254 op->context = context;
258 op->start_time = jiffies; 255 op->start_time = jiffies;
259 INIT_LIST_HEAD(&op->to_do); 256 INIT_LIST_HEAD(&op->to_do);
260 fscache_set_op_name(&op->op, "Retr");
261 return op; 257 return op;
262} 258}
263 259
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
368 _leave(" = -ENOMEM"); 364 _leave(" = -ENOMEM");
369 return -ENOMEM; 365 return -ENOMEM;
370 } 366 }
371 fscache_set_op_name(&op->op, "RetrRA1");
372 367
373 spin_lock(&cookie->lock); 368 spin_lock(&cookie->lock);
374 369
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
487 op = fscache_alloc_retrieval(mapping, end_io_func, context); 482 op = fscache_alloc_retrieval(mapping, end_io_func, context);
488 if (!op) 483 if (!op)
489 return -ENOMEM; 484 return -ENOMEM;
490 fscache_set_op_name(&op->op, "RetrRAN");
491 485
492 spin_lock(&cookie->lock); 486 spin_lock(&cookie->lock);
493 487
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
589 op = fscache_alloc_retrieval(page->mapping, NULL, NULL); 583 op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
590 if (!op) 584 if (!op)
591 return -ENOMEM; 585 return -ENOMEM;
592 fscache_set_op_name(&op->op, "RetrAL1");
593 586
594 spin_lock(&cookie->lock); 587 spin_lock(&cookie->lock);
595 588
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op)
662 655
663 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); 656 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
664 657
665 fscache_set_op_state(&op->op, "GetPage");
666
667 spin_lock(&object->lock); 658 spin_lock(&object->lock);
668 cookie = object->cookie; 659 cookie = object->cookie;
669 660
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op)
698 spin_unlock(&cookie->stores_lock); 689 spin_unlock(&cookie->stores_lock);
699 spin_unlock(&object->lock); 690 spin_unlock(&object->lock);
700 691
701 fscache_set_op_state(&op->op, "Store");
702 fscache_stat(&fscache_n_store_pages); 692 fscache_stat(&fscache_n_store_pages);
703 fscache_stat(&fscache_n_cop_write_page); 693 fscache_stat(&fscache_n_cop_write_page);
704 ret = object->cache->ops->write_page(op, page); 694 ret = object->cache->ops->write_page(op, page);
705 fscache_stat_d(&fscache_n_cop_write_page); 695 fscache_stat_d(&fscache_n_cop_write_page);
706 fscache_set_op_state(&op->op, "EndWrite");
707 fscache_end_page_write(object, page); 696 fscache_end_page_write(object, page);
708 if (ret < 0) { 697 if (ret < 0) {
709 fscache_set_op_state(&op->op, "Abort");
710 fscache_abort_object(object); 698 fscache_abort_object(object);
711 } else { 699 } else {
712 fscache_enqueue_operation(&op->op); 700 fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
778 fscache_operation_init(&op->op, fscache_write_op, 766 fscache_operation_init(&op->op, fscache_write_op,
779 fscache_release_write_op); 767 fscache_release_write_op);
780 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); 768 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
781 fscache_set_op_name(&op->op, "Write1");
782 769
783 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); 770 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
784 if (ret < 0) 771 if (ret < 0)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b32eb29a4e6f..0d0e3faddcfa 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -667,6 +667,8 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
667 if (IS_ERR(req)) 667 if (IS_ERR(req))
668 return PTR_ERR(req); 668 return PTR_ERR(req);
669 669
670 dentry_unhash(entry);
671
670 req->in.h.opcode = FUSE_RMDIR; 672 req->in.h.opcode = FUSE_RMDIR;
671 req->in.h.nodeid = get_node_id(dir); 673 req->in.h.nodeid = get_node_id(dir);
672 req->in.numargs = 1; 674 req->in.numargs = 1;
@@ -691,6 +693,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
691 struct fuse_rename_in inarg; 693 struct fuse_rename_in inarg;
692 struct fuse_conn *fc = get_fuse_conn(olddir); 694 struct fuse_conn *fc = get_fuse_conn(olddir);
693 struct fuse_req *req = fuse_get_req(fc); 695 struct fuse_req *req = fuse_get_req(fc);
696
697 if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode))
698 dentry_unhash(newent);
699
694 if (IS_ERR(req)) 700 if (IS_ERR(req))
695 return PTR_ERR(req); 701 return PTR_ERR(req);
696 702
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1ccflags-y := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o inode.o quota.o \
7 recovery.o rgrp.o super.o sys.o trans.o util.o 7 recovery.o rgrp.o super.o sys.o trans.o util.o
8 8
9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o 9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0f5c4f9d5d62..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1076 bd = bh->b_private; 1076 bd = bh->b_private;
1077 if (bd && bd->bd_ail) 1077 if (bd && bd->bd_ail)
1078 goto cannot_release; 1078 goto cannot_release;
1079 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 1079 if (buffer_pinned(bh) || buffer_dirty(bh))
1080 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 1080 goto not_possible;
1081 bh = bh->b_this_page; 1081 bh = bh->b_this_page;
1082 } while(bh != head); 1082 } while(bh != head);
1083 gfs2_log_unlock(sdp); 1083 gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1107 } while (bh != head); 1107 } while (bh != head);
1108 1108
1109 return try_to_free_buffers(page); 1109 return try_to_free_buffers(page);
1110
1111not_possible: /* Should never happen */
1112 WARN_ON(buffer_dirty(bh));
1113 WARN_ON(buffer_pinned(bh));
1110cannot_release: 1114cannot_release:
1111 gfs2_log_unlock(sdp); 1115 gfs2_log_unlock(sdp);
1112 return 0; 1116 return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 metadata = (height != ip->i_height - 1); 780 metadata = (height != ip->i_height - 1);
781 if (metadata) 781 if (metadata)
782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
783 else if (ip->i_depth)
784 revokes = sdp->sd_inptrs;
783 785
784 if (ip != GFS2_I(sdp->sd_rindex)) 786 if (ip != GFS2_I(sdp->sd_rindex))
785 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 787 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index f789c5732b7c..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
82struct qstr gfs2_qdot __read_mostly; 82struct qstr gfs2_qdot __read_mostly;
83struct qstr gfs2_qdotdot __read_mostly; 83struct qstr gfs2_qdotdot __read_mostly;
84 84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
86 u64 leaf_no, void *data);
87typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 85typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
88 const struct qstr *name, void *opaque); 86 const struct qstr *name, void *opaque);
89 87
90
91int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 88int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
92 struct buffer_head **bhp) 89 struct buffer_head **bhp)
93{ 90{
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1600 */ 1597 */
1601 1598
1602int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1599int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1603 const struct gfs2_inode *nip, unsigned type) 1600 const struct gfs2_inode *nip)
1604{ 1601{
1605 struct gfs2_inode *ip = GFS2_I(inode); 1602 struct gfs2_inode *ip = GFS2_I(inode);
1606 struct buffer_head *bh; 1603 struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1616 return PTR_ERR(dent); 1613 return PTR_ERR(dent);
1617 dent = gfs2_init_dirent(inode, dent, name, bh); 1614 dent = gfs2_init_dirent(inode, dent, name, bh);
1618 gfs2_inum_out(nip, dent); 1615 gfs2_inum_out(nip, dent);
1619 dent->de_type = cpu_to_be16(type); 1616 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1620 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1617 if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1621 leaf = (struct gfs2_leaf *)bh->b_data; 1618 leaf = (struct gfs2_leaf *)bh->b_data;
1622 be16_add_cpu(&leaf->lf_entries, 1); 1619 be16_add_cpu(&leaf->lf_entries, 1);
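gfs2_dir_add() previously took the on-disk entry type as a separate argument that every caller had to keep consistent with the inode being linked; deriving it from nip->i_inode.i_mode removes that redundancy. In GFS2 the directory-entry type is the mode's format bits shifted down by twelve, which lines up with the DT_* values readdir reports. A runnable demonstration of the arithmetic (IF2DT_DEMO is a local re-derivation for illustration, not the kernel macro):

#include <stdio.h>
#include <sys/stat.h>

#define IF2DT_DEMO(mode) (((mode) & S_IFMT) >> 12)

int main(void)
{
	printf("regular file -> %u\n", IF2DT_DEMO(S_IFREG | 0644)); /* 8, DT_REG */
	printf("directory    -> %u\n", IF2DT_DEMO(S_IFDIR | 0755)); /* 4, DT_DIR */
	printf("symlink      -> %u\n", IF2DT_DEMO(S_IFLNK | 0777)); /* 10, DT_LNK */
	return 0;
}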
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1628 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1625 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1629 ip->i_entries++; 1626 ip->i_entries++;
1630 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1627 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1628 if (S_ISDIR(nip->i_inode.i_mode))
1629 inc_nlink(&ip->i_inode);
1631 gfs2_dinode_out(ip, bh->b_data); 1630 gfs2_dinode_out(ip, bh->b_data);
1632 brelse(bh); 1631 brelse(bh);
1633 error = 0; 1632 error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1672 * Returns: 0 on success, error code on failure 1671 * Returns: 0 on success, error code on failure
1673 */ 1672 */
1674 1673
1675int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) 1674int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1676{ 1675{
1676 const struct qstr *name = &dentry->d_name;
1677 struct gfs2_dirent *dent, *prev = NULL; 1677 struct gfs2_dirent *dent, *prev = NULL;
1678 struct buffer_head *bh; 1678 struct buffer_head *bh;
1679 int error; 1679 int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1714 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1714 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1715 dip->i_entries--; 1715 dip->i_entries--;
1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1717 if (S_ISDIR(dentry->d_inode->i_mode))
1718 drop_nlink(&dip->i_inode);
1717 gfs2_dinode_out(dip, bh->b_data); 1719 gfs2_dinode_out(dip, bh->b_data);
1718 brelse(bh); 1720 brelse(bh);
1719 mark_inode_dirty(&dip->i_inode); 1721 mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1768} 1770}
1769 1771
1770/** 1772/**
1771 * foreach_leaf - call a function for each leaf in a directory
1772 * @dip: the directory
1773 * @lc: the function to call for each leaf
1774 * @data: private data to pass to it
1775 *
1776 * Returns: errno
1777 */
1778
1779static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1780{
1781 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1782 struct buffer_head *bh;
1783 struct gfs2_leaf *leaf;
1784 u32 hsize, len;
1785 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1786 u32 index = 0;
1787 __be64 *lp;
1788 u64 leaf_no;
1789 int error = 0;
1790
1791 hsize = 1 << dip->i_depth;
1792 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1793 gfs2_consist_inode(dip);
1794 return -EIO;
1795 }
1796
1797 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1798 if (!lp)
1799 return -ENOMEM;
1800
1801 while (index < hsize) {
1802 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1803 ht_offset = index - lp_offset;
1804
1805 if (ht_offset_cur != ht_offset) {
1806 error = gfs2_dir_read_data(dip, (char *)lp,
1807 ht_offset * sizeof(__be64),
1808 sdp->sd_hash_bsize, 1);
1809 if (error != sdp->sd_hash_bsize) {
1810 if (error >= 0)
1811 error = -EIO;
1812 goto out;
1813 }
1814 ht_offset_cur = ht_offset;
1815 }
1816
1817 leaf_no = be64_to_cpu(lp[lp_offset]);
1818 if (leaf_no) {
1819 error = get_leaf(dip, leaf_no, &bh);
1820 if (error)
1821 goto out;
1822 leaf = (struct gfs2_leaf *)bh->b_data;
1823 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1824 brelse(bh);
1825
1826 error = lc(dip, index, len, leaf_no, data);
1827 if (error)
1828 goto out;
1829
1830 index = (index & ~(len - 1)) + len;
1831 } else
1832 index++;
1833 }
1834
1835 if (index != hsize) {
1836 gfs2_consist_inode(dip);
1837 error = -EIO;
1838 }
1839
1840out:
1841 kfree(lp);
1842
1843 return error;
1844}
1845
1846/**
1847 * leaf_dealloc - Deallocate a directory leaf 1773 * leaf_dealloc - Deallocate a directory leaf
1848 * @dip: the directory 1774 * @dip: the directory
1849 * @index: the hash table offset in the directory 1775 * @index: the hash table offset in the directory
1850 * @len: the number of pointers to this leaf 1776 * @len: the number of pointers to this leaf
1851 * @leaf_no: the leaf number 1777 * @leaf_no: the leaf number
1852 * @data: not used 1778 * @leaf_bh: buffer_head for the starting leaf
1779 * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1853 * 1780 *
1854 * Returns: errno 1781 * Returns: errno
1855 */ 1782 */
1856 1783
1857static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1784static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1858 u64 leaf_no, void *data) 1785 u64 leaf_no, struct buffer_head *leaf_bh,
1786 int last_dealloc)
1859{ 1787{
1860 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1788 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1861 struct gfs2_leaf *tmp_leaf; 1789 struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1887 goto out_qs; 1815 goto out_qs;
1888 1816
1889 /* Count the number of leaves */ 1817 /* Count the number of leaves */
1818 bh = leaf_bh;
1890 1819
1891 for (blk = leaf_no; blk; blk = nblk) { 1820 for (blk = leaf_no; blk; blk = nblk) {
1892 error = get_leaf(dip, blk, &bh); 1821 if (blk != leaf_no) {
1893 if (error) 1822 error = get_leaf(dip, blk, &bh);
1894 goto out_rlist; 1823 if (error)
1824 goto out_rlist;
1825 }
1895 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1826 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1896 nblk = be64_to_cpu(tmp_leaf->lf_next); 1827 nblk = be64_to_cpu(tmp_leaf->lf_next);
1897 brelse(bh); 1828 if (blk != leaf_no)
1829 brelse(bh);
1898 1830
1899 gfs2_rlist_add(sdp, &rlist, blk); 1831 gfs2_rlist_add(sdp, &rlist, blk);
1900 l_blocks++; 1832 l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1918 if (error) 1850 if (error)
1919 goto out_rg_gunlock; 1851 goto out_rg_gunlock;
1920 1852
1853 bh = leaf_bh;
1854
1921 for (blk = leaf_no; blk; blk = nblk) { 1855 for (blk = leaf_no; blk; blk = nblk) {
1922 error = get_leaf(dip, blk, &bh); 1856 if (blk != leaf_no) {
1923 if (error) 1857 error = get_leaf(dip, blk, &bh);
1924 goto out_end_trans; 1858 if (error)
1859 goto out_end_trans;
1860 }
1925 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1861 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1926 nblk = be64_to_cpu(tmp_leaf->lf_next); 1862 nblk = be64_to_cpu(tmp_leaf->lf_next);
1927 brelse(bh); 1863 if (blk != leaf_no)
1864 brelse(bh);
1928 1865
1929 gfs2_free_meta(dip, blk, 1); 1866 gfs2_free_meta(dip, blk, 1);
1930 gfs2_add_inode_blocks(&dip->i_inode, -1); 1867 gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1942 goto out_end_trans; 1879 goto out_end_trans;
1943 1880
1944 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1881 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1882 /* On the last dealloc, make this a regular file in case we crash.
1883 (We don't want to free these blocks a second time.) */
1884 if (last_dealloc)
1885 dip->i_inode.i_mode = S_IFREG;
1945 gfs2_dinode_out(dip, dibh->b_data); 1886 gfs2_dinode_out(dip, dibh->b_data);
1946 brelse(dibh); 1887 brelse(dibh);
1947 1888
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1975{ 1916{
1976 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1977 struct buffer_head *bh; 1918 struct buffer_head *bh;
1978 int error; 1919 struct gfs2_leaf *leaf;
1920 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index;
1923 __be64 *lp;
1924 u64 leaf_no;
1925 int error = 0, last;
1979 1926
1980 /* Dealloc on-disk leaves to FREEMETA state */ 1927 hsize = 1 << dip->i_depth;
1981 error = foreach_leaf(dip, leaf_dealloc, NULL); 1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1982 if (error) 1929 gfs2_consist_inode(dip);
1983 return error; 1930 return -EIO;
1931 }
1984 1932
1985 /* Make this a regular file in case we crash. 1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1986 (We don't want to free these blocks a second time.) */ 1934 if (!lp)
1935 return -ENOMEM;
1987 1936
1988 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1937 while (index < hsize) {
1989 if (error) 1938 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1990 return error; 1939 ht_offset = index - lp_offset;
1991 1940
1992 error = gfs2_meta_inode_buffer(dip, &bh); 1941 if (ht_offset_cur != ht_offset) {
1993 if (!error) { 1942 error = gfs2_dir_read_data(dip, (char *)lp,
1994 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1943 ht_offset * sizeof(__be64),
1995 ((struct gfs2_dinode *)bh->b_data)->di_mode = 1944 sdp->sd_hash_bsize, 1);
1996 cpu_to_be32(S_IFREG); 1945 if (error != sdp->sd_hash_bsize) {
1997 brelse(bh); 1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh);
1956 if (error)
1957 goto out;
1958 leaf = (struct gfs2_leaf *)bh->b_data;
1959 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1960
1961 next_index = (index & ~(len - 1)) + len;
1962 last = ((next_index >= hsize) ? 1 : 0);
1963 error = leaf_dealloc(dip, index, len, leaf_no, bh,
1964 last);
1965 brelse(bh);
1966 if (error)
1967 goto out;
1968 index = next_index;
1969 } else
1970 index++;
1998 } 1971 }
1999 1972
2000 gfs2_trans_end(sdp); 1973 if (index != hsize) {
1974 gfs2_consist_inode(dip);
1975 error = -EIO;
1976 }
1977
1978out:
1979 kfree(lp);
2001 1980
2002 return error; 1981 return error;
2003} 1982}
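Both the removed foreach_leaf() and its inlined replacement in gfs2_dir_exhash_dealloc() walk the extendible-hash table by leaf span: a leaf at depth lf_depth is referenced by len = 1 << (i_depth - lf_depth) consecutive, aligned slots, so after handling a leaf the walk rounds the index down to that span and jumps past it; the inlined version also flags the final span with `last` so leaf_dealloc() can switch the dinode to S_IFREG exactly once. The skip arithmetic in isolation, runnable:

#include <stdio.h>

int main(void)
{
	unsigned dip_depth = 4;		/* table has 1 << 4 = 16 slots */
	unsigned lf_depth = 2;		/* this leaf split to depth 2 */
	unsigned index = 5;		/* current hash slot */

	unsigned len = 1u << (dip_depth - lf_depth);	/* leaf owns 4 slots */
	unsigned next = (index & ~(len - 1)) + len;	/* (5 & ~3) + 4 = 8 */

	printf("len=%u index=%u -> next=%u\n", len, index, next);
	return 0;
}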
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, 22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
23 const struct gfs2_inode *ip); 23 const struct gfs2_inode *ip);
24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip, unsigned int type); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
139 struct gfs2_sbd *sdp = sb->s_fs_info; 139 struct gfs2_sbd *sdp = sb->s_fs_info;
140 struct inode *inode; 140 struct inode *inode;
141 141
142 inode = gfs2_ilookup(sb, inum->no_addr); 142 inode = gfs2_ilookup(sb, inum->no_addr, 0);
143 if (inode) { 143 if (inode) {
144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
145 iput(inode); 145 iput(inode);
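gfs2_ilookup() grows a third argument in this series; from the call sites it is a non-blocking flag: 0 on this NFS export path, 1 from delete_work_func() below, where sleeping on an inode that is concurrently being evicted could stall the glock workqueue. A hedged sketch (not the real gfs2 code) of such a wrapper over the VFS inode-cache lookups:

#include <linux/fs.h>

/* example_* names are illustrative. ilookup5() waits for I_NEW or
 * I_FREEING inodes to settle; ilookup5_nowait() returns NULL instead
 * of waiting, the safe choice from a work item racing with eviction. */
static int example_test(struct inode *inode, void *opaque)
{
	u64 *no_addr = opaque;

	/* the real test compares the filesystem's own block address */
	return inode->i_ino == *no_addr;
}

static struct inode *example_ilookup(struct super_block *sb, u64 no_addr,
				     int non_block)
{
	if (non_block)
		return ilookup5_nowait(sb, (unsigned long)no_addr,
				       example_test, &no_addr);
	return ilookup5(sb, (unsigned long)no_addr, example_test, &no_addr);
}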
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e48310885c48..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
545/** 545/**
546 * gfs2_fsync - sync the dirty data for a file (across the cluster) 546 * gfs2_fsync - sync the dirty data for a file (across the cluster)
547 * @file: the file that points to the dentry (we ignore this) 547 * @file: the file that points to the dentry (we ignore this)
548 * @dentry: the dentry that points to the inode to sync 548 * @datasync: set if we can ignore timestamp changes
549 * 549 *
550 * The VFS will flush "normal" data for us. We only need to worry 550 * The VFS will flush data for us. We only need to worry
551 * about metadata here. For journaled data, we just do a log flush 551 * about metadata here.
552 * as we can't avoid it. Otherwise we can just bale out if datasync
553 * is set. For stuffed inodes we must flush the log in order to
554 * ensure that all data is on disk.
555 *
556 * The call to write_inode_now() is there to write back metadata and
557 * the inode itself. It also tries to write the data, but that's
558 * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
559 * for us.
560 * 552 *
561 * Returns: errno 553 * Returns: errno
562 */ 554 */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
565{ 557{
566 struct inode *inode = file->f_mapping->host; 558 struct inode *inode = file->f_mapping->host;
567 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 559 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
568 int ret = 0; 560 struct gfs2_inode *ip = GFS2_I(inode);
569 561 int ret;
570 if (gfs2_is_jdata(GFS2_I(inode))) {
571 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
572 return 0;
573 }
574 562
575 if (sync_state != 0) { 563 if (datasync)
576 if (!datasync) 564 sync_state &= ~I_DIRTY_SYNC;
577 ret = write_inode_now(inode, 0);
578 565
579 if (gfs2_is_stuffed(GFS2_I(inode))) 566 if (sync_state) {
580 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 567 ret = sync_inode_metadata(inode, 1);
568 if (ret)
569 return ret;
570 gfs2_ail_flush(ip->i_gl);
581 } 571 }
582 572
583 return ret; 573 return 0;
584} 574}
585 575
586/** 576/**
@@ -826,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
826 loff_t bytes, max_bytes; 816 loff_t bytes, max_bytes;
827 struct gfs2_alloc *al; 817 struct gfs2_alloc *al;
828 int error; 818 int error;
819 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
829 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 820 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
830 next = (next + 1) << sdp->sd_sb.sb_bsize_shift; 821 next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
831 822
@@ -833,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
833 if (mode & ~FALLOC_FL_KEEP_SIZE) 824 if (mode & ~FALLOC_FL_KEEP_SIZE)
834 return -EOPNOTSUPP; 825 return -EOPNOTSUPP;
835 826
836 offset = (offset >> sdp->sd_sb.sb_bsize_shift) << 827 offset &= bsize_mask;
837 sdp->sd_sb.sb_bsize_shift;
838 828
839 len = next - offset; 829 len = next - offset;
840 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; 830 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
841 if (!bytes) 831 if (!bytes)
842 bytes = UINT_MAX; 832 bytes = UINT_MAX;
833 bytes &= bsize_mask;
834 if (bytes == 0)
835 bytes = sdp->sd_sb.sb_bsize;
843 836
844 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 837 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
845 error = gfs2_glock_nq(&ip->i_gh); 838 error = gfs2_glock_nq(&ip->i_gh);
@@ -870,6 +863,9 @@ retry:
870 if (error) { 863 if (error) {
871 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 864 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
872 bytes >>= 1; 865 bytes >>= 1;
866 bytes &= bsize_mask;
867 if (bytes == 0)
868 bytes = sdp->sd_sb.sb_bsize;
873 goto retry; 869 goto retry;
874 } 870 }
875 goto out_qunlock; 871 goto out_qunlock;
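The fallocate changes replace shift-down rounding with a precomputed mask and close a subtle hole: after `bytes &= bsize_mask`, or after halving on ENOSPC, the per-iteration chunk can reach zero, so both paths bump it back to one filesystem block to guarantee forward progress. The arithmetic in isolation, runnable (loff_t modelled as int64_t):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t bsize = 4096;			/* sb_bsize, a power of two */
	int64_t bsize_mask = ~((int64_t)bsize - 1);

	int64_t offset = 10000;
	offset &= bsize_mask;			/* round down: 8192 */

	int64_t bytes = 6000;
	bytes &= bsize_mask;			/* align: 4096 */
	if (bytes == 0)
		bytes = bsize;			/* never degrade to zero */

	printf("offset=%lld bytes=%lld\n", (long long)offset, (long long)bytes);
	return 0;
}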
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7a4fb630a320..2792a790e50b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -143,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
143{ 143{
144 const struct gfs2_glock_operations *glops = gl->gl_ops; 144 const struct gfs2_glock_operations *glops = gl->gl_ops;
145 145
146 /* assert_spin_locked(&gl->gl_spin); */
147
148 if (gl->gl_state == LM_ST_UNLOCKED) 146 if (gl->gl_state == LM_ST_UNLOCKED)
149 return 0; 147 return 0;
150 if (test_bit(GLF_LFLUSH, &gl->gl_flags)) 148 if (!list_empty(&gl->gl_holders))
151 return 0;
152 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
153 !list_empty(&gl->gl_holders))
154 return 0; 149 return 0;
155 if (glops->go_demote_ok) 150 if (glops->go_demote_ok)
156 return glops->go_demote_ok(gl); 151 return glops->go_demote_ok(gl);
@@ -158,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
158} 153}
159 154
160 155
156void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
157{
158 spin_lock(&lru_lock);
159
160 if (!list_empty(&gl->gl_lru))
161 list_del_init(&gl->gl_lru);
162 else
163 atomic_inc(&lru_count);
164
165 list_add_tail(&gl->gl_lru, &lru_list);
166 set_bit(GLF_LRU, &gl->gl_flags);
167 spin_unlock(&lru_lock);
168}
169
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags);
177 }
178 spin_unlock(&lru_lock);
179}
180
161/** 181/**
162 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 182 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
163 * @gl: the glock 183 * @gl: the glock
@@ -168,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
168 188
169static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 189static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
170{ 190{
171 if (demote_ok(gl)) { 191 if (demote_ok(gl))
172 spin_lock(&lru_lock); 192 gfs2_glock_add_to_lru(gl);
173
174 if (!list_empty(&gl->gl_lru))
175 list_del_init(&gl->gl_lru);
176 else
177 atomic_inc(&lru_count);
178
179 list_add_tail(&gl->gl_lru, &lru_list);
180 spin_unlock(&lru_lock);
181 }
182}
183
184void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
185{
186 spin_lock(&gl->gl_spin);
187 __gfs2_glock_schedule_for_reclaim(gl);
188 spin_unlock(&gl->gl_spin);
189} 193}
190 194
191/** 195/**
@@ -217,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
217 spin_lock_bucket(gl->gl_hash); 221 spin_lock_bucket(gl->gl_hash);
218 hlist_bl_del_rcu(&gl->gl_list); 222 hlist_bl_del_rcu(&gl->gl_list);
219 spin_unlock_bucket(gl->gl_hash); 223 spin_unlock_bucket(gl->gl_hash);
220 spin_lock(&lru_lock); 224 gfs2_glock_remove_from_lru(gl);
221 if (!list_empty(&gl->gl_lru)) {
222 list_del_init(&gl->gl_lru);
223 atomic_dec(&lru_count);
224 }
225 spin_unlock(&lru_lock);
226 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
227 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
228 trace_gfs2_glock_put(gl); 227 trace_gfs2_glock_put(gl);
@@ -542,11 +541,6 @@ __acquires(&gl->gl_spin)
542 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 541 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
543 542
544 gfs2_glock_hold(gl); 543 gfs2_glock_hold(gl);
545 if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
546 gl->gl_state == LM_ST_DEFERRED) &&
547 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
548 lck_flags |= LM_FLAG_TRY_1CB;
549
550 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 544 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
551 /* lock_dlm */ 545 /* lock_dlm */
552 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 546 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -648,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
648 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ 642 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
649 643
650 if (ip) 644 if (ip)
651 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 645 inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
652 else 646 else
653 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 647 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
654 if (inode && !IS_ERR(inode)) { 648 if (inode && !IS_ERR(inode)) {
@@ -1025,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1025 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1019 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1026 return -EIO; 1020 return -EIO;
1027 1021
1022 if (test_bit(GLF_LRU, &gl->gl_flags))
1023 gfs2_glock_remove_from_lru(gl);
1024
1028 spin_lock(&gl->gl_spin); 1025 spin_lock(&gl->gl_spin);
1029 add_to_queue(gh); 1026 add_to_queue(gh);
1030 if ((LM_FLAG_NOEXP & gh->gh_flags) && 1027 if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1082,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1082 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1079 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1083 fast_path = 1; 1080 fast_path = 1;
1084 } 1081 }
1085 __gfs2_glock_schedule_for_reclaim(gl); 1082 if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
1083 __gfs2_glock_schedule_for_reclaim(gl);
1086 trace_gfs2_glock_queue(gh, 0); 1084 trace_gfs2_glock_queue(gh, 0);
1087 spin_unlock(&gl->gl_spin); 1085 spin_unlock(&gl->gl_spin);
1088 if (likely(fast_path)) 1086 if (likely(fast_path))
@@ -1348,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1348} 1346}
1349 1347
1350 1348
1351static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1349static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1350 struct shrink_control *sc)
1352{ 1351{
1353 struct gfs2_glock *gl; 1352 struct gfs2_glock *gl;
1354 int may_demote; 1353 int may_demote;
1355 int nr_skipped = 0; 1354 int nr_skipped = 0;
1355 int nr = sc->nr_to_scan;
1356 gfp_t gfp_mask = sc->gfp_mask;
1356 LIST_HEAD(skipped); 1357 LIST_HEAD(skipped);
1357 1358
1358 if (nr == 0) 1359 if (nr == 0)
@@ -1365,6 +1366,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1365 while(nr && !list_empty(&lru_list)) { 1366 while(nr && !list_empty(&lru_list)) {
1366 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1367 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1367 list_del_init(&gl->gl_lru); 1368 list_del_init(&gl->gl_lru);
1369 clear_bit(GLF_LRU, &gl->gl_flags);
1368 atomic_dec(&lru_count); 1370 atomic_dec(&lru_count);
1369 1371
1370 /* Test for being demotable */ 1372 /* Test for being demotable */
@@ -1387,6 +1389,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1387 } 1389 }
1388 nr_skipped++; 1390 nr_skipped++;
1389 list_add(&gl->gl_lru, &skipped); 1391 list_add(&gl->gl_lru, &skipped);
1392 set_bit(GLF_LRU, &gl->gl_flags);
1390 } 1393 }
1391 list_splice(&skipped, &lru_list); 1394 list_splice(&skipped, &lru_list);
1392 atomic_add(nr_skipped, &lru_count); 1395 atomic_add(nr_skipped, &lru_count);
@@ -1459,12 +1462,7 @@ static void thaw_glock(struct gfs2_glock *gl)
1459 1462
1460static void clear_glock(struct gfs2_glock *gl) 1463static void clear_glock(struct gfs2_glock *gl)
1461{ 1464{
1462 spin_lock(&lru_lock); 1465 gfs2_glock_remove_from_lru(gl);
1463 if (!list_empty(&gl->gl_lru)) {
1464 list_del_init(&gl->gl_lru);
1465 atomic_dec(&lru_count);
1466 }
1467 spin_unlock(&lru_lock);
1468 1466
1469 spin_lock(&gl->gl_spin); 1467 spin_lock(&gl->gl_spin);
1470 if (gl->gl_state != LM_ST_UNLOCKED) 1468 if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1599,9 +1597,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1599 return 0; 1597 return 0;
1600} 1598}
1601 1599
1602static const char *gflags2str(char *buf, const unsigned long *gflags) 1600static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1603{ 1601{
1602 const unsigned long *gflags = &gl->gl_flags;
1604 char *p = buf; 1603 char *p = buf;
1604
1605 if (test_bit(GLF_LOCK, gflags)) 1605 if (test_bit(GLF_LOCK, gflags))
1606 *p++ = 'l'; 1606 *p++ = 'l';
1607 if (test_bit(GLF_DEMOTE, gflags)) 1607 if (test_bit(GLF_DEMOTE, gflags))
@@ -1624,6 +1624,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1624 *p++ = 'F'; 1624 *p++ = 'F';
1625 if (test_bit(GLF_QUEUED, gflags)) 1625 if (test_bit(GLF_QUEUED, gflags))
1626 *p++ = 'q'; 1626 *p++ = 'q';
1627 if (test_bit(GLF_LRU, gflags))
1628 *p++ = 'L';
1629 if (gl->gl_object)
1630 *p++ = 'o';
1627 *p = 0; 1631 *p = 0;
1628 return buf; 1632 return buf;
1629} 1633}
@@ -1658,14 +1662,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1658 dtime *= 1000000/HZ; /* demote time in uSec */ 1662 dtime *= 1000000/HZ; /* demote time in uSec */
1659 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1663 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1660 dtime = 0; 1664 dtime = 0;
1661 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", 1665 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
1662 state2str(gl->gl_state), 1666 state2str(gl->gl_state),
1663 gl->gl_name.ln_type, 1667 gl->gl_name.ln_type,
1664 (unsigned long long)gl->gl_name.ln_number, 1668 (unsigned long long)gl->gl_name.ln_number,
1665 gflags2str(gflags_buf, &gl->gl_flags), 1669 gflags2str(gflags_buf, gl),
1666 state2str(gl->gl_target), 1670 state2str(gl->gl_target),
1667 state2str(gl->gl_demote_state), dtime, 1671 state2str(gl->gl_demote_state), dtime,
1668 atomic_read(&gl->gl_ail_count), 1672 atomic_read(&gl->gl_ail_count),
1673 atomic_read(&gl->gl_revokes),
1669 atomic_read(&gl->gl_ref)); 1674 atomic_read(&gl->gl_ref));
1670 1675
1671 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1676 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
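The new gfs2_shrink_glock_memory() signature follows a kernel-wide conversion: shrinker callbacks now take a struct shrink_control bundling nr_to_scan and gfp_mask rather than receiving them as separate arguments, with nr_to_scan == 0 meaning "just report how many objects are reclaimable". A hedged sketch of a shrinker against this interface as it stood in this era (the API changed again in later kernels); the example_* helpers are stubs:

#include <linux/mm.h>

static int example_count_reclaimable(void) { return 0; }	/* stub */
static int example_free_up_to(int nr) { return 0; }		/* stub */

static int example_shrink(struct shrinker *shrink,
			  struct shrink_control *sc)
{
	if (sc->nr_to_scan == 0)
		return example_count_reclaimable();

	if (!(sc->gfp_mask & __GFP_FS))
		return -1;	/* cannot recurse into the filesystem */

	return example_free_up_to(sc->nr_to_scan);
}

static struct shrinker example_shrinker = {
	.shrink	= example_shrink,
	.seeks	= DEFAULT_SEEKS,
};
/* register_shrinker(&example_shrinker) at init,
 * unregister_shrinker(&example_shrinker) at teardown. */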
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
225 225
226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 228extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 229extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); 230extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 231extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
233extern void gfs2_glock_free(struct gfs2_glock *gl); 232extern void gfs2_glock_free(struct gfs2_glock *gl);
234 233
235extern int __init gfs2_glock_init(void); 234extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2bcee47..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
28#include "trans.h" 28#include "trans.h"
29 29
30/** 30/**
31 * ail_empty_gl - remove all buffers for a given lock from the AIL 31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
32 * @gl: the glock 32 * @gl: the glock
33 * 33 *
34 * None of the buffers should be dirty, locked, or pinned. 34 * None of the buffers should be dirty, locked, or pinned.
35 */ 35 */
36 36
37static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void __gfs2_ail_flush(struct gfs2_glock *gl)
38{ 38{
39 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
40 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 struct buffer_head *bh; 42 struct buffer_head *bh;
43 struct gfs2_trans tr;
44
45 memset(&tr, 0, sizeof(tr));
46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
47
48 if (!tr.tr_revokes)
49 return;
50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58 43
59 spin_lock(&sdp->sd_ail_lock); 44 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 45 while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
76 } 61 }
77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 62 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
78 spin_unlock(&sdp->sd_ail_lock); 63 spin_unlock(&sdp->sd_ail_lock);
64}
65
66
67static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
68{
69 struct gfs2_sbd *sdp = gl->gl_sbd;
70 struct gfs2_trans tr;
71
72 memset(&tr, 0, sizeof(tr));
73 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
74
75 if (!tr.tr_revokes)
76 return;
77
78 /* A shortened, inline version of gfs2_trans_begin() */
79 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
80 tr.tr_ip = (unsigned long)__builtin_return_address(0);
81 INIT_LIST_HEAD(&tr.tr_list_buf);
82 gfs2_log_reserve(sdp, tr.tr_reserved);
83 BUG_ON(current->journal_info);
84 current->journal_info = &tr;
85
86 __gfs2_ail_flush(gl);
87
88 gfs2_trans_end(sdp);
89 gfs2_log_flush(sdp, NULL);
90}
91
92void gfs2_ail_flush(struct gfs2_glock *gl)
93{
94 struct gfs2_sbd *sdp = gl->gl_sbd;
95 unsigned int revokes = atomic_read(&gl->gl_ail_count);
96 int ret;
97
98 if (!revokes)
99 return;
79 100
101 ret = gfs2_trans_begin(sdp, 0, revokes);
102 if (ret)
103 return;
104 __gfs2_ail_flush(gl);
80 gfs2_trans_end(sdp); 105 gfs2_trans_end(sdp);
81 gfs2_log_flush(sdp, NULL); 106 gfs2_log_flush(sdp, NULL);
82} 107}
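
A rough userspace sketch of the refactoring pattern this hunk applies: the AIL
walk is hoisted into one shared helper and two thin wrappers that differ only
in how they set up their transaction context. Everything below is a stand-in,
not GFS2 API; it compiles with any C99 compiler.

#include <stdio.h>

struct ctx { int items; };

static void flush_core(struct ctx *c)            /* __gfs2_ail_flush analogue */
{
	while (c->items > 0) {
		printf("flushing item %d\n", c->items);
		c->items--;
	}
}

static void flush_inline_resv(struct ctx *c)     /* gfs2_ail_empty_gl analogue */
{
	if (c->items == 0)
		return;                          /* nothing queued: skip setup */
	printf("begin hand-rolled transaction\n");
	flush_core(c);
	printf("end transaction, flush log\n");
}

static void flush_plain(struct ctx *c)           /* gfs2_ail_flush analogue */
{
	if (c->items == 0)
		return;
	printf("begin regular transaction\n");
	flush_core(c);
	printf("end transaction, flush log\n");
}

int main(void)
{
	struct ctx a = { .items = 2 }, b = { .items = 1 };
	flush_inline_resv(&a);
	flush_plain(&b);
	return 0;
}
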
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
227} 252}
228 253
229/** 254/**
255 * gfs2_set_nlink - Set the inode's link count based on on-disk info
256 * @inode: The inode in question
257 * @nlink: The link count
258 *
259 * If the link count has hit zero, it must never be raised, whatever the
260 * on-disk inode might say. When new struct inodes are created the link
261 * count is set to 1, so that we can safely use this test even when reading
262 * in on-disk information for the first time.
263 */
264
265static void gfs2_set_nlink(struct inode *inode, u32 nlink)
266{
267 /*
268 * We will need to review setting the nlink count here in the
269 * light of the forthcoming ro bind mount work. This is a reminder
270 * to do that.
271 */
272 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
273 if (nlink == 0)
274 clear_nlink(inode);
275 else
276 inode->i_nlink = nlink;
277 }
278}
279
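
The comment above documents a one-way invariant: once the in-core link count
reaches zero it must never be raised, even if the on-disk dinode still claims
otherwise. A minimal, self-contained C sketch of just that guard
(set_nlink_guarded() is a hypothetical stand-in, not the kernel helper):

#include <assert.h>
#include <stdint.h>

static void set_nlink_guarded(uint32_t *i_nlink, uint32_t disk_nlink)
{
	/* Update only while the inode is still "live" (nlink != 0). */
	if (*i_nlink != disk_nlink && *i_nlink != 0)
		*i_nlink = disk_nlink;
}

int main(void)
{
	uint32_t nlink = 1;             /* new in-core inodes start at 1 */
	set_nlink_guarded(&nlink, 3);   /* first read from disk: accept it */
	assert(nlink == 3);
	nlink = 0;                      /* the inode has been unlinked */
	set_nlink_guarded(&nlink, 3);   /* stale on-disk value is ignored */
	assert(nlink == 0);
	return 0;
}
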
280static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
281{
282 const struct gfs2_dinode *str = buf;
283 struct timespec atime;
284 u16 height, depth;
285
286 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
287 goto corrupt;
288 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
289 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
290 ip->i_inode.i_rdev = 0;
291 switch (ip->i_inode.i_mode & S_IFMT) {
292 case S_IFBLK:
293 case S_IFCHR:
294 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
295 be32_to_cpu(str->di_minor));
296 break;
297 }
298
299 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
300 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
301 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
302 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
303 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
304 atime.tv_sec = be64_to_cpu(str->di_atime);
305 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
306 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
307 ip->i_inode.i_atime = atime;
308 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
309 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
310 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
311 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
312
313 ip->i_goal = be64_to_cpu(str->di_goal_meta);
314 ip->i_generation = be64_to_cpu(str->di_generation);
315
316 ip->i_diskflags = be32_to_cpu(str->di_flags);
317 gfs2_set_inode_flags(&ip->i_inode);
318 height = be16_to_cpu(str->di_height);
319 if (unlikely(height > GFS2_MAX_META_HEIGHT))
320 goto corrupt;
321 ip->i_height = (u8)height;
322
323 depth = be16_to_cpu(str->di_depth);
324 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
325 goto corrupt;
326 ip->i_depth = (u8)depth;
327 ip->i_entries = be32_to_cpu(str->di_entries);
328
329 ip->i_eattr = be64_to_cpu(str->di_eattr);
330 if (S_ISREG(ip->i_inode.i_mode))
331 gfs2_set_aops(&ip->i_inode);
332
333 return 0;
334corrupt:
335 gfs2_consist_inode(ip);
336 return -EIO;
337}
338
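
gfs2_dinode_in() above is the standard on-disk decode pattern: every field of
the big-endian dinode is converted with be*_to_cpu() before use, and fields
that could corrupt in-core state (height, depth) are range-checked first. A
compilable userspace approximation, using glibc's endian.h and a simplified
two-field struct; the limit of 10 is a placeholder for GFS2_MAX_META_HEIGHT:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct disk_ino { uint64_t size_be; uint16_t height_be; };

static int dinode_in(const struct disk_ino *d, uint64_t *size, uint8_t *height)
{
	uint16_t h = be16toh(d->height_be);
	if (h > 10)                     /* range-check before committing */
		return -1;              /* caller treats this as corruption */
	*size = be64toh(d->size_be);
	*height = (uint8_t)h;
	return 0;
}

int main(void)
{
	struct disk_ino d = { .size_be = htobe64(4096),
			      .height_be = htobe16(2) };
	uint64_t size;
	uint8_t height;

	if (dinode_in(&d, &size, &height) == 0)
		printf("size=%llu height=%u\n",
		       (unsigned long long)size, height);
	return 0;
}
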
339/**
340 * gfs2_inode_refresh - Refresh the incore copy of the dinode
341 * @ip: The GFS2 inode
342 *
343 * Returns: errno
344 */
345
346int gfs2_inode_refresh(struct gfs2_inode *ip)
347{
348 struct buffer_head *dibh;
349 int error;
350
351 error = gfs2_meta_inode_buffer(ip, &dibh);
352 if (error)
353 return error;
354
355 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
356 brelse(dibh);
357 return -EIO;
358 }
359
360 error = gfs2_dinode_in(ip, dibh->b_data);
361 brelse(dibh);
362 clear_bit(GIF_INVALID, &ip->i_flags);
363
364 return error;
365}
366
367/**
230 * inode_go_lock - operation done after an inode lock is locked by a process 368 * inode_go_lock - operation done after an inode lock is locked by a process
231 * @gl: the glock 369 * @gl: the glock
232 * @flags: 370 * @flags:
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[]; 24extern const struct gfs2_glock_operations *gfs2_glops_list[];
25 25
26extern void gfs2_ail_flush(struct gfs2_glock *gl);
27
26#endif /* __GLOPS_DOT_H__ */ 28#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
20 20
21#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
22#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
23#define DIO_ALL 0x00000100
24 23
25struct gfs2_log_operations; 24struct gfs2_log_operations;
26struct gfs2_log_element; 25struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
200 GLF_INITIAL = 10, 199 GLF_INITIAL = 10,
201 GLF_FROZEN = 11, 200 GLF_FROZEN = 11,
202 GLF_QUEUED = 12, 201 GLF_QUEUED = 12,
202 GLF_LRU = 13,
203 GLF_OBJECT = 14, /* Used only for tracing */
203}; 204};
204 205
205struct gfs2_glock { 206struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
234 235
235 struct list_head gl_ail_list; 236 struct list_head gl_ail_list;
236 atomic_t gl_ail_count; 237 atomic_t gl_ail_count;
238 atomic_t gl_revokes;
237 struct delayed_work gl_work; 239 struct delayed_work gl_work;
238 struct work_struct gl_delete; 240 struct work_struct gl_delete;
239 struct rcu_head gl_rcu; 241 struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
374 unsigned int ai_first; 376 unsigned int ai_first;
375 struct list_head ai_ail1_list; 377 struct list_head ai_ail1_list;
376 struct list_head ai_ail2_list; 378 struct list_head ai_ail2_list;
377
378 u64 ai_sync_gen;
379}; 379};
380 380
381struct gfs2_journal_extent { 381struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
488 488
489 char sb_lockproto[GFS2_LOCKNAME_LEN]; 489 char sb_lockproto[GFS2_LOCKNAME_LEN];
490 char sb_locktable[GFS2_LOCKNAME_LEN]; 490 char sb_locktable[GFS2_LOCKNAME_LEN];
491 u8 sb_uuid[16];
492}; 491};
493 492
494/* 493/*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
654 spinlock_t sd_ail_lock; 653 spinlock_t sd_ail_lock;
655 struct list_head sd_ail1_list; 654 struct list_head sd_ail1_list;
656 struct list_head sd_ail2_list; 655 struct list_head sd_ail2_list;
657 u64 sd_ail_sync_gen;
658 656
659 /* Replay stuff */ 657 /* Replay stuff */
660 658
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9134dcb89479..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10#include <linux/sched.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/spinlock.h> 11#include <linux/spinlock.h>
13#include <linux/completion.h> 12#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
15#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 19#include <linux/crc32.h>
20#include <linux/fiemap.h>
19#include <linux/security.h> 21#include <linux/security.h>
20#include <linux/time.h> 22#include <asm/uaccess.h>
21 23
22#include "gfs2.h" 24#include "gfs2.h"
23#include "incore.h" 25#include "incore.h"
@@ -26,19 +28,14 @@
26#include "dir.h" 28#include "dir.h"
27#include "xattr.h" 29#include "xattr.h"
28#include "glock.h" 30#include "glock.h"
29#include "glops.h"
30#include "inode.h" 31#include "inode.h"
31#include "log.h"
32#include "meta_io.h" 32#include "meta_io.h"
33#include "quota.h" 33#include "quota.h"
34#include "rgrp.h" 34#include "rgrp.h"
35#include "trans.h" 35#include "trans.h"
36#include "util.h" 36#include "util.h"
37 37#include "super.h"
38struct gfs2_inum_range_host { 38#include "glops.h"
39 u64 ir_start;
40 u64 ir_length;
41};
42 39
43struct gfs2_skip_data { 40struct gfs2_skip_data {
44 u64 no_addr; 41 u64 no_addr;
@@ -74,14 +71,14 @@ static int iget_set(struct inode *inode, void *opaque)
74 return 0; 71 return 0;
75} 72}
76 73
77struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 74struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
78{ 75{
79 unsigned long hash = (unsigned long)no_addr; 76 unsigned long hash = (unsigned long)no_addr;
80 struct gfs2_skip_data data; 77 struct gfs2_skip_data data;
81 78
82 data.no_addr = no_addr; 79 data.no_addr = no_addr;
83 data.skipped = 0; 80 data.skipped = 0;
84 data.non_block = 0; 81 data.non_block = non_block;
85 return ilookup5(sb, hash, iget_test, &data); 82 return ilookup5(sb, hash, iget_test, &data);
86} 83}
87 84
@@ -248,203 +245,6 @@ fail_iput:
248 goto fail; 245 goto fail;
249} 246}
250 247
251static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
252{
253 const struct gfs2_dinode *str = buf;
254 struct timespec atime;
255 u16 height, depth;
256
257 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
258 goto corrupt;
259 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
260 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
261 ip->i_inode.i_rdev = 0;
262 switch (ip->i_inode.i_mode & S_IFMT) {
263 case S_IFBLK:
264 case S_IFCHR:
265 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
266 be32_to_cpu(str->di_minor));
267 break;
268 };
269
270 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
271 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
272 /*
273 * We will need to review setting the nlink count here in the
274 * light of the forthcoming ro bind mount work. This is a reminder
275 * to do that.
276 */
277 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
278 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
279 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
280 atime.tv_sec = be64_to_cpu(str->di_atime);
281 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
282 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
283 ip->i_inode.i_atime = atime;
284 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
285 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
286 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
287 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
288
289 ip->i_goal = be64_to_cpu(str->di_goal_meta);
290 ip->i_generation = be64_to_cpu(str->di_generation);
291
292 ip->i_diskflags = be32_to_cpu(str->di_flags);
293 gfs2_set_inode_flags(&ip->i_inode);
294 height = be16_to_cpu(str->di_height);
295 if (unlikely(height > GFS2_MAX_META_HEIGHT))
296 goto corrupt;
297 ip->i_height = (u8)height;
298
299 depth = be16_to_cpu(str->di_depth);
300 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
301 goto corrupt;
302 ip->i_depth = (u8)depth;
303 ip->i_entries = be32_to_cpu(str->di_entries);
304
305 ip->i_eattr = be64_to_cpu(str->di_eattr);
306 if (S_ISREG(ip->i_inode.i_mode))
307 gfs2_set_aops(&ip->i_inode);
308
309 return 0;
310corrupt:
311 if (gfs2_consist_inode(ip))
312 gfs2_dinode_print(ip);
313 return -EIO;
314}
315
316/**
317 * gfs2_inode_refresh - Refresh the incore copy of the dinode
318 * @ip: The GFS2 inode
319 *
320 * Returns: errno
321 */
322
323int gfs2_inode_refresh(struct gfs2_inode *ip)
324{
325 struct buffer_head *dibh;
326 int error;
327
328 error = gfs2_meta_inode_buffer(ip, &dibh);
329 if (error)
330 return error;
331
332 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
333 brelse(dibh);
334 return -EIO;
335 }
336
337 error = gfs2_dinode_in(ip, dibh->b_data);
338 brelse(dibh);
339 clear_bit(GIF_INVALID, &ip->i_flags);
340
341 return error;
342}
343
344int gfs2_dinode_dealloc(struct gfs2_inode *ip)
345{
346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
347 struct gfs2_alloc *al;
348 struct gfs2_rgrpd *rgd;
349 int error;
350
351 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
352 if (gfs2_consist_inode(ip))
353 gfs2_dinode_print(ip);
354 return -EIO;
355 }
356
357 al = gfs2_alloc_get(ip);
358 if (!al)
359 return -ENOMEM;
360
361 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
362 if (error)
363 goto out;
364
365 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
366 if (error)
367 goto out_qs;
368
369 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
370 if (!rgd) {
371 gfs2_consist_inode(ip);
372 error = -EIO;
373 goto out_rindex_relse;
374 }
375
376 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
377 &al->al_rgd_gh);
378 if (error)
379 goto out_rindex_relse;
380
381 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
382 if (error)
383 goto out_rg_gunlock;
384
385 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
386 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
387
388 gfs2_free_di(rgd, ip);
389
390 gfs2_trans_end(sdp);
391
392out_rg_gunlock:
393 gfs2_glock_dq_uninit(&al->al_rgd_gh);
394out_rindex_relse:
395 gfs2_glock_dq_uninit(&al->al_ri_gh);
396out_qs:
397 gfs2_quota_unhold(ip);
398out:
399 gfs2_alloc_put(ip);
400 return error;
401}
402
403/**
404 * gfs2_change_nlink - Change nlink count on inode
405 * @ip: The GFS2 inode
406 * @diff: The change in the nlink count required
407 *
408 * Returns: errno
409 */
410int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
411{
412 struct buffer_head *dibh;
413 u32 nlink;
414 int error;
415
416 BUG_ON(diff != 1 && diff != -1);
417 nlink = ip->i_inode.i_nlink + diff;
418
419 /* If we are reducing the nlink count, but the new value ends up being
420 bigger than the old one, we must have underflowed. */
421 if (diff < 0 && nlink > ip->i_inode.i_nlink) {
422 if (gfs2_consist_inode(ip))
423 gfs2_dinode_print(ip);
424 return -EIO;
425 }
426
427 error = gfs2_meta_inode_buffer(ip, &dibh);
428 if (error)
429 return error;
430
431 if (diff > 0)
432 inc_nlink(&ip->i_inode);
433 else
434 drop_nlink(&ip->i_inode);
435
436 ip->i_inode.i_ctime = CURRENT_TIME;
437
438 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
439 gfs2_dinode_out(ip, dibh->b_data);
440 brelse(dibh);
441 mark_inode_dirty(&ip->i_inode);
442
443 if (ip->i_inode.i_nlink == 0)
444 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
445
446 return error;
447}
448 248
449struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 249struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
450{ 250{
@@ -543,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
543 343
544 /* Don't create entries in an unlinked directory */ 344 /* Don't create entries in an unlinked directory */
545 if (!dip->i_inode.i_nlink) 345 if (!dip->i_inode.i_nlink)
546 return -EPERM; 346 return -ENOENT;
547 347
548 error = gfs2_dir_check(&dip->i_inode, name, NULL); 348 error = gfs2_dir_check(&dip->i_inode, name, NULL);
549 switch (error) { 349 switch (error) {
@@ -613,21 +413,44 @@ out:
613 return error; 413 return error;
614} 414}
615 415
416static void gfs2_init_dir(struct buffer_head *dibh,
417 const struct gfs2_inode *parent)
418{
419 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
420 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
421
422 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
423 dent->de_inum = di->di_num; /* already GFS2 endian */
424 dent->de_type = cpu_to_be16(DT_DIR);
425
426 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
427 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
428 gfs2_inum_out(parent, dent);
429 dent->de_type = cpu_to_be16(DT_DIR);
430
431}
432
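
gfs2_init_dir() above packs "." directly after the dinode header and gives
".." all remaining space in the block, so an empty directory needs no further
allocation. The offset arithmetic, as a small C sketch; the header and entry
sizes below are made-up placeholders, not the real GFS2 constants:

#include <stdio.h>

int main(void)
{
	unsigned bsize = 4096;          /* filesystem block size */
	unsigned dinode = 232;          /* hypothetical dinode header size */
	unsigned dot = 24;              /* hypothetical "." dirent size */
	unsigned dotdot_len = bsize - dinode - dot; /* ".." takes the rest */

	printf("\".\" at offset %u, \"..\" at offset %u, rec_len %u\n",
	       dinode, dinode + dot, dotdot_len);
	return 0;
}
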
616/** 433/**
617 * init_dinode - Fill in a new dinode structure 434 * init_dinode - Fill in a new dinode structure
618 * @dip: the directory this inode is being created in 435 * @dip: The directory this inode is being created in
619 * @gl: The glock covering the new inode 436 * @gl: The glock covering the new inode
620 * @inum: the inode number 437 * @inum: The inode number
621 * @mode: the file permissions 438 * @mode: The file permissions
622 * @uid: 439 * @uid: The uid of the new inode
623 * @gid: 440 * @gid: The gid of the new inode
441 * @generation: The generation number of the new inode
442 * @dev: The device number (if a device node)
443 * @symname: The symlink destination (if a symlink)
444 * @size: The inode size (ignored for directories)
445 * @bhp: The buffer head (returned to caller)
624 * 446 *
625 */ 447 */
626 448
627static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
628 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, unsigned int mode,
629 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
630 const u64 *generation, dev_t dev, struct buffer_head **bhp) 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp)
631{ 454{
632 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
633 struct gfs2_dinode *di; 456 struct gfs2_dinode *di;
@@ -646,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
646 di->di_uid = cpu_to_be32(uid); 469 di->di_uid = cpu_to_be32(uid);
647 di->di_gid = cpu_to_be32(gid); 470 di->di_gid = cpu_to_be32(gid);
648 di->di_nlink = 0; 471 di->di_nlink = 0;
649 di->di_size = 0; 472 di->di_size = cpu_to_be64(size);
650 di->di_blocks = cpu_to_be64(1); 473 di->di_blocks = cpu_to_be64(1);
651 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
652 di->di_major = cpu_to_be32(MAJOR(dev)); 475 di->di_major = cpu_to_be32(MAJOR(dev));
@@ -654,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
654 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
655 di->di_generation = cpu_to_be64(*generation); 478 di->di_generation = cpu_to_be64(*generation);
656 di->di_flags = 0; 479 di->di_flags = 0;
657
658 if (S_ISREG(mode)) {
659 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
660 gfs2_tune_get(sdp, gt_new_files_jdata))
661 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
662 } else if (S_ISDIR(mode)) {
663 di->di_flags |= cpu_to_be32(dip->i_diskflags &
664 GFS2_DIF_INHERIT_JDATA);
665 }
666
667 di->__pad1 = 0; 480 di->__pad1 = 0;
668 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 481 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
669 di->di_height = 0; 482 di->di_height = 0;
@@ -677,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
677 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 490 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
678 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 491 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
679 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 492 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
680 493
494 switch(mode & S_IFMT) {
495 case S_IFREG:
496 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
497 gfs2_tune_get(sdp, gt_new_files_jdata))
498 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
499 break;
500 case S_IFDIR:
501 di->di_flags |= cpu_to_be32(dip->i_diskflags &
502 GFS2_DIF_INHERIT_JDATA);
503 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
504 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
505 di->di_entries = cpu_to_be32(2);
506 gfs2_init_dir(dibh, dip);
507 break;
508 case S_IFLNK:
509 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
510 break;
511 }
512
681 set_buffer_uptodate(dibh); 513 set_buffer_uptodate(dibh);
682 514
683 *bhp = dibh; 515 *bhp = dibh;
@@ -685,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
685 517
686static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
687 unsigned int mode, const struct gfs2_inum_host *inum, 519 unsigned int mode, const struct gfs2_inum_host *inum,
688 const u64 *generation, dev_t dev, struct buffer_head **bhp) 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp)
689{ 522{
690 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 523 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
691 unsigned int uid, gid; 524 unsigned int uid, gid;
@@ -707,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
707 if (error) 540 if (error)
708 goto out_quota; 541 goto out_quota;
709 542
710 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 543 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
711 gfs2_quota_change(dip, +1, uid, gid); 544 gfs2_quota_change(dip, +1, uid, gid);
712 gfs2_trans_end(sdp); 545 gfs2_trans_end(sdp);
713 546
@@ -761,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
761 goto fail_quota_locks; 594 goto fail_quota_locks;
762 } 595 }
763 596
764 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 597 error = gfs2_dir_add(&dip->i_inode, name, ip);
765 if (error) 598 if (error)
766 goto fail_end_trans; 599 goto fail_end_trans;
767 600
768 error = gfs2_meta_inode_buffer(ip, &dibh); 601 error = gfs2_meta_inode_buffer(ip, &dibh);
769 if (error) 602 if (error)
770 goto fail_end_trans; 603 goto fail_end_trans;
771 ip->i_inode.i_nlink = 1; 604 inc_nlink(&ip->i_inode);
605 if (S_ISDIR(ip->i_inode.i_mode))
606 inc_nlink(&ip->i_inode);
772 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 607 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
773 gfs2_dinode_out(ip, dibh->b_data); 608 gfs2_dinode_out(ip, dibh->b_data);
774 brelse(dibh); 609 brelse(dibh);
@@ -815,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
815} 650}
816 651
817/** 652/**
818 * gfs2_createi - Create a new inode 653 * gfs2_create_inode - Create a new inode
819 * @ghs: An array of two holders 654 * @dir: The parent directory
820 * @name: The name of the new file 655 * @dentry: The new dentry
821 * @mode: the permissions on the new inode 656 * @mode: The permissions on the new inode
822 * 657 * @dev: For device nodes, this is the device number
823 * @ghs[0] is an initialized holder for the directory 658 * @symname: For symlinks, this is the link destination
824 * @ghs[1] is the holder for the inode lock 659 * @size: The initial size of the inode (ignored for directories)
825 * 660 *
826 * If the return value is not NULL, the glocks on both the directory and the new 661 * Returns: 0 on success, or error code
827 * file are held. A transaction has been started and an inplace reservation
828 * is held, as well.
829 *
830 * Returns: An inode
831 */ 662 */
832 663
833struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 664static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
834 unsigned int mode, dev_t dev) 665 unsigned int mode, dev_t dev, const char *symname,
666 unsigned int size)
835{ 667{
668 const struct qstr *name = &dentry->d_name;
669 struct gfs2_holder ghs[2];
836 struct inode *inode = NULL; 670 struct inode *inode = NULL;
837 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 671 struct gfs2_inode *dip = GFS2_I(dir);
838 struct inode *dir = &dip->i_inode;
839 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 672 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
840 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 673 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
841 int error; 674 int error;
@@ -843,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
843 struct buffer_head *bh = NULL; 676 struct buffer_head *bh = NULL;
844 677
845 if (!name->len || name->len > GFS2_FNAMESIZE) 678 if (!name->len || name->len > GFS2_FNAMESIZE)
846 return ERR_PTR(-ENAMETOOLONG); 679 return -ENAMETOOLONG;
847 680
848 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 681 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
849 error = gfs2_glock_nq(ghs);
850 if (error) 682 if (error)
851 goto fail; 683 goto fail;
852 684
@@ -864,7 +696,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
864 if (error) 696 if (error)
865 goto fail_gunlock; 697 goto fail_gunlock;
866 698
867 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 699 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
868 if (error) 700 if (error)
869 goto fail_gunlock2; 701 goto fail_gunlock2;
870 702
@@ -891,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
891 723
892 if (bh) 724 if (bh)
893 brelse(bh); 725 brelse(bh);
894 return inode; 726
727 gfs2_trans_end(sdp);
728 if (dip->i_alloc->al_rgd)
729 gfs2_inplace_release(dip);
730 gfs2_quota_unlock(dip);
731 gfs2_alloc_put(dip);
732 gfs2_glock_dq_uninit_m(2, ghs);
733 mark_inode_dirty(inode);
734 d_instantiate(dentry, inode);
735 return 0;
895 736
896fail_gunlock2: 737fail_gunlock2:
897 gfs2_glock_dq_uninit(ghs + 1); 738 gfs2_glock_dq_uninit(ghs + 1);
898 if (inode && !IS_ERR(inode)) 739 if (inode && !IS_ERR(inode))
899 iput(inode); 740 iput(inode);
900fail_gunlock: 741fail_gunlock:
901 gfs2_glock_dq(ghs); 742 gfs2_glock_dq_uninit(ghs);
902fail: 743fail:
903 if (bh) 744 if (bh)
904 brelse(bh); 745 brelse(bh);
905 return ERR_PTR(error); 746 return error;
747}
748
749/**
750 * gfs2_create - Create a file
751 * @dir: The directory in which to create the file
752 * @dentry: The dentry of the new file
753 * @mode: The mode of the new file
754 *
755 * Returns: errno
756 */
757
758static int gfs2_create(struct inode *dir, struct dentry *dentry,
759 int mode, struct nameidata *nd)
760{
761 struct inode *inode;
762 int ret;
763
764 for (;;) {
765 ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
766 if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
767 return ret;
768
769 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
770 if (inode) {
771 if (!IS_ERR(inode))
772 break;
773 return PTR_ERR(inode);
774 }
775 }
776
777 d_instantiate(dentry, inode);
778 return 0;
779}
780
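
gfs2_create() above loops because, on a cluster filesystem, the name can
appear (another node created it) or vanish again between the failed create
and the follow-up lookup. A toy single-process model of that retry protocol;
try_create() and try_lookup() are stand-ins that flip a shared flag, not
filesystem calls:

#include <errno.h>
#include <stdio.h>

static int try_create(int *exists)
{
	if (*exists)
		return -EEXIST;
	*exists = 1;
	return 0;
}

static int try_lookup(int *exists)
{
	return *exists ? 0 : -ENOENT;
}

int main(void)
{
	int exists = 1;                 /* name already created elsewhere */
	int ret;

	for (;;) {
		ret = try_create(&exists);
		if (ret != -EEXIST)
			break;          /* created it (or hit a hard error) */
		if (try_lookup(&exists) == 0) {
			ret = 0;        /* someone else's inode: reuse it */
			break;
		}
		/* name vanished between create and lookup: retry create */
	}
	printf("ret=%d\n", ret);
	return 0;
}
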
781/**
782 * gfs2_lookup - Look up a filename in a directory and return its inode
783 * @dir: The directory inode
784 * @dentry: The dentry of the new inode
785 * @nd: passed from Linux VFS, ignored by us
786 *
787 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
788 *
789 * Returns: errno
790 */
791
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd)
794{
795 struct inode *inode = NULL;
796
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh;
804 int error;
805 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
806 if (error) {
807 iput(inode);
808 return ERR_PTR(error);
809 }
810 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 }
813 d_add(dentry, inode);
814
815 return NULL;
816}
817
818/**
819 * gfs2_link - Link to a file
820 * @old_dentry: The inode to link
821 * @dir: Add link to this directory
822 * @dentry: The name of the link
823 *
824 * Link the inode in "old_dentry" into the directory "dir" with the
825 * name in "dentry".
826 *
827 * Returns: errno
828 */
829
830static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
831 struct dentry *dentry)
832{
833 struct gfs2_inode *dip = GFS2_I(dir);
834 struct gfs2_sbd *sdp = GFS2_SB(dir);
835 struct inode *inode = old_dentry->d_inode;
836 struct gfs2_inode *ip = GFS2_I(inode);
837 struct gfs2_holder ghs[2];
838 struct buffer_head *dibh;
839 int alloc_required;
840 int error;
841
842 if (S_ISDIR(inode->i_mode))
843 return -EPERM;
844
845 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
846 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
847
848 error = gfs2_glock_nq(ghs); /* parent */
849 if (error)
850 goto out_parent;
851
852 error = gfs2_glock_nq(ghs + 1); /* child */
853 if (error)
854 goto out_child;
855
856 error = -ENOENT;
857 if (inode->i_nlink == 0)
858 goto out_gunlock;
859
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
861 if (error)
862 goto out_gunlock;
863
864 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
865 switch (error) {
866 case -ENOENT:
867 break;
868 case 0:
869 error = -EEXIST;
870 default:
871 goto out_gunlock;
872 }
873
874 error = -EINVAL;
875 if (!dip->i_inode.i_nlink)
876 goto out_gunlock;
877 error = -EFBIG;
878 if (dip->i_entries == (u32)-1)
879 goto out_gunlock;
880 error = -EPERM;
881 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
882 goto out_gunlock;
883 error = -EINVAL;
884 if (!ip->i_inode.i_nlink)
885 goto out_gunlock;
886 error = -EMLINK;
887 if (ip->i_inode.i_nlink == (u32)-1)
888 goto out_gunlock;
889
890 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
891 if (error < 0)
892 goto out_gunlock;
893 error = 0;
894
895 if (alloc_required) {
896 struct gfs2_alloc *al = gfs2_alloc_get(dip);
897 if (!al) {
898 error = -ENOMEM;
899 goto out_gunlock;
900 }
901
902 error = gfs2_quota_lock_check(dip);
903 if (error)
904 goto out_alloc;
905
906 al->al_requested = sdp->sd_max_dirres;
907
908 error = gfs2_inplace_reserve(dip);
909 if (error)
910 goto out_gunlock_q;
911
912 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
913 gfs2_rg_blocks(al) +
914 2 * RES_DINODE + RES_STATFS +
915 RES_QUOTA, 0);
916 if (error)
917 goto out_ipres;
918 } else {
919 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
920 if (error)
921 goto out_ipres;
922 }
923
924 error = gfs2_meta_inode_buffer(ip, &dibh);
925 if (error)
926 goto out_end_trans;
927
928 error = gfs2_dir_add(dir, &dentry->d_name, ip);
929 if (error)
930 goto out_brelse;
931
932 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
933 inc_nlink(&ip->i_inode);
934 ip->i_inode.i_ctime = CURRENT_TIME;
935 gfs2_dinode_out(ip, dibh->b_data);
936 mark_inode_dirty(&ip->i_inode);
937
938out_brelse:
939 brelse(dibh);
940out_end_trans:
941 gfs2_trans_end(sdp);
942out_ipres:
943 if (alloc_required)
944 gfs2_inplace_release(dip);
945out_gunlock_q:
946 if (alloc_required)
947 gfs2_quota_unlock(dip);
948out_alloc:
949 if (alloc_required)
950 gfs2_alloc_put(dip);
951out_gunlock:
952 gfs2_glock_dq(ghs + 1);
953out_child:
954 gfs2_glock_dq(ghs);
955out_parent:
956 gfs2_holder_uninit(ghs);
957 gfs2_holder_uninit(ghs + 1);
958 if (!error) {
959 ihold(inode);
960 d_instantiate(dentry, inode);
961 mark_inode_dirty(inode);
962 }
963 return error;
964}
965
966/*
 967 * gfs2_unlink_ok - check to see that an inode is still in a directory
968 * @dip: the directory
969 * @name: the name of the file
970 * @ip: the inode
971 *
972 * Assumes that the lock on (at least) @dip is held.
973 *
974 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
975 */
976
977static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 const struct gfs2_inode *ip)
979{
980 int error;
981
982 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
983 return -EPERM;
984
985 if ((dip->i_inode.i_mode & S_ISVTX) &&
986 dip->i_inode.i_uid != current_fsuid() &&
987 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
988 return -EPERM;
989
990 if (IS_APPEND(&dip->i_inode))
991 return -EPERM;
992
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
994 if (error)
995 return error;
996
997 error = gfs2_dir_check(&dip->i_inode, name, ip);
998 if (error)
999 return error;
1000
1001 return 0;
1002}
1003
1004/**
1005 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
1006 * @dip: The parent directory
1007 * @dentry: The dentry of the entry in the parent directory
1008 * @bh: The inode buffer for the inode to be removed
1009 * @inode: The inode to be removed
1010 *
1011 * Called with all the locks and in a transaction. This will only be
1012 * called for a directory after it has been checked to ensure it is empty.
1013 *
1014 * Returns: 0 on success, or an error
1015 */
1016
1017static int gfs2_unlink_inode(struct gfs2_inode *dip,
1018 const struct dentry *dentry,
1019 struct buffer_head *bh)
1020{
1021 struct inode *inode = dentry->d_inode;
1022 struct gfs2_inode *ip = GFS2_I(inode);
1023 int error;
1024
1025 error = gfs2_dir_del(dip, dentry);
1026 if (error)
1027 return error;
1028
1029 ip->i_entries = 0;
1030 inode->i_ctime = CURRENT_TIME;
1031 if (S_ISDIR(inode->i_mode))
1032 clear_nlink(inode);
1033 else
1034 drop_nlink(inode);
1035 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1036 gfs2_dinode_out(ip, bh->b_data);
1037 mark_inode_dirty(inode);
1038 if (inode->i_nlink == 0)
1039 gfs2_unlink_di(inode);
1040 return 0;
1041}
1042
1043
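
gfs2_unlink_inode() above drops the link count differently by type: an empty
directory goes straight to zero (taking its "." self-link with it), a regular
file loses one link, and a count of zero queues the inode for deallocation
via gfs2_unlink_di(). The accounting in isolation, as a hedged sketch with
stand-in names:

#include <stdio.h>

static void unlink_inode(unsigned *nlink, int is_dir)
{
	if (is_dir)
		*nlink = 0;             /* clear_nlink() analogue */
	else if (*nlink > 0)
		*nlink -= 1;            /* drop_nlink() analogue */
	if (*nlink == 0)
		printf("queue inode for deallocation\n");
}

int main(void)
{
	unsigned file_links = 2, dir_links = 2;

	unlink_inode(&file_links, 0);
	printf("file nlink now %u\n", file_links);
	unlink_inode(&dir_links, 1);
	printf("dir nlink now %u\n", dir_links);
	return 0;
}
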
1044/**
1045 * gfs2_unlink - Unlink an inode (this does rmdir as well)
1046 * @dir: The inode of the directory containing the inode to unlink
1047 * @dentry: The file itself
1048 *
1049 * This routine uses the type of the inode as a flag to figure out
1050 * whether this is an unlink or an rmdir.
1051 *
1052 * Returns: errno
1053 */
1054
1055static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1056{
1057 struct gfs2_inode *dip = GFS2_I(dir);
1058 struct gfs2_sbd *sdp = GFS2_SB(dir);
1059 struct inode *inode = dentry->d_inode;
1060 struct gfs2_inode *ip = GFS2_I(inode);
1061 struct buffer_head *bh;
1062 struct gfs2_holder ghs[3];
1063 struct gfs2_rgrpd *rgd;
1064 struct gfs2_holder ri_gh;
1065 int error;
1066
1067 error = gfs2_rindex_hold(sdp, &ri_gh);
1068 if (error)
1069 return error;
1070
1071 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1072 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1073
1074 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1075 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1076
1077
1078 error = gfs2_glock_nq(ghs); /* parent */
1079 if (error)
1080 goto out_parent;
1081
1082 error = gfs2_glock_nq(ghs + 1); /* child */
1083 if (error)
1084 goto out_child;
1085
1086 error = -ENOENT;
1087 if (inode->i_nlink == 0)
1088 goto out_rgrp;
1089
1090 if (S_ISDIR(inode->i_mode)) {
1091 error = -ENOTEMPTY;
1092 if (ip->i_entries > 2 || inode->i_nlink > 2)
1093 goto out_rgrp;
1094 }
1095
1096 error = gfs2_glock_nq(ghs + 2); /* rgrp */
1097 if (error)
1098 goto out_rgrp;
1099
1100 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
1101 if (error)
1102 goto out_gunlock;
1103
1104 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1105 if (error)
1106 goto out_gunlock;
1107
1108 error = gfs2_meta_inode_buffer(ip, &bh);
1109 if (error)
1110 goto out_end_trans;
1111
1112 error = gfs2_unlink_inode(dip, dentry, bh);
1113 brelse(bh);
1114
1115out_end_trans:
1116 gfs2_trans_end(sdp);
1117out_gunlock:
1118 gfs2_glock_dq(ghs + 2);
1119out_rgrp:
1120 gfs2_holder_uninit(ghs + 2);
1121 gfs2_glock_dq(ghs + 1);
1122out_child:
1123 gfs2_holder_uninit(ghs + 1);
1124 gfs2_glock_dq(ghs);
1125out_parent:
1126 gfs2_holder_uninit(ghs);
1127 gfs2_glock_dq_uninit(&ri_gh);
1128 return error;
1129}
1130
1131/**
1132 * gfs2_symlink - Create a symlink
1133 * @dir: The directory to create the symlink in
1134 * @dentry: The dentry to put the symlink in
1135 * @symname: The thing which the link points to
1136 *
1137 * Returns: errno
1138 */
1139
1140static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1141 const char *symname)
1142{
1143 struct gfs2_sbd *sdp = GFS2_SB(dir);
1144 unsigned int size;
1145
1146 size = strlen(symname);
1147 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
1148 return -ENAMETOOLONG;
1149
1150 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
1151}
1152
1153/**
1154 * gfs2_mkdir - Make a directory
1155 * @dir: The parent directory of the new one
1156 * @dentry: The dentry of the new directory
1157 * @mode: The mode of the new directory
1158 *
1159 * Returns: errno
1160 */
1161
1162static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1163{
1164 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
1165}
1166
1167/**
1168 * gfs2_mknod - Make a special file
1169 * @dir: The directory in which the special file will reside
1170 * @dentry: The dentry of the special file
1171 * @mode: The mode of the special file
1172 * @dev: The device specification of the special file
1173 *
1174 */
1175
1176static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
1177 dev_t dev)
1178{
1179 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
1180}
1181
1182/*
1183 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1184 * @this: move this
1185 * @to: to here
1186 *
1187 * Follow @to back to the root and make sure we don't encounter @this
1188 * Assumes we already hold the rename lock.
1189 *
1190 * Returns: errno
1191 */
1192
1193static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1194{
1195 struct inode *dir = &to->i_inode;
1196 struct super_block *sb = dir->i_sb;
1197 struct inode *tmp;
1198 int error = 0;
1199
1200 igrab(dir);
1201
1202 for (;;) {
1203 if (dir == &this->i_inode) {
1204 error = -EINVAL;
1205 break;
1206 }
1207 if (dir == sb->s_root->d_inode) {
1208 error = 0;
1209 break;
1210 }
1211
1212 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
1213 if (IS_ERR(tmp)) {
1214 error = PTR_ERR(tmp);
1215 break;
1216 }
1217
1218 iput(dir);
1219 dir = tmp;
1220 }
1221
1222 iput(dir);
1223
1224 return error;
1225}
1226
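
gfs2_ok_to_move() above prevents rename cycles by walking ".." from the
destination up to the root; meeting the directory being moved means the
rename would orphan a loop. A self-contained model using plain parent
pointers (the cluster-wide rename lock the original depends on is assumed
held, and the structs are stand-ins for inodes):

#include <stddef.h>
#include <stdio.h>

struct node { struct node *parent; };

static int ok_to_move(const struct node *this, const struct node *to,
		      const struct node *root)
{
	for (const struct node *d = to; d != NULL; d = d->parent) {
		if (d == this)
			return -1;      /* would create a cycle: -EINVAL */
		if (d == root)
			return 0;       /* reached the root: safe */
	}
	return -1;                      /* detached subtree: refuse */
}

int main(void)
{
	struct node root = { .parent = NULL };
	struct node a = { .parent = &root };
	struct node b = { .parent = &a };

	printf("move a under a/b: %s\n",
	       ok_to_move(&a, &b, &root) ? "refused" : "ok");
	printf("move b under /:   %s\n",
	       ok_to_move(&b, &root, &root) ? "refused" : "ok");
	return 0;
}
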
1227/**
1228 * gfs2_rename - Rename a file
1229 * @odir: Parent directory of old file name
1230 * @odentry: The old dentry of the file
1231 * @ndir: Parent directory of new file name
1232 * @ndentry: The new dentry of the file
1233 *
1234 * Returns: errno
1235 */
1236
1237static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1238 struct inode *ndir, struct dentry *ndentry)
1239{
1240 struct gfs2_inode *odip = GFS2_I(odir);
1241 struct gfs2_inode *ndip = GFS2_I(ndir);
1242 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
1243 struct gfs2_inode *nip = NULL;
1244 struct gfs2_sbd *sdp = GFS2_SB(odir);
1245 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
1246 struct gfs2_rgrpd *nrgd;
1247 unsigned int num_gh;
1248 int dir_rename = 0;
1249 int alloc_required = 0;
1250 unsigned int x;
1251 int error;
1252
1253 if (ndentry->d_inode) {
1254 nip = GFS2_I(ndentry->d_inode);
1255 if (ip == nip)
1256 return 0;
1257 }
1258
1259 error = gfs2_rindex_hold(sdp, &ri_gh);
1260 if (error)
1261 return error;
1262
1263 if (odip != ndip) {
1264 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1265 0, &r_gh);
1266 if (error)
1267 goto out;
1268
1269 if (S_ISDIR(ip->i_inode.i_mode)) {
1270 dir_rename = 1;
1271 /* don't move a directory into its subdir */
1272 error = gfs2_ok_to_move(ip, ndip);
1273 if (error)
1274 goto out_gunlock_r;
1275 }
1276 }
1277
1278 num_gh = 1;
1279 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1280 if (odip != ndip) {
1281 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1282 num_gh++;
1283 }
1284 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1285 num_gh++;
1286
1287 if (nip) {
1288 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1289 num_gh++;
1290 /* grab the resource lock for unlink flag twiddling
1291 * this is the case of the target file already existing
1292 * so we unlink before doing the rename
1293 */
1294 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
1295 if (nrgd)
1296 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1297 }
1298
1299 for (x = 0; x < num_gh; x++) {
1300 error = gfs2_glock_nq(ghs + x);
1301 if (error)
1302 goto out_gunlock;
1303 }
1304
1305 error = -ENOENT;
1306 if (ip->i_inode.i_nlink == 0)
1307 goto out_gunlock;
1308
1309 /* Check out the old directory */
1310
1311 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
1312 if (error)
1313 goto out_gunlock;
1314
1315 /* Check out the new directory */
1316
1317 if (nip) {
1318 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
1319 if (error)
1320 goto out_gunlock;
1321
1322 if (nip->i_inode.i_nlink == 0) {
1323 error = -EAGAIN;
1324 goto out_gunlock;
1325 }
1326
1327 if (S_ISDIR(nip->i_inode.i_mode)) {
1328 if (nip->i_entries < 2) {
1329 gfs2_consist_inode(nip);
1330 error = -EIO;
1331 goto out_gunlock;
1332 }
1333 if (nip->i_entries > 2) {
1334 error = -ENOTEMPTY;
1335 goto out_gunlock;
1336 }
1337 }
1338 } else {
1339 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
1340 if (error)
1341 goto out_gunlock;
1342
1343 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
1344 switch (error) {
1345 case -ENOENT:
1346 error = 0;
1347 break;
1348 case 0:
1349 error = -EEXIST;
1350 default:
1351 goto out_gunlock;
1352 }
1353
1354 if (odip != ndip) {
1355 if (!ndip->i_inode.i_nlink) {
1356 error = -ENOENT;
1357 goto out_gunlock;
1358 }
1359 if (ndip->i_entries == (u32)-1) {
1360 error = -EFBIG;
1361 goto out_gunlock;
1362 }
1363 if (S_ISDIR(ip->i_inode.i_mode) &&
1364 ndip->i_inode.i_nlink == (u32)-1) {
1365 error = -EMLINK;
1366 goto out_gunlock;
1367 }
1368 }
1369 }
1370
1371 /* Check out the dir to be renamed */
1372
1373 if (dir_rename) {
1374 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
1375 if (error)
1376 goto out_gunlock;
1377 }
1378
1379 if (nip == NULL)
1380 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
1381 error = alloc_required;
1382 if (error < 0)
1383 goto out_gunlock;
1384 error = 0;
1385
1386 if (alloc_required) {
1387 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
1388 if (!al) {
1389 error = -ENOMEM;
1390 goto out_gunlock;
1391 }
1392
1393 error = gfs2_quota_lock_check(ndip);
1394 if (error)
1395 goto out_alloc;
1396
1397 al->al_requested = sdp->sd_max_dirres;
1398
1399 error = gfs2_inplace_reserve_ri(ndip);
1400 if (error)
1401 goto out_gunlock_q;
1402
1403 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1404 gfs2_rg_blocks(al) +
1405 4 * RES_DINODE + 4 * RES_LEAF +
1406 RES_STATFS + RES_QUOTA + 4, 0);
1407 if (error)
1408 goto out_ipreserv;
1409 } else {
1410 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
1411 5 * RES_LEAF + 4, 0);
1412 if (error)
1413 goto out_gunlock;
1414 }
1415
1416 /* Remove the target file, if it exists */
1417
1418 if (nip) {
1419 struct buffer_head *bh;
1420 error = gfs2_meta_inode_buffer(nip, &bh);
1421 if (error)
1422 goto out_end_trans;
1423 error = gfs2_unlink_inode(ndip, ndentry, bh);
1424 brelse(bh);
1425 }
1426
1427 if (dir_rename) {
1428 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
1429 if (error)
1430 goto out_end_trans;
1431 } else {
1432 struct buffer_head *dibh;
1433 error = gfs2_meta_inode_buffer(ip, &dibh);
1434 if (error)
1435 goto out_end_trans;
1436 ip->i_inode.i_ctime = CURRENT_TIME;
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh);
1440 }
1441
1442 error = gfs2_dir_del(odip, odentry);
1443 if (error)
1444 goto out_end_trans;
1445
1446 error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
1447 if (error)
1448 goto out_end_trans;
1449
1450out_end_trans:
1451 gfs2_trans_end(sdp);
1452out_ipreserv:
1453 if (alloc_required)
1454 gfs2_inplace_release(ndip);
1455out_gunlock_q:
1456 if (alloc_required)
1457 gfs2_quota_unlock(ndip);
1458out_alloc:
1459 if (alloc_required)
1460 gfs2_alloc_put(ndip);
1461out_gunlock:
1462 while (x--) {
1463 gfs2_glock_dq(ghs + x);
1464 gfs2_holder_uninit(ghs + x);
1465 }
1466out_gunlock_r:
1467 if (r_gh.gh_gl)
1468 gfs2_glock_dq_uninit(&r_gh);
1469out:
1470 gfs2_glock_dq_uninit(&ri_gh);
1471 return error;
1472}
1473
1474/**
1475 * gfs2_follow_link - Follow a symbolic link
1476 * @dentry: The dentry of the link
1477 * @nd: Data that we pass to vfs_follow_link()
1478 *
1479 * This can handle symlinks of any valid size, since the target is stored inline.
1480 *
1481 * Returns: 0 on success or error code
1482 */
1483
1484static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1485{
1486 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
1487 struct gfs2_holder i_gh;
1488 struct buffer_head *dibh;
1489 unsigned int size;
1490 char *buf;
1491 int error;
1492
1493 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
1494 error = gfs2_glock_nq(&i_gh);
1495 if (error) {
1496 gfs2_holder_uninit(&i_gh);
1497 nd_set_link(nd, ERR_PTR(error));
1498 return NULL;
1499 }
1500
1501 size = (unsigned int)i_size_read(&ip->i_inode);
1502 if (size == 0) {
1503 gfs2_consist_inode(ip);
1504 buf = ERR_PTR(-EIO);
1505 goto out;
1506 }
1507
1508 error = gfs2_meta_inode_buffer(ip, &dibh);
1509 if (error) {
1510 buf = ERR_PTR(error);
1511 goto out;
1512 }
1513
1514 buf = kzalloc(size + 1, GFP_NOFS);
1515 if (!buf)
1516 buf = ERR_PTR(-ENOMEM);
1517 else
1518 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
1519 brelse(dibh);
1520out:
1521 gfs2_glock_dq_uninit(&i_gh);
1522 nd_set_link(nd, buf);
1523 return NULL;
1524}
1525
1526static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1527{
1528 char *s = nd_get_link(nd);
1529 if (!IS_ERR(s))
1530 kfree(s);
1531}
1532
1533/**
1534 * gfs2_permission -
1535 * @inode: The inode
1536 * @mask: The mask to be tested
1537 * @flags: Indicates whether this is an RCU path walk or not
1538 *
1539 * This may be called from the VFS directly, or from within GFS2 with the
1540 * inode locked, so we look to see if the glock is already locked and only
1541 * lock the glock if its not already been done.
1542 *
1543 * Returns: errno
1544 */
1545
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1547{
1548 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh;
1550 int error;
1551 int unlock = 0;
1552
1553
1554 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU)
1557 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error)
1560 return error;
1561 unlock = 1;
1562 }
1563
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES;
1566 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1568 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh);
1570
1571 return error;
906} 1572}
907 1573
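
gfs2_permission() above has to cope with being entered both from the VFS
(glock not held) and from inside GFS2 (glock already held by this task), and
must not sleep during an RCU path walk. The lock-if-not-mine shape, reduced
to a compilable sketch with trivial stand-in primitives:

#include <stdio.h>

struct lk { int held_by_me; };

static int check_perm(struct lk *l, int rcu_walk)
{
	int unlock = 0;

	if (!l->held_by_me) {
		if (rcu_walk)
			return -1;      /* -ECHILD: redo walk with refs */
		l->held_by_me = 1;      /* "acquire" the lock */
		unlock = 1;
	}
	/* ...the real permission check would run here... */
	if (unlock)
		l->held_by_me = 0;      /* drop only what we took */
	return 0;
}

int main(void)
{
	struct lk l = { 0 };

	printf("rcu walk, unlocked: %d\n", check_perm(&l, 1));
	printf("ref walk, unlocked: %d\n", check_perm(&l, 0));
	l.held_by_me = 1;
	printf("already held:       %d\n", check_perm(&l, 1));
	return 0;
}
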
908static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1574static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -928,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
928 * @ip: 1594 * @ip:
929 * @attr: 1595 * @attr:
930 * 1596 *
931 * Called with a reference on the vnode.
932 *
933 * Returns: errno 1597 * Returns: errno
934 */ 1598 */
935 1599
@@ -949,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
949 return error; 1613 return error;
950} 1614}
951 1615
952void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1616static int setattr_chown(struct inode *inode, struct iattr *attr)
953{ 1617{
954 struct gfs2_dinode *str = buf; 1618 struct gfs2_inode *ip = GFS2_I(inode);
955 1619 struct gfs2_sbd *sdp = GFS2_SB(inode);
956 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1620 u32 ouid, ogid, nuid, ngid;
957 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1621 int error;
958 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1622
959 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1623 ouid = inode->i_uid;
960 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1624 ogid = inode->i_gid;
961 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1625 nuid = attr->ia_uid;
962 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1626 ngid = attr->ia_gid;
963 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1627
964 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1628 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
965 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 1629 ouid = nuid = NO_QUOTA_CHANGE;
966 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1630 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
967 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1631 ogid = ngid = NO_QUOTA_CHANGE;
968 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1632
969 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1633 if (!gfs2_alloc_get(ip))
970 1634 return -ENOMEM;
971 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1635
972 str->di_goal_data = cpu_to_be64(ip->i_goal); 1636 error = gfs2_quota_lock(ip, nuid, ngid);
973 str->di_generation = cpu_to_be64(ip->i_generation); 1637 if (error)
974 1638 goto out_alloc;
975 str->di_flags = cpu_to_be32(ip->i_diskflags); 1639
976 str->di_height = cpu_to_be16(ip->i_height); 1640 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
977 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1641 error = gfs2_quota_check(ip, nuid, ngid);
978 !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 1642 if (error)
979 GFS2_FORMAT_DE : 0); 1643 goto out_gunlock_q;
980 str->di_depth = cpu_to_be16(ip->i_depth); 1644 }
981 str->di_entries = cpu_to_be32(ip->i_entries); 1645
982 1646 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
983 str->di_eattr = cpu_to_be64(ip->i_eattr); 1647 if (error)
984 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1648 goto out_gunlock_q;
985 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1649
986 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1650 error = gfs2_setattr_simple(ip, attr);
987} 1651 if (error)
988 1652 goto out_end_trans;
989void gfs2_dinode_print(const struct gfs2_inode *ip) 1653
990{ 1654 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
991 printk(KERN_INFO " no_formal_ino = %llu\n", 1655 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
992 (unsigned long long)ip->i_no_formal_ino); 1656 gfs2_quota_change(ip, -blocks, ouid, ogid);
993 printk(KERN_INFO " no_addr = %llu\n", 1657 gfs2_quota_change(ip, blocks, nuid, ngid);
994 (unsigned long long)ip->i_no_addr); 1658 }
995 printk(KERN_INFO " i_size = %llu\n", 1659
996 (unsigned long long)i_size_read(&ip->i_inode)); 1660out_end_trans:
997 printk(KERN_INFO " blocks = %llu\n", 1661 gfs2_trans_end(sdp);
998 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1662out_gunlock_q:
999 printk(KERN_INFO " i_goal = %llu\n", 1663 gfs2_quota_unlock(ip);
1000 (unsigned long long)ip->i_goal); 1664out_alloc:
1001 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); 1665 gfs2_alloc_put(ip);
1002 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1666 return error;
1003 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1667}
1004 printk(KERN_INFO " i_entries = %u\n", ip->i_entries); 1668
1005 printk(KERN_INFO " i_eattr = %llu\n", 1669/**
1006 (unsigned long long)ip->i_eattr); 1670 * gfs2_setattr - Change attributes on an inode
1671 * @dentry: The dentry which is changing
1672 * @attr: The structure describing the change
1673 *
1674 * The VFS layer wants to change one or more of an inode's attributes. Write
1675 * that change out to disk.
1676 *
1677 * Returns: errno
1678 */
1679
1680static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1681{
1682 struct inode *inode = dentry->d_inode;
1683 struct gfs2_inode *ip = GFS2_I(inode);
1684 struct gfs2_holder i_gh;
1685 int error;
1686
1687 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1688 if (error)
1689 return error;
1690
1691 error = -EPERM;
1692 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1693 goto out;
1694
1695 error = inode_change_ok(inode, attr);
1696 if (error)
1697 goto out;
1698
1699 if (attr->ia_valid & ATTR_SIZE)
1700 error = gfs2_setattr_size(inode, attr->ia_size);
1701 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1702 error = setattr_chown(inode, attr);
1703 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1704 error = gfs2_acl_chmod(ip, attr);
1705 else
1706 error = gfs2_setattr_simple(ip, attr);
1707
1708out:
1709 gfs2_glock_dq_uninit(&i_gh);
1710 if (!error)
1711 mark_inode_dirty(inode);
1712 return error;
1713}
1714
1715/**
1716 * gfs2_getattr - Read out an inode's attributes
1717 * @mnt: The vfsmount the inode is being accessed from
1718 * @dentry: The dentry to stat
1719 * @stat: The inode's stats
1720 *
1721 * This may be called from the VFS directly, or from within GFS2 with the
1722 * inode locked, so we look to see if the glock is already locked and only
1723 * lock the glock if it has not already been locked. Note that it is the NFS
1724 * readdirplus operation which causes this to be called (from filldir)
1725 * with the glock already held.
1726 *
1727 * Returns: errno
1728 */
1729
1730static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1731 struct kstat *stat)
1732{
1733 struct inode *inode = dentry->d_inode;
1734 struct gfs2_inode *ip = GFS2_I(inode);
1735 struct gfs2_holder gh;
1736 int error;
1737 int unlock = 0;
1738
1739 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1740 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1741 if (error)
1742 return error;
1743 unlock = 1;
1744 }
1745
1746 generic_fillattr(inode, stat);
1747 if (unlock)
1748 gfs2_glock_dq_uninit(&gh);
1749
1750 return 0;
1751}
1752
1753static int gfs2_setxattr(struct dentry *dentry, const char *name,
1754 const void *data, size_t size, int flags)
1755{
1756 struct inode *inode = dentry->d_inode;
1757 struct gfs2_inode *ip = GFS2_I(inode);
1758 struct gfs2_holder gh;
1759 int ret;
1760
1761 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1762 ret = gfs2_glock_nq(&gh);
1763 if (ret == 0) {
1764 ret = generic_setxattr(dentry, name, data, size, flags);
1765 gfs2_glock_dq(&gh);
1766 }
1767 gfs2_holder_uninit(&gh);
1768 return ret;
1769}
1770
1771static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1772 void *data, size_t size)
1773{
1774 struct inode *inode = dentry->d_inode;
1775 struct gfs2_inode *ip = GFS2_I(inode);
1776 struct gfs2_holder gh;
1777 int ret;
1778
1779 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1780 ret = gfs2_glock_nq(&gh);
1781 if (ret == 0) {
1782 ret = generic_getxattr(dentry, name, data, size);
1783 gfs2_glock_dq(&gh);
1784 }
1785 gfs2_holder_uninit(&gh);
1786 return ret;
1787}
1788
1789static int gfs2_removexattr(struct dentry *dentry, const char *name)
1790{
1791 struct inode *inode = dentry->d_inode;
1792 struct gfs2_inode *ip = GFS2_I(inode);
1793 struct gfs2_holder gh;
1794 int ret;
1795
1796 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1797 ret = gfs2_glock_nq(&gh);
1798 if (ret == 0) {
1799 ret = generic_removexattr(dentry, name);
1800 gfs2_glock_dq(&gh);
1801 }
1802 gfs2_holder_uninit(&gh);
1803 return ret;
1804}
1805
1806static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1807 u64 start, u64 len)
1808{
1809 struct gfs2_inode *ip = GFS2_I(inode);
1810 struct gfs2_holder gh;
1811 int ret;
1812
1813 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1814 if (ret)
1815 return ret;
1816
1817 mutex_lock(&inode->i_mutex);
1818
1819 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1820 if (ret)
1821 goto out;
1822
1823 if (gfs2_is_stuffed(ip)) {
1824 u64 phys = ip->i_no_addr << inode->i_blkbits;
1825 u64 size = i_size_read(inode);
1826 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1827 FIEMAP_EXTENT_DATA_INLINE;
1828 phys += sizeof(struct gfs2_dinode);
1829 phys += start;
1830 if (start + len > size)
1831 len = size - start;
1832 if (start < size)
1833 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1834 len, flags);
1835 if (ret == 1)
1836 ret = 0;
1837 } else {
1838 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1839 gfs2_block_map);
1840 }
1841
1842 gfs2_glock_dq_uninit(&gh);
1843out:
1844 mutex_unlock(&inode->i_mutex);
1845 return ret;
1007} 1846}
1008 1847
1848const struct inode_operations gfs2_file_iops = {
1849 .permission = gfs2_permission,
1850 .setattr = gfs2_setattr,
1851 .getattr = gfs2_getattr,
1852 .setxattr = gfs2_setxattr,
1853 .getxattr = gfs2_getxattr,
1854 .listxattr = gfs2_listxattr,
1855 .removexattr = gfs2_removexattr,
1856 .fiemap = gfs2_fiemap,
1857};
1858
1859const struct inode_operations gfs2_dir_iops = {
1860 .create = gfs2_create,
1861 .lookup = gfs2_lookup,
1862 .link = gfs2_link,
1863 .unlink = gfs2_unlink,
1864 .symlink = gfs2_symlink,
1865 .mkdir = gfs2_mkdir,
1866 .rmdir = gfs2_unlink,
1867 .mknod = gfs2_mknod,
1868 .rename = gfs2_rename,
1869 .permission = gfs2_permission,
1870 .setattr = gfs2_setattr,
1871 .getattr = gfs2_getattr,
1872 .setxattr = gfs2_setxattr,
1873 .getxattr = gfs2_getxattr,
1874 .listxattr = gfs2_listxattr,
1875 .removexattr = gfs2_removexattr,
1876 .fiemap = gfs2_fiemap,
1877};
1878
1879const struct inode_operations gfs2_symlink_iops = {
1880 .readlink = generic_readlink,
1881 .follow_link = gfs2_follow_link,
1882 .put_link = gfs2_put_link,
1883 .permission = gfs2_permission,
1884 .setattr = gfs2_setattr,
1885 .getattr = gfs2_getattr,
1886 .setxattr = gfs2_setxattr,
1887 .getxattr = gfs2_getxattr,
1888 .listxattr = gfs2_listxattr,
1889 .removexattr = gfs2_removexattr,
1890 .fiemap = gfs2_fiemap,
1891};
1892
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 099ca305e518..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -102,22 +102,16 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
103 u64 *no_formal_ino, 103 u64 *no_formal_ino,
104 unsigned int blktype); 104 unsigned int blktype);
105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
106 106
107extern int gfs2_inode_refresh(struct gfs2_inode *ip); 107extern int gfs2_inode_refresh(struct gfs2_inode *ip);
108 108
109extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
110extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
111extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
112 int is_root); 110 int is_root);
113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
114 const struct qstr *name,
115 unsigned int mode, dev_t dev);
116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
120extern void gfs2_dinode_print(const struct gfs2_inode *ip);
121 115
122extern const struct inode_operations gfs2_file_iops; 116extern const struct inode_operations gfs2_file_iops;
123extern const struct inode_operations gfs2_dir_iops; 117extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
83/** 84/**
84 * gfs2_ail1_start_one - Start I/O on a part of the AIL 85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
85 * @sdp: the filesystem 86 * @sdp: the filesystem
86 * @tr: the part of the AIL 87 * @wbc: The writeback control structure
88 * @ai: The ail structure
87 * 89 *
88 */ 90 */
89 91
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 92static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
93 struct writeback_control *wbc,
94 struct gfs2_ail *ai)
91__releases(&sdp->sd_ail_lock) 95__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_ail_lock) 96__acquires(&sdp->sd_ail_lock)
93{ 97{
98 struct gfs2_glock *gl = NULL;
99 struct address_space *mapping;
94 struct gfs2_bufdata *bd, *s; 100 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 101 struct buffer_head *bh;
96 int retry;
97 102
98 do { 103 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
99 retry = 0; 104 bh = bd->bd_bh;
100 105
101 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 106 gfs2_assert(sdp, bd->bd_ail == ai);
102 bd_ail_st_list) {
103 bh = bd->bd_bh;
104 107
105 gfs2_assert(sdp, bd->bd_ail == ai); 108 if (!buffer_busy(bh)) {
109 if (!buffer_uptodate(bh))
110 gfs2_io_error_bh(sdp, bh);
111 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
112 continue;
113 }
106 114
107 if (!buffer_busy(bh)) { 115 if (!buffer_dirty(bh))
108 if (!buffer_uptodate(bh)) 116 continue;
109 gfs2_io_error_bh(sdp, bh); 117 if (gl == bd->bd_gl)
110 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 118 continue;
111 continue; 119 gl = bd->bd_gl;
112 } 120 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
121 mapping = bh->b_page->mapping;
122 if (!mapping)
123 continue;
124 spin_unlock(&sdp->sd_ail_lock);
125 generic_writepages(mapping, wbc);
126 spin_lock(&sdp->sd_ail_lock);
127 if (wbc->nr_to_write <= 0)
128 break;
129 return 1;
130 }
113 131
114 if (!buffer_dirty(bh)) 132 return 0;
115 continue; 133}
116 134
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 135
119 get_bh(bh); 136/**
120 spin_unlock(&sdp->sd_ail_lock); 137 * gfs2_ail1_flush - start writeback of some ail1 entries
121 lock_buffer(bh); 138 * @sdp: The super block
122 if (test_clear_buffer_dirty(bh)) { 139 * @wbc: The writeback control structure
123 bh->b_end_io = end_buffer_write_sync; 140 *
124 submit_bh(WRITE_SYNC, bh); 141 * Writes back some ail1 entries, according to the limits in the
125 } else { 142 * writeback control structure
126 unlock_buffer(bh); 143 */
127 brelse(bh); 144
128 } 145void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
129 spin_lock(&sdp->sd_ail_lock); 146{
130 147 struct list_head *head = &sdp->sd_ail1_list;
131 retry = 1; 148 struct gfs2_ail *ai;
149
150 trace_gfs2_ail_flush(sdp, wbc, 1);
151 spin_lock(&sdp->sd_ail_lock);
152restart:
153 list_for_each_entry_reverse(ai, head, ai_list) {
154 if (wbc->nr_to_write <= 0)
132 break; 155 break;
133 } 156 if (gfs2_ail1_start_one(sdp, wbc, ai))
134 } while (retry); 157 goto restart;
158 }
159 spin_unlock(&sdp->sd_ail_lock);
160 trace_gfs2_ail_flush(sdp, wbc, 0);
161}
162
163/**
164 * gfs2_ail1_start - start writeback of all ail1 entries
165 * @sdp: The superblock
166 */
167
168static void gfs2_ail1_start(struct gfs2_sbd *sdp)
169{
170 struct writeback_control wbc = {
171 .sync_mode = WB_SYNC_NONE,
172 .nr_to_write = LONG_MAX,
173 .range_start = 0,
174 .range_end = LLONG_MAX,
175 };
176
177 return gfs2_ail1_flush(sdp, &wbc);
135} 178}
136 179
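gfs2_ail1_flush() above stops once wbc->nr_to_write is exhausted and restarts its reverse walk whenever gfs2_ail1_start_one() reports progress by returning 1. The following is a rough userspace sketch of that budgeted, restartable scan; the structure, names, and numbers are invented purely for illustration and are not kernel code.

        #include <stdio.h>

        int main(void)
        {
                long nr_to_write = 5;           /* wbc-style budget */
                int dirty[4] = { 2, 1, 3, 1 };  /* dirty items per AIL entry */

        restart:
                for (int i = 3; i >= 0; i--) {  /* reverse scan, oldest first */
                        if (nr_to_write <= 0)
                                break;
                        if (dirty[i]) {
                                nr_to_write -= dirty[i];
                                dirty[i] = 0;
                                goto restart;   /* mirrors the == 1 return */
                        }
                }
                printf("budget left: %ld\n", nr_to_write);
                return 0;
        }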
137/** 180/**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
141 * 184 *
142 */ 185 */
143 186
144static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) 187static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
145{ 188{
146 struct gfs2_bufdata *bd, *s; 189 struct gfs2_bufdata *bd, *s;
147 struct buffer_head *bh; 190 struct buffer_head *bh;
@@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
149 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 192 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
150 bd_ail_st_list) { 193 bd_ail_st_list) {
151 bh = bd->bd_bh; 194 bh = bd->bd_bh;
152
153 gfs2_assert(sdp, bd->bd_ail == ai); 195 gfs2_assert(sdp, bd->bd_ail == ai);
154 196 if (buffer_busy(bh))
155 if (buffer_busy(bh)) { 197 continue;
156 if (flags & DIO_ALL)
157 continue;
158 else
159 break;
160 }
161
162 if (!buffer_uptodate(bh)) 198 if (!buffer_uptodate(bh))
163 gfs2_io_error_bh(sdp, bh); 199 gfs2_io_error_bh(sdp, bh);
164
165 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 200 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
166 } 201 }
167 202
168 return list_empty(&ai->ai_ail1_list);
169} 203}
170 204
171static void gfs2_ail1_start(struct gfs2_sbd *sdp) 205/**
172{ 206 * gfs2_ail1_empty - Try to empty the ail1 lists
173 struct list_head *head; 207 * @sdp: The superblock
174 u64 sync_gen; 208 *
175 struct gfs2_ail *ai; 209 * Tries to empty the ail1 lists, starting with the oldest first
176 int done = 0; 210 */
177
178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) {
181 spin_unlock(&sdp->sd_ail_lock);
182 return;
183 }
184 sync_gen = sdp->sd_ail_sync_gen++;
185
186 while(!done) {
187 done = 1;
188 list_for_each_entry_reverse(ai, head, ai_list) {
189 if (ai->ai_sync_gen >= sync_gen)
190 continue;
191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0;
194 break;
195 }
196 }
197
198 spin_unlock(&sdp->sd_ail_lock);
199}
200 211
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 212static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
202{ 213{
203 struct gfs2_ail *ai, *s; 214 struct gfs2_ail *ai, *s;
204 int ret; 215 int ret;
205 216
206 spin_lock(&sdp->sd_ail_lock); 217 spin_lock(&sdp->sd_ail_lock);
207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 218 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 219 gfs2_ail1_empty_one(sdp, ai);
220 if (list_empty(&ai->ai_ail1_list))
210 list_move(&ai->ai_list, &sdp->sd_ail2_list); 221 list_move(&ai->ai_list, &sdp->sd_ail2_list);
211 else if (!(flags & DIO_ALL)) 222 else
212 break; 223 break;
213 } 224 }
214
215 ret = list_empty(&sdp->sd_ail1_list); 225 ret = list_empty(&sdp->sd_ail1_list);
216
217 spin_unlock(&sdp->sd_ail_lock); 226 spin_unlock(&sdp->sd_ail_lock);
218 227
219 return ret; 228 return ret;
220} 229}
221 230
231static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
232{
233 struct gfs2_ail *ai;
234 struct gfs2_bufdata *bd;
235 struct buffer_head *bh;
236
237 spin_lock(&sdp->sd_ail_lock);
238 list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
239 list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
240 bh = bd->bd_bh;
241 if (!buffer_locked(bh))
242 continue;
243 get_bh(bh);
244 spin_unlock(&sdp->sd_ail_lock);
245 wait_on_buffer(bh);
246 brelse(bh);
247 return;
248 }
249 }
250 spin_unlock(&sdp->sd_ail_lock);
251}
222 252
223/** 253/**
224 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced 254 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
574 set_buffer_uptodate(bh); 604 set_buffer_uptodate(bh);
575 clear_buffer_dirty(bh); 605 clear_buffer_dirty(bh);
576 606
577 gfs2_ail1_empty(sdp, 0); 607 gfs2_ail1_empty(sdp);
578 tail = current_tail(sdp); 608 tail = current_tail(sdp);
579 609
580 lh = (struct gfs2_log_header *)bh->b_data; 610 lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
869 gfs2_log_flush(sdp, NULL); 899 gfs2_log_flush(sdp, NULL);
870 for (;;) { 900 for (;;) {
871 gfs2_ail1_start(sdp); 901 gfs2_ail1_start(sdp);
872 if (gfs2_ail1_empty(sdp, DIO_ALL)) 902 gfs2_ail1_wait(sdp);
903 if (gfs2_ail1_empty(sdp))
873 break; 904 break;
874 msleep(10);
875 } 905 }
876} 906}
877 907
@@ -905,20 +935,20 @@ int gfs2_logd(void *data)
905 935
906 preflush = atomic_read(&sdp->sd_log_pinned); 936 preflush = atomic_read(&sdp->sd_log_pinned);
907 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 937 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
908 gfs2_ail1_empty(sdp, DIO_ALL); 938 gfs2_ail1_empty(sdp);
909 gfs2_log_flush(sdp, NULL); 939 gfs2_log_flush(sdp, NULL);
910 gfs2_ail1_empty(sdp, DIO_ALL);
911 } 940 }
912 941
913 if (gfs2_ail_flush_reqd(sdp)) { 942 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_ail1_start(sdp); 943 gfs2_ail1_start(sdp);
915 io_schedule(); 944 gfs2_ail1_wait(sdp);
916 gfs2_ail1_empty(sdp, 0); 945 gfs2_ail1_empty(sdp);
917 gfs2_log_flush(sdp, NULL); 946 gfs2_log_flush(sdp, NULL);
918 gfs2_ail1_empty(sdp, DIO_ALL);
919 } 947 }
920 948
921 wake_up(&sdp->sd_log_waitq); 949 if (!gfs2_ail_flush_reqd(sdp))
950 wake_up(&sdp->sd_log_waitq);
951
922 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 952 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
923 if (freezing(current)) 953 if (freezing(current))
924 refrigerator(); 954 refrigerator();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/writeback.h>
15#include "incore.h" 16#include "incore.h"
16 17
17/** 18/**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
61extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
63extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
62 64
63extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); 65extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 66extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
40{ 40{
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 42
43 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 43 BUG_ON(!current->journal_info);
44 44
45 clear_buffer_dirty(bh); 45 clear_buffer_dirty(bh);
46 if (test_set_buffer_pinned(bh)) 46 if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
65 * @sdp: the filesystem the buffer belongs to 65 * @sdp: the filesystem the buffer belongs to
66 * @bh: The buffer to unpin 66 * @bh: The buffer to unpin
67 * @ai: 67 * @ai:
68 * @flags: The inode dirty flags
68 * 69 *
69 */ 70 */
70 71
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
73{ 74{
74 struct gfs2_bufdata *bd = bh->b_private; 75 struct gfs2_bufdata *bd = bh->b_private;
75 76
76 gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 77 BUG_ON(!buffer_uptodate(bh));
77 78 BUG_ON(!buffer_pinned(bh));
78 if (!buffer_pinned(bh))
79 gfs2_assert_withdraw(sdp, 0);
80 79
81 lock_buffer(bh); 80 lock_buffer(bh);
82 mark_buffer_dirty(bh); 81 mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 94 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
96 spin_unlock(&sdp->sd_ail_lock); 95 spin_unlock(&sdp->sd_ail_lock);
97 96
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) 97 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
100 trace_gfs2_pin(bd, 0); 98 trace_gfs2_pin(bd, 0);
101 unlock_buffer(bh); 99 unlock_buffer(bh);
102 atomic_dec(&sdp->sd_log_pinned); 100 atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
322 320
323static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 321static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
324{ 322{
323 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
324 struct gfs2_glock *gl = bd->bd_gl;
325 struct gfs2_trans *tr; 325 struct gfs2_trans *tr;
326 326
327 tr = current->journal_info; 327 tr = current->journal_info;
328 tr->tr_touched = 1; 328 tr->tr_touched = 1;
329 tr->tr_num_revoke++; 329 tr->tr_num_revoke++;
330 sdp->sd_log_num_revoke++; 330 sdp->sd_log_num_revoke++;
331 atomic_inc(&gl->gl_revokes);
332 set_bit(GLF_LFLUSH, &gl->gl_flags);
331 list_add(&le->le_list, &sdp->sd_log_le_revoke); 333 list_add(&le->le_list, &sdp->sd_log_le_revoke);
332} 334}
333 335
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
350 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 352 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
351 offset = sizeof(struct gfs2_log_descriptor); 353 offset = sizeof(struct gfs2_log_descriptor);
352 354
353 while (!list_empty(head)) { 355 list_for_each_entry(bd, head, bd_le.le_list) {
354 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
355 list_del_init(&bd->bd_le.le_list);
356 sdp->sd_log_num_revoke--; 356 sdp->sd_log_num_revoke--;
357 357
358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
367 } 367 }
368 368
369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
370 kmem_cache_free(gfs2_bufdata_cachep, bd);
371
372 offset += sizeof(u64); 370 offset += sizeof(u64);
373 } 371 }
374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 372 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
376 submit_bh(WRITE_SYNC, bh); 374 submit_bh(WRITE_SYNC, bh);
377} 375}
378 376
377static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
378{
379 struct list_head *head = &sdp->sd_log_le_revoke;
380 struct gfs2_bufdata *bd;
381 struct gfs2_glock *gl;
382
383 while (!list_empty(head)) {
384 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
385 list_del_init(&bd->bd_le.le_list);
386 gl = bd->bd_gl;
387 atomic_dec(&gl->gl_revokes);
388 clear_bit(GLF_LFLUSH, &gl->gl_flags);
389 kmem_cache_free(gfs2_bufdata_cachep, bd);
390 }
391}
392
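The new per-glock gl_revokes counter and the GLF_LFLUSH flag now bracket each revoke's lifetime: revoke_lo_add() raises them when a revoke is queued, and revoke_lo_after_commit() drops them once the revoke has reached the log. A toy userspace model of that bookkeeping follows; all names are invented and this is only a sketch of the pairing, not kernel code.

        #include <stdio.h>

        struct glock { int revokes; int lflush; };

        /* queueing a revoke: mirrors revoke_lo_add() */
        static void revoke_add(struct glock *gl)
        {
                gl->revokes++;
                gl->lflush = 1;
        }

        /* revoke written to the log: mirrors revoke_lo_after_commit() */
        static void revoke_commit(struct glock *gl)
        {
                gl->revokes--;
                gl->lflush = 0;
        }

        int main(void)
        {
                struct glock gl = { 0, 0 };

                revoke_add(&gl);
                revoke_add(&gl);
                revoke_commit(&gl);
                revoke_commit(&gl);
                printf("revokes=%d lflush=%d\n", gl.revokes, gl.lflush);
                return 0;
        }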
379static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 393static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
380 struct gfs2_log_header_host *head, int pass) 394 struct gfs2_log_header_host *head, int pass)
381{ 395{
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
749const struct gfs2_log_operations gfs2_revoke_lops = { 763const struct gfs2_log_operations gfs2_revoke_lops = {
750 .lo_add = revoke_lo_add, 764 .lo_add = revoke_lo_add,
751 .lo_before_commit = revoke_lo_before_commit, 765 .lo_before_commit = revoke_lo_before_commit,
766 .lo_after_commit = revoke_lo_after_commit,
752 .lo_before_scan = revoke_lo_before_scan, 767 .lo_before_scan = revoke_lo_before_scan,
753 .lo_scan_elements = revoke_lo_scan_elements, 768 .lo_scan_elements = revoke_lo_scan_elements,
754 .lo_after_scan = revoke_lo_after_scan, 769 .lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..c2b34cd2abe0 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
53 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
54 INIT_LIST_HEAD(&gl->gl_ail_list); 54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0); 55 atomic_set(&gl->gl_ail_count, 0);
56 atomic_set(&gl->gl_revokes, 0);
56} 57}
57 58
58static void gfs2_init_gl_aspace_once(void *foo) 59static void gfs2_init_gl_aspace_once(void *foo)
@@ -145,7 +146,7 @@ static int __init init_gfs2_fs(void)
145 146
146 gfs2_register_debugfs(); 147 gfs2_register_debugfs();
147 148
148 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); 149 printk("GFS2 installed\n");
149 150
150 return 0; 151 return 0;
151 152
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
31#include "rgrp.h" 31#include "rgrp.h"
32#include "trans.h" 32#include "trans.h"
33#include "util.h" 33#include "util.h"
34#include "trace_gfs2.h"
34 35
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 36static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 37{
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
310 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
311 312
312 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 trace_gfs2_pin(bd, 0);
313 atomic_dec(&sdp->sd_log_pinned); 315 atomic_dec(&sdp->sd_log_pinned);
314 list_del_init(&bd->bd_le.le_list); 316 list_del_init(&bd->bd_le.le_list);
315 if (meta) { 317 if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
77 77
78#define buffer_busy(bh) \ 78#define buffer_busy(bh) \
79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) 79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
80#define buffer_in_io(bh) \
81((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
82 80
83#endif /* __DIO_DOT_H__ */ 81#endif /* __DIO_DOT_H__ */
84 82
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3c69eb91c74..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
126 * changed. 126 * changed.
127 */ 127 */
128 128
129static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) 129static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
130{ 130{
131 struct gfs2_sb_host *sb = &sdp->sd_sb;
132
131 if (sb->sb_magic != GFS2_MAGIC || 133 if (sb->sb_magic != GFS2_MAGIC ||
132 sb->sb_type != GFS2_METATYPE_SB) { 134 sb->sb_type != GFS2_METATYPE_SB) {
133 if (!silent) 135 if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
157 unlock_page(page); 159 unlock_page(page);
158} 160}
159 161
160static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) 162static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
161{ 163{
164 struct gfs2_sb_host *sb = &sdp->sd_sb;
165 struct super_block *s = sdp->sd_vfs;
162 const struct gfs2_sb *str = buf; 166 const struct gfs2_sb *str = buf;
163 167
164 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); 168 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
175 179
176 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 180 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
177 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 181 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
178 memcpy(sb->sb_uuid, str->sb_uuid, 16); 182 memcpy(s->s_uuid, str->sb_uuid, 16);
179} 183}
180 184
181/** 185/**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
197 * Returns: 0 on success or error 201 * Returns: 0 on success or error
198 */ 202 */
199 203
200static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) 204static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
201{ 205{
202 struct super_block *sb = sdp->sd_vfs; 206 struct super_block *sb = sdp->sd_vfs;
203 struct gfs2_sb *p; 207 struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
227 return -EIO; 231 return -EIO;
228 } 232 }
229 p = kmap(page); 233 p = kmap(page);
230 gfs2_sb_in(&sdp->sd_sb, p); 234 gfs2_sb_in(sdp, p);
231 kunmap(page); 235 kunmap(page);
232 __free_page(page); 236 __free_page(page);
233 return 0; 237 return gfs2_check_sb(sdp, silent);
234} 238}
235 239
236/** 240/**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
247 unsigned int x; 251 unsigned int x;
248 int error; 252 int error;
249 253
250 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 254 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
251 if (error) { 255 if (error) {
252 if (!silent) 256 if (!silent)
253 fs_err(sdp, "can't read superblock\n"); 257 fs_err(sdp, "can't read superblock\n");
254 return error; 258 return error;
255 } 259 }
256 260
257 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
258 if (error)
259 return error;
260
261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
262 GFS2_BASIC_BLOCK_SHIFT; 262 GFS2_BASIC_BLOCK_SHIFT;
263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
340 /* Try to autodetect */ 340 /* Try to autodetect */
341 341
342 if (!proto[0] || !table[0]) { 342 if (!proto[0] || !table[0]) {
343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
344 if (error) 344 if (error)
345 return error; 345 return error;
346 346
347 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
348 if (error)
349 goto out;
350
351 if (!proto[0]) 347 if (!proto[0])
352 proto = sdp->sd_sb.sb_lockproto; 348 proto = sdp->sd_sb.sb_lockproto;
353 if (!table[0]) 349 if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
364 while ((table = strchr(table, '/'))) 360 while ((table = strchr(table, '/')))
365 *table = '_'; 361 *table = '_';
366 362
367out:
368 return error; 363 return error;
369} 364}
370 365
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1119 if (sdp->sd_args.ar_statfs_quantum) { 1114 if (sdp->sd_args.ar_statfs_quantum) {
1120 sdp->sd_tune.gt_statfs_slow = 0; 1115 sdp->sd_tune.gt_statfs_slow = 0;
1121 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; 1116 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1122 } 1117 } else {
1123 else {
1124 sdp->sd_tune.gt_statfs_slow = 1; 1118 sdp->sd_tune.gt_statfs_slow = 1;
1125 sdp->sd_tune.gt_statfs_quantum = 30; 1119 sdp->sd_tune.gt_statfs_quantum = 30;
1126 } 1120 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl.h>
18#include <linux/gfs2_ondisk.h>
19#include <linux/crc32.h>
20#include <linux/fiemap.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "acl.h"
26#include "bmap.h"
27#include "dir.h"
28#include "xattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "util.h"
36#include "super.h"
37
38/**
39 * gfs2_create - Create a file
40 * @dir: The directory in which to create the file
41 * @dentry: The dentry of the new file
42 * @mode: The mode of the new file
43 *
44 * Returns: errno
45 */
46
47static int gfs2_create(struct inode *dir, struct dentry *dentry,
48 int mode, struct nameidata *nd)
49{
50 struct gfs2_inode *dip = GFS2_I(dir);
51 struct gfs2_sbd *sdp = GFS2_SB(dir);
52 struct gfs2_holder ghs[2];
53 struct inode *inode;
54
55 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
56
57 for (;;) {
58 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
59 if (!IS_ERR(inode)) {
60 gfs2_trans_end(sdp);
61 if (dip->i_alloc->al_rgd)
62 gfs2_inplace_release(dip);
63 gfs2_quota_unlock(dip);
64 gfs2_alloc_put(dip);
65 gfs2_glock_dq_uninit_m(2, ghs);
66 mark_inode_dirty(inode);
67 break;
68 } else if (PTR_ERR(inode) != -EEXIST ||
69 (nd && nd->flags & LOOKUP_EXCL)) {
70 gfs2_holder_uninit(ghs);
71 return PTR_ERR(inode);
72 }
73
74 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
75 if (inode) {
76 if (!IS_ERR(inode)) {
77 gfs2_holder_uninit(ghs);
78 break;
79 } else {
80 gfs2_holder_uninit(ghs);
81 return PTR_ERR(inode);
82 }
83 }
84 }
85
86 d_instantiate(dentry, inode);
87
88 return 0;
89}
90
91/**
92 * gfs2_lookup - Look up a filename in a directory and return its inode
93 * @dir: The directory inode
94 * @dentry: The dentry of the new inode
95 * @nd: passed from Linux VFS, ignored by us
96 *
97 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
98 *
99 * Returns: errno
100 */
101
102static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
103 struct nameidata *nd)
104{
105 struct inode *inode = NULL;
106
107 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
108 if (inode && IS_ERR(inode))
109 return ERR_CAST(inode);
110
111 if (inode) {
112 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
113 struct gfs2_holder gh;
114 int error;
115 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
116 if (error) {
117 iput(inode);
118 return ERR_PTR(error);
119 }
120 gfs2_glock_dq_uninit(&gh);
121 return d_splice_alias(inode, dentry);
122 }
123 d_add(dentry, inode);
124
125 return NULL;
126}
127
128/**
129 * gfs2_link - Link to a file
130 * @old_dentry: The inode to link
131 * @dir: Add link to this directory
132 * @dentry: The name of the link
133 *
134 * Link the inode in "old_dentry" into the directory "dir" with the
135 * name in "dentry".
136 *
137 * Returns: errno
138 */
139
140static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
141 struct dentry *dentry)
142{
143 struct gfs2_inode *dip = GFS2_I(dir);
144 struct gfs2_sbd *sdp = GFS2_SB(dir);
145 struct inode *inode = old_dentry->d_inode;
146 struct gfs2_inode *ip = GFS2_I(inode);
147 struct gfs2_holder ghs[2];
148 int alloc_required;
149 int error;
150
151 if (S_ISDIR(inode->i_mode))
152 return -EPERM;
153
154 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
155 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
156
157 error = gfs2_glock_nq(ghs); /* parent */
158 if (error)
159 goto out_parent;
160
161 error = gfs2_glock_nq(ghs + 1); /* child */
162 if (error)
163 goto out_child;
164
165 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
166 if (error)
167 goto out_gunlock;
168
169 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
170 switch (error) {
171 case -ENOENT:
172 break;
173 case 0:
174 error = -EEXIST;
175 default:
176 goto out_gunlock;
177 }
178
179 error = -EINVAL;
180 if (!dip->i_inode.i_nlink)
181 goto out_gunlock;
182 error = -EFBIG;
183 if (dip->i_entries == (u32)-1)
184 goto out_gunlock;
185 error = -EPERM;
186 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
187 goto out_gunlock;
188 error = -EINVAL;
189 if (!ip->i_inode.i_nlink)
190 goto out_gunlock;
191 error = -EMLINK;
192 if (ip->i_inode.i_nlink == (u32)-1)
193 goto out_gunlock;
194
195 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
196 if (error < 0)
197 goto out_gunlock;
198 error = 0;
199
200 if (alloc_required) {
201 struct gfs2_alloc *al = gfs2_alloc_get(dip);
202 if (!al) {
203 error = -ENOMEM;
204 goto out_gunlock;
205 }
206
207 error = gfs2_quota_lock_check(dip);
208 if (error)
209 goto out_alloc;
210
211 al->al_requested = sdp->sd_max_dirres;
212
213 error = gfs2_inplace_reserve(dip);
214 if (error)
215 goto out_gunlock_q;
216
217 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
218 gfs2_rg_blocks(al) +
219 2 * RES_DINODE + RES_STATFS +
220 RES_QUOTA, 0);
221 if (error)
222 goto out_ipres;
223 } else {
224 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
225 if (error)
226 goto out_ipres;
227 }
228
229 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
230 if (error)
231 goto out_end_trans;
232
233 error = gfs2_change_nlink(ip, +1);
234
235out_end_trans:
236 gfs2_trans_end(sdp);
237out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240out_gunlock_q:
241 if (alloc_required)
242 gfs2_quota_unlock(dip);
243out_alloc:
244 if (alloc_required)
245 gfs2_alloc_put(dip);
246out_gunlock:
247 gfs2_glock_dq(ghs + 1);
248out_child:
249 gfs2_glock_dq(ghs);
250out_parent:
251 gfs2_holder_uninit(ghs);
252 gfs2_holder_uninit(ghs + 1);
253 if (!error) {
254 ihold(inode);
255 d_instantiate(dentry, inode);
256 mark_inode_dirty(inode);
257 }
258 return error;
259}
260
261/*
 262 * gfs2_unlink_ok - check to see that an inode is still in a directory
263 * @dip: the directory
264 * @name: the name of the file
265 * @ip: the inode
266 *
267 * Assumes that the lock on (at least) @dip is held.
268 *
269 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
270 */
271
272static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
273 const struct gfs2_inode *ip)
274{
275 int error;
276
277 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
278 return -EPERM;
279
280 if ((dip->i_inode.i_mode & S_ISVTX) &&
281 dip->i_inode.i_uid != current_fsuid() &&
282 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
283 return -EPERM;
284
285 if (IS_APPEND(&dip->i_inode))
286 return -EPERM;
287
288 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
289 if (error)
290 return error;
291
292 error = gfs2_dir_check(&dip->i_inode, name, ip);
293 if (error)
294 return error;
295
296 return 0;
297}
298
299/**
300 * gfs2_unlink - Unlink a file
301 * @dir: The inode of the directory containing the file to unlink
302 * @dentry: The file itself
303 *
 304 * Unlink a file.
305 *
306 * Returns: errno
307 */
308
309static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
310{
311 struct gfs2_inode *dip = GFS2_I(dir);
312 struct gfs2_sbd *sdp = GFS2_SB(dir);
313 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
314 struct gfs2_holder ghs[3];
315 struct gfs2_rgrpd *rgd;
316 struct gfs2_holder ri_gh;
317 int error;
318
319 error = gfs2_rindex_hold(sdp, &ri_gh);
320 if (error)
321 return error;
322
323 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
324 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
325
326 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
327 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
328
329
330 error = gfs2_glock_nq(ghs); /* parent */
331 if (error)
332 goto out_parent;
333
334 error = gfs2_glock_nq(ghs + 1); /* child */
335 if (error)
336 goto out_child;
337
338 error = gfs2_glock_nq(ghs + 2); /* rgrp */
339 if (error)
340 goto out_rgrp;
341
342 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
343 if (error)
344 goto out_gunlock;
345
346 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
347 if (error)
348 goto out_gunlock;
349
350 error = gfs2_dir_del(dip, &dentry->d_name);
351 if (error)
352 goto out_end_trans;
353
354 error = gfs2_change_nlink(ip, -1);
355
356out_end_trans:
357 gfs2_trans_end(sdp);
358out_gunlock:
359 gfs2_glock_dq(ghs + 2);
360out_rgrp:
361 gfs2_holder_uninit(ghs + 2);
362 gfs2_glock_dq(ghs + 1);
363out_child:
364 gfs2_holder_uninit(ghs + 1);
365 gfs2_glock_dq(ghs);
366out_parent:
367 gfs2_holder_uninit(ghs);
368 gfs2_glock_dq_uninit(&ri_gh);
369 return error;
370}
371
372/**
373 * gfs2_symlink - Create a symlink
374 * @dir: The directory to create the symlink in
375 * @dentry: The dentry to put the symlink in
376 * @symname: The thing which the link points to
377 *
378 * Returns: errno
379 */
380
381static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
382 const char *symname)
383{
384 struct gfs2_inode *dip = GFS2_I(dir), *ip;
385 struct gfs2_sbd *sdp = GFS2_SB(dir);
386 struct gfs2_holder ghs[2];
387 struct inode *inode;
388 struct buffer_head *dibh;
389 int size;
390 int error;
391
392 /* Must be stuffed with a null terminator for gfs2_follow_link() */
393 size = strlen(symname);
394 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
395 return -ENAMETOOLONG;
396
397 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
398
399 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
400 if (IS_ERR(inode)) {
401 gfs2_holder_uninit(ghs);
402 return PTR_ERR(inode);
403 }
404
405 ip = ghs[1].gh_gl->gl_object;
406
407 i_size_write(inode, size);
408
409 error = gfs2_meta_inode_buffer(ip, &dibh);
410
411 if (!gfs2_assert_withdraw(sdp, !error)) {
412 gfs2_dinode_out(ip, dibh->b_data);
413 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
414 size);
415 brelse(dibh);
416 }
417
418 gfs2_trans_end(sdp);
419 if (dip->i_alloc->al_rgd)
420 gfs2_inplace_release(dip);
421 gfs2_quota_unlock(dip);
422 gfs2_alloc_put(dip);
423
424 gfs2_glock_dq_uninit_m(2, ghs);
425
426 d_instantiate(dentry, inode);
427 mark_inode_dirty(inode);
428
429 return 0;
430}
431
432/**
433 * gfs2_mkdir - Make a directory
434 * @dir: The parent directory of the new one
435 * @dentry: The dentry of the new directory
436 * @mode: The mode of the new directory
437 *
438 * Returns: errno
439 */
440
441static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct gfs2_inode *dip = GFS2_I(dir), *ip;
444 struct gfs2_sbd *sdp = GFS2_SB(dir);
445 struct gfs2_holder ghs[2];
446 struct inode *inode;
447 struct buffer_head *dibh;
448 int error;
449
450 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
451
452 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
453 if (IS_ERR(inode)) {
454 gfs2_holder_uninit(ghs);
455 return PTR_ERR(inode);
456 }
457
458 ip = ghs[1].gh_gl->gl_object;
459
460 ip->i_inode.i_nlink = 2;
461 i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
462 ip->i_diskflags |= GFS2_DIF_JDATA;
463 ip->i_entries = 2;
464
465 error = gfs2_meta_inode_buffer(ip, &dibh);
466
467 if (!gfs2_assert_withdraw(sdp, !error)) {
468 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
469 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
470
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
472 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
473 dent->de_inum = di->di_num; /* already GFS2 endian */
474 dent->de_type = cpu_to_be16(DT_DIR);
475 di->di_entries = cpu_to_be32(1);
476
477 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
478 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
479
480 gfs2_inum_out(dip, dent);
481 dent->de_type = cpu_to_be16(DT_DIR);
482
483 gfs2_dinode_out(ip, di);
484
485 brelse(dibh);
486 }
487
488 error = gfs2_change_nlink(dip, +1);
489 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
490
491 gfs2_trans_end(sdp);
492 if (dip->i_alloc->al_rgd)
493 gfs2_inplace_release(dip);
494 gfs2_quota_unlock(dip);
495 gfs2_alloc_put(dip);
496
497 gfs2_glock_dq_uninit_m(2, ghs);
498
499 d_instantiate(dentry, inode);
500 mark_inode_dirty(inode);
501
502 return 0;
503}
504
505/**
506 * gfs2_rmdiri - Remove a directory
507 * @dip: The parent directory of the directory to be removed
508 * @name: The name of the directory to be removed
509 * @ip: The GFS2 inode of the directory to be removed
510 *
511 * Assumes Glocks on dip and ip are held
512 *
513 * Returns: errno
514 */
515
516static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
517 struct gfs2_inode *ip)
518{
519 int error;
520
521 if (ip->i_entries != 2) {
522 if (gfs2_consist_inode(ip))
523 gfs2_dinode_print(ip);
524 return -EIO;
525 }
526
527 error = gfs2_dir_del(dip, name);
528 if (error)
529 return error;
530
531 error = gfs2_change_nlink(dip, -1);
532 if (error)
533 return error;
534
535 error = gfs2_dir_del(ip, &gfs2_qdot);
536 if (error)
537 return error;
538
539 error = gfs2_dir_del(ip, &gfs2_qdotdot);
540 if (error)
541 return error;
542
 543 /* It looks odd, but it really must be done twice: once for the
 name in the parent and once for the directory's own "." entry */
544 error = gfs2_change_nlink(ip, -1);
545 if (error)
546 return error;
547
548 error = gfs2_change_nlink(ip, -1);
549 if (error)
550 return error;
551
552 return error;
553}
554
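The double decrement in gfs2_rmdiri() above reflects how directory link counts are wired: a directory is referenced both by its name in the parent and by its own "." entry, and the parent in turn holds a link contributed by the child's "..". A worked example of the arithmetic in plain C, not part of the patch:

        #include <assert.h>

        int main(void)
        {
                int parent_nlink = 3;   /* ".", its own name, child's ".." */
                int child_nlink = 2;    /* its name in parent, plus "." */

                parent_nlink -= 1;      /* child's ".." goes away */
                child_nlink -= 1;       /* name removed from parent */
                child_nlink -= 1;       /* "." removed */

                assert(parent_nlink == 2);
                assert(child_nlink == 0);
                return 0;
        }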
555/**
556 * gfs2_rmdir - Remove a directory
557 * @dir: The parent directory of the directory to be removed
558 * @dentry: The dentry of the directory to remove
559 *
560 * Remove a directory. Call gfs2_rmdiri()
561 *
562 * Returns: errno
563 */
564
565static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
566{
567 struct gfs2_inode *dip = GFS2_I(dir);
568 struct gfs2_sbd *sdp = GFS2_SB(dir);
569 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
570 struct gfs2_holder ghs[3];
571 struct gfs2_rgrpd *rgd;
572 struct gfs2_holder ri_gh;
573 int error;
574
575 error = gfs2_rindex_hold(sdp, &ri_gh);
576 if (error)
577 return error;
578 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
579 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
580
581 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
582 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
583
584 error = gfs2_glock_nq(ghs); /* parent */
585 if (error)
586 goto out_parent;
587
588 error = gfs2_glock_nq(ghs + 1); /* child */
589 if (error)
590 goto out_child;
591
592 error = gfs2_glock_nq(ghs + 2); /* rgrp */
593 if (error)
594 goto out_rgrp;
595
596 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
597 if (error)
598 goto out_gunlock;
599
600 if (ip->i_entries < 2) {
601 if (gfs2_consist_inode(ip))
602 gfs2_dinode_print(ip);
603 error = -EIO;
604 goto out_gunlock;
605 }
606 if (ip->i_entries > 2) {
607 error = -ENOTEMPTY;
608 goto out_gunlock;
609 }
610
611 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
612 if (error)
613 goto out_gunlock;
614
615 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
616
617 gfs2_trans_end(sdp);
618
619out_gunlock:
620 gfs2_glock_dq(ghs + 2);
621out_rgrp:
622 gfs2_holder_uninit(ghs + 2);
623 gfs2_glock_dq(ghs + 1);
624out_child:
625 gfs2_holder_uninit(ghs + 1);
626 gfs2_glock_dq(ghs);
627out_parent:
628 gfs2_holder_uninit(ghs);
629 gfs2_glock_dq_uninit(&ri_gh);
630 return error;
631}
632
633/**
634 * gfs2_mknod - Make a special file
635 * @dir: The directory in which the special file will reside
636 * @dentry: The dentry of the special file
637 * @mode: The mode of the special file
638 * @rdev: The device specification of the special file
639 *
640 */
641
642static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
643 dev_t dev)
644{
645 struct gfs2_inode *dip = GFS2_I(dir);
646 struct gfs2_sbd *sdp = GFS2_SB(dir);
647 struct gfs2_holder ghs[2];
648 struct inode *inode;
649
650 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
651
652 inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
653 if (IS_ERR(inode)) {
654 gfs2_holder_uninit(ghs);
655 return PTR_ERR(inode);
656 }
657
658 gfs2_trans_end(sdp);
659 if (dip->i_alloc->al_rgd)
660 gfs2_inplace_release(dip);
661 gfs2_quota_unlock(dip);
662 gfs2_alloc_put(dip);
663
664 gfs2_glock_dq_uninit_m(2, ghs);
665
666 d_instantiate(dentry, inode);
667 mark_inode_dirty(inode);
668
669 return 0;
670}
671
672/*
673 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
674 * @this: move this
675 * @to: to here
676 *
677 * Follow @to back to the root and make sure we don't encounter @this
678 * Assumes we already hold the rename lock.
679 *
680 * Returns: errno
681 */
682
683static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
684{
685 struct inode *dir = &to->i_inode;
686 struct super_block *sb = dir->i_sb;
687 struct inode *tmp;
688 int error = 0;
689
690 igrab(dir);
691
692 for (;;) {
693 if (dir == &this->i_inode) {
694 error = -EINVAL;
695 break;
696 }
697 if (dir == sb->s_root->d_inode) {
698 error = 0;
699 break;
700 }
701
702 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
703 if (IS_ERR(tmp)) {
704 error = PTR_ERR(tmp);
705 break;
706 }
707
708 iput(dir);
709 dir = tmp;
710 }
711
712 iput(dir);
713
714 return error;
715}
716
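gfs2_ok_to_move() above guards against creating a cycle by looking up ".." from the destination directory until it reaches either the root or the directory being moved. A self-contained sketch of the same check using plain parent pointers; struct node and ok_to_move() are invented here and the real code does the walk via gfs2_lookupi() with proper locking and reference counting.

        #include <stddef.h>
        #include <stdio.h>
        #include <errno.h>

        struct node { struct node *parent; const char *name; };

        static int ok_to_move(struct node *this, struct node *to)
        {
                for (struct node *dir = to; dir; dir = dir->parent)
                        if (dir == this)
                                return -EINVAL; /* would create a cycle */
                return 0;
        }

        int main(void)
        {
                struct node root = { NULL, "/" };
                struct node a = { &root, "a" };
                struct node b = { &a, "b" };

                printf("move a under b: %d\n", ok_to_move(&a, &b));     /* -EINVAL */
                printf("move b under root: %d\n", ok_to_move(&b, &root)); /* 0 */
                return 0;
        }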
717/**
718 * gfs2_rename - Rename a file
719 * @odir: Parent directory of old file name
720 * @odentry: The old dentry of the file
721 * @ndir: Parent directory of new file name
722 * @ndentry: The new dentry of the file
723 *
724 * Returns: errno
725 */
726
727static int gfs2_rename(struct inode *odir, struct dentry *odentry,
728 struct inode *ndir, struct dentry *ndentry)
729{
730 struct gfs2_inode *odip = GFS2_I(odir);
731 struct gfs2_inode *ndip = GFS2_I(ndir);
732 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
733 struct gfs2_inode *nip = NULL;
734 struct gfs2_sbd *sdp = GFS2_SB(odir);
735 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
736 struct gfs2_rgrpd *nrgd;
737 unsigned int num_gh;
738 int dir_rename = 0;
739 int alloc_required = 0;
740 unsigned int x;
741 int error;
742
743 if (ndentry->d_inode) {
744 nip = GFS2_I(ndentry->d_inode);
745 if (ip == nip)
746 return 0;
747 }
748
749 error = gfs2_rindex_hold(sdp, &ri_gh);
750 if (error)
751 return error;
752
753 if (odip != ndip) {
754 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
755 0, &r_gh);
756 if (error)
757 goto out;
758
759 if (S_ISDIR(ip->i_inode.i_mode)) {
760 dir_rename = 1;
 761 /* don't move a directory into its own subdirectory */
762 error = gfs2_ok_to_move(ip, ndip);
763 if (error)
764 goto out_gunlock_r;
765 }
766 }
767
768 num_gh = 1;
769 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
770 if (odip != ndip) {
771 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
772 num_gh++;
773 }
774 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
775 num_gh++;
776
777 if (nip) {
778 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
779 num_gh++;
 780 /* grab the resource group lock for unlink flag twiddling;
 781 * this is the case where the target file already exists,
 782 * so we unlink it before doing the rename
 783 */
784 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
785 if (nrgd)
786 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
787 }
788
789 for (x = 0; x < num_gh; x++) {
790 error = gfs2_glock_nq(ghs + x);
791 if (error)
792 goto out_gunlock;
793 }
794
795 /* Check out the old directory */
796
797 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
798 if (error)
799 goto out_gunlock;
800
801 /* Check out the new directory */
802
803 if (nip) {
804 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
805 if (error)
806 goto out_gunlock;
807
808 if (S_ISDIR(nip->i_inode.i_mode)) {
809 if (nip->i_entries < 2) {
810 if (gfs2_consist_inode(nip))
811 gfs2_dinode_print(nip);
812 error = -EIO;
813 goto out_gunlock;
814 }
815 if (nip->i_entries > 2) {
816 error = -ENOTEMPTY;
817 goto out_gunlock;
818 }
819 }
820 } else {
821 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
822 if (error)
823 goto out_gunlock;
824
825 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
826 switch (error) {
827 case -ENOENT:
828 error = 0;
829 break;
830 case 0:
831 error = -EEXIST;
832 default:
833 goto out_gunlock;
834 };
835
836 if (odip != ndip) {
837 if (!ndip->i_inode.i_nlink) {
838 error = -EINVAL;
839 goto out_gunlock;
840 }
841 if (ndip->i_entries == (u32)-1) {
842 error = -EFBIG;
843 goto out_gunlock;
844 }
845 if (S_ISDIR(ip->i_inode.i_mode) &&
846 ndip->i_inode.i_nlink == (u32)-1) {
847 error = -EMLINK;
848 goto out_gunlock;
849 }
850 }
851 }
852
853 /* Check out the dir to be renamed */
854
855 if (dir_rename) {
856 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
857 if (error)
858 goto out_gunlock;
859 }
860
861 if (nip == NULL)
862 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
863 error = alloc_required;
864 if (error < 0)
865 goto out_gunlock;
866 error = 0;
867
868 if (alloc_required) {
869 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
870 if (!al) {
871 error = -ENOMEM;
872 goto out_gunlock;
873 }
874
875 error = gfs2_quota_lock_check(ndip);
876 if (error)
877 goto out_alloc;
878
879 al->al_requested = sdp->sd_max_dirres;
880
881 error = gfs2_inplace_reserve_ri(ndip);
882 if (error)
883 goto out_gunlock_q;
884
885 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
886 gfs2_rg_blocks(al) +
887 4 * RES_DINODE + 4 * RES_LEAF +
888 RES_STATFS + RES_QUOTA + 4, 0);
889 if (error)
890 goto out_ipreserv;
891 } else {
892 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
893 5 * RES_LEAF + 4, 0);
894 if (error)
895 goto out_gunlock;
896 }
897
898 /* Remove the target file, if it exists */
899
900 if (nip) {
901 if (S_ISDIR(nip->i_inode.i_mode))
902 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
903 else {
904 error = gfs2_dir_del(ndip, &ndentry->d_name);
905 if (error)
906 goto out_end_trans;
907 error = gfs2_change_nlink(nip, -1);
908 }
909 if (error)
910 goto out_end_trans;
911 }
912
913 if (dir_rename) {
914 error = gfs2_change_nlink(ndip, +1);
915 if (error)
916 goto out_end_trans;
917 error = gfs2_change_nlink(odip, -1);
918 if (error)
919 goto out_end_trans;
920
921 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
922 if (error)
923 goto out_end_trans;
924 } else {
925 struct buffer_head *dibh;
926 error = gfs2_meta_inode_buffer(ip, &dibh);
927 if (error)
928 goto out_end_trans;
929 ip->i_inode.i_ctime = CURRENT_TIME;
930 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
931 gfs2_dinode_out(ip, dibh->b_data);
932 brelse(dibh);
933 }
934
935 error = gfs2_dir_del(odip, &odentry->d_name);
936 if (error)
937 goto out_end_trans;
938
939 error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
940 if (error)
941 goto out_end_trans;
942
943out_end_trans:
944 gfs2_trans_end(sdp);
945out_ipreserv:
946 if (alloc_required)
947 gfs2_inplace_release(ndip);
948out_gunlock_q:
949 if (alloc_required)
950 gfs2_quota_unlock(ndip);
951out_alloc:
952 if (alloc_required)
953 gfs2_alloc_put(ndip);
954out_gunlock:
955 while (x--) {
956 gfs2_glock_dq(ghs + x);
957 gfs2_holder_uninit(ghs + x);
958 }
959out_gunlock_r:
960 if (r_gh.gh_gl)
961 gfs2_glock_dq_uninit(&r_gh);
962out:
963 gfs2_glock_dq_uninit(&ri_gh);
964 return error;
965}
966
967/**
968 * gfs2_follow_link - Follow a symbolic link
969 * @dentry: The dentry of the link
970 * @nd: Data that we pass to vfs_follow_link()
971 *
972 * This can handle symlinks of any size.
973 *
974 * Returns: 0 on success or error code
975 */
976
977static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
978{
979 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
980 struct gfs2_holder i_gh;
981 struct buffer_head *dibh;
982 unsigned int x, size;
983 char *buf;
984 int error;
985
986 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
987 error = gfs2_glock_nq(&i_gh);
988 if (error) {
989 gfs2_holder_uninit(&i_gh);
990 nd_set_link(nd, ERR_PTR(error));
991 return NULL;
992 }
993
994 size = (unsigned int)i_size_read(&ip->i_inode);
995 if (size == 0) {
996 gfs2_consist_inode(ip);
997 buf = ERR_PTR(-EIO);
998 goto out;
999 }
1000
1001 error = gfs2_meta_inode_buffer(ip, &dibh);
1002 if (error) {
1003 buf = ERR_PTR(error);
1004 goto out;
1005 }
1006
1007 x = size + 1;
1008 buf = kmalloc(x, GFP_NOFS);
1009 if (!buf)
1010 buf = ERR_PTR(-ENOMEM);
1011 else
1012 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1013 brelse(dibh);
1014out:
1015 gfs2_glock_dq_uninit(&i_gh);
1016 nd_set_link(nd, buf);
1017 return NULL;
1018}
1019
1020static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1021{
1022 char *s = nd_get_link(nd);
1023 if (!IS_ERR(s))
1024 kfree(s);
1025}
1026
1027/**
 1028 * gfs2_permission - Check whether access to an inode is allowed
1029 * @inode: The inode
1030 * @mask: The mask to be tested
1031 * @flags: Indicates whether this is an RCU path walk or not
1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only
 1035 * lock the glock if it has not already been done.
1036 *
1037 * Returns: errno
1038 */
1039
1040int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1041{
1042 struct gfs2_inode *ip;
1043 struct gfs2_holder i_gh;
1044 int error;
1045 int unlock = 0;
1046
1047
1048 ip = GFS2_I(inode);
1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error)
1054 return error;
1055 unlock = 1;
1056 }
1057
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES;
1060 else
1061 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh);
1064
1065 return error;
1066}
1067
1068static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{
1070 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 u32 ouid, ogid, nuid, ngid;
1073 int error;
1074
1075 ouid = inode->i_uid;
1076 ogid = inode->i_gid;
1077 nuid = attr->ia_uid;
1078 ngid = attr->ia_gid;
1079
1080 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1081 ouid = nuid = NO_QUOTA_CHANGE;
1082 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1083 ogid = ngid = NO_QUOTA_CHANGE;
1084
1085 if (!gfs2_alloc_get(ip))
1086 return -ENOMEM;
1087
1088 error = gfs2_quota_lock(ip, nuid, ngid);
1089 if (error)
1090 goto out_alloc;
1091
1092 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1093 error = gfs2_quota_check(ip, nuid, ngid);
1094 if (error)
1095 goto out_gunlock_q;
1096 }
1097
1098 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1099 if (error)
1100 goto out_gunlock_q;
1101
1102 error = gfs2_setattr_simple(ip, attr);
1103 if (error)
1104 goto out_end_trans;
1105
1106 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1107 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1108 gfs2_quota_change(ip, -blocks, ouid, ogid);
1109 gfs2_quota_change(ip, blocks, nuid, ngid);
1110 }
1111
1112out_end_trans:
1113 gfs2_trans_end(sdp);
1114out_gunlock_q:
1115 gfs2_quota_unlock(ip);
1116out_alloc:
1117 gfs2_alloc_put(ip);
1118 return error;
1119}
1120
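When setattr_chown() above actually changes the owner, the inode's block usage is refunded to the old uid/gid and charged to the new ones inside a single transaction, which is what the paired gfs2_quota_change() calls express. A small model of that transfer with invented types, not part of the patch:

        #include <assert.h>

        struct quota { long blocks; };

        /* move an inode's block usage from one owner's quota to another's */
        static void transfer(struct quota *from, struct quota *to, long blocks)
        {
                from->blocks -= blocks; /* refund the old owner */
                to->blocks += blocks;   /* charge the new owner */
        }

        int main(void)
        {
                struct quota old_uid = { .blocks = 100 };
                struct quota new_uid = { .blocks = 0 };

                transfer(&old_uid, &new_uid, 40); /* chown a 40-block inode */
                assert(old_uid.blocks == 60 && new_uid.blocks == 40);
                return 0;
        }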
1121/**
1122 * gfs2_setattr - Change attributes on an inode
1123 * @dentry: The dentry which is changing
1124 * @attr: The structure describing the change
1125 *
1126 * The VFS layer wants to change one or more of an inode's attributes. Write
1127 * that change out to disk.
1128 *
1129 * Returns: errno
1130 */
1131
1132static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1133{
1134 struct inode *inode = dentry->d_inode;
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_holder i_gh;
1137 int error;
1138
1139 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1140 if (error)
1141 return error;
1142
1143 error = -EPERM;
1144 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1145 goto out;
1146
1147 error = inode_change_ok(inode, attr);
1148 if (error)
1149 goto out;
1150
1151 if (attr->ia_valid & ATTR_SIZE)
1152 error = gfs2_setattr_size(inode, attr->ia_size);
1153 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1154 error = setattr_chown(inode, attr);
1155 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1156 error = gfs2_acl_chmod(ip, attr);
1157 else
1158 error = gfs2_setattr_simple(ip, attr);
1159
1160out:
1161 gfs2_glock_dq_uninit(&i_gh);
1162 if (!error)
1163 mark_inode_dirty(inode);
1164 return error;
1165}
1166
1167/**
1168 * gfs2_getattr - Read out an inode's attributes
1169 * @mnt: The vfsmount the inode is being accessed from
1170 * @dentry: The dentry to stat
1171 * @stat: The inode's stats
1172 *
1173 * This may be called from the VFS directly, or from within GFS2 with the
1174 * inode locked, so we look to see if the glock is already locked and only
1175 * lock the glock if it has not already been done. Note that it is the NFS
1176 * readdirplus operation which causes this to be called (from filldir)
1177 * with the glock already held.
1178 *
1179 * Returns: errno
1180 */
1181
1182static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1183 struct kstat *stat)
1184{
1185 struct inode *inode = dentry->d_inode;
1186 struct gfs2_inode *ip = GFS2_I(inode);
1187 struct gfs2_holder gh;
1188 int error;
1189 int unlock = 0;
1190
1191 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1192 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1193 if (error)
1194 return error;
1195 unlock = 1;
1196 }
1197
1198 generic_fillattr(inode, stat);
1199 if (unlock)
1200 gfs2_glock_dq_uninit(&gh);
1201
1202 return 0;
1203}
1204
1205static int gfs2_setxattr(struct dentry *dentry, const char *name,
1206 const void *data, size_t size, int flags)
1207{
1208 struct inode *inode = dentry->d_inode;
1209 struct gfs2_inode *ip = GFS2_I(inode);
1210 struct gfs2_holder gh;
1211 int ret;
1212
1213 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1214 ret = gfs2_glock_nq(&gh);
1215 if (ret == 0) {
1216 ret = generic_setxattr(dentry, name, data, size, flags);
1217 gfs2_glock_dq(&gh);
1218 }
1219 gfs2_holder_uninit(&gh);
1220 return ret;
1221}
1222
1223static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1224 void *data, size_t size)
1225{
1226 struct inode *inode = dentry->d_inode;
1227 struct gfs2_inode *ip = GFS2_I(inode);
1228 struct gfs2_holder gh;
1229 int ret;
1230
1231 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1232 ret = gfs2_glock_nq(&gh);
1233 if (ret == 0) {
1234 ret = generic_getxattr(dentry, name, data, size);
1235 gfs2_glock_dq(&gh);
1236 }
1237 gfs2_holder_uninit(&gh);
1238 return ret;
1239}
1240
1241static int gfs2_removexattr(struct dentry *dentry, const char *name)
1242{
1243 struct inode *inode = dentry->d_inode;
1244 struct gfs2_inode *ip = GFS2_I(inode);
1245 struct gfs2_holder gh;
1246 int ret;
1247
1248 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1249 ret = gfs2_glock_nq(&gh);
1250 if (ret == 0) {
1251 ret = generic_removexattr(dentry, name);
1252 gfs2_glock_dq(&gh);
1253 }
1254 gfs2_holder_uninit(&gh);
1255 return ret;
1256}
1257
1258static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1259 u64 start, u64 len)
1260{
1261 struct gfs2_inode *ip = GFS2_I(inode);
1262 struct gfs2_holder gh;
1263 int ret;
1264
1265 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1266 if (ret)
1267 return ret;
1268
1269 mutex_lock(&inode->i_mutex);
1270
1271 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1272 if (ret)
1273 goto out;
1274
1275 if (gfs2_is_stuffed(ip)) {
1276 u64 phys = ip->i_no_addr << inode->i_blkbits;
1277 u64 size = i_size_read(inode);
1278 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1279 FIEMAP_EXTENT_DATA_INLINE;
1280 phys += sizeof(struct gfs2_dinode);
1281 phys += start;
1282 if (start + len > size)
1283 len = size - start;
1284 if (start < size)
1285 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1286 len, flags);
1287 if (ret == 1)
1288 ret = 0;
1289 } else {
1290 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1291 gfs2_block_map);
1292 }
1293
1294 gfs2_glock_dq_uninit(&gh);
1295out:
1296 mutex_unlock(&inode->i_mutex);
1297 return ret;
1298}
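/*
 * Illustrative sketch (not GFS2 code): the physical-offset arithmetic
 * for the stuffed (inline-data) case in gfs2_fiemap() above. Inline
 * data lives in the dinode block itself, just past the on-disk dinode
 * header, so the byte address is (block number << blkbits) + header
 * size + start. The header size below is a stand-in, not authoritative.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t no_addr = 0x1234;      /* dinode block number (i_no_addr) */
        unsigned int blkbits = 12;      /* 4096-byte blocks */
        uint64_t dinode_hdr = 232;      /* stand-in for sizeof(struct gfs2_dinode) */
        uint64_t start = 0;             /* fiemap start offset within the file */

        uint64_t phys = (no_addr << blkbits) + dinode_hdr + start;

        printf("inline data begins at byte 0x%llx\n", (unsigned long long)phys);
        return 0;
}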
1299
1300const struct inode_operations gfs2_file_iops = {
1301 .permission = gfs2_permission,
1302 .setattr = gfs2_setattr,
1303 .getattr = gfs2_getattr,
1304 .setxattr = gfs2_setxattr,
1305 .getxattr = gfs2_getxattr,
1306 .listxattr = gfs2_listxattr,
1307 .removexattr = gfs2_removexattr,
1308 .fiemap = gfs2_fiemap,
1309};
1310
1311const struct inode_operations gfs2_dir_iops = {
1312 .create = gfs2_create,
1313 .lookup = gfs2_lookup,
1314 .link = gfs2_link,
1315 .unlink = gfs2_unlink,
1316 .symlink = gfs2_symlink,
1317 .mkdir = gfs2_mkdir,
1318 .rmdir = gfs2_rmdir,
1319 .mknod = gfs2_mknod,
1320 .rename = gfs2_rename,
1321 .permission = gfs2_permission,
1322 .setattr = gfs2_setattr,
1323 .getattr = gfs2_getattr,
1324 .setxattr = gfs2_setxattr,
1325 .getxattr = gfs2_getxattr,
1326 .listxattr = gfs2_listxattr,
1327 .removexattr = gfs2_removexattr,
1328 .fiemap = gfs2_fiemap,
1329};
1330
1331const struct inode_operations gfs2_symlink_iops = {
1332 .readlink = generic_readlink,
1333 .follow_link = gfs2_follow_link,
1334 .put_link = gfs2_put_link,
1335 .permission = gfs2_permission,
1336 .setattr = gfs2_setattr,
1337 .getattr = gfs2_getattr,
1338 .setxattr = gfs2_setxattr,
1339 .getxattr = gfs2_getxattr,
1340 .listxattr = gfs2_listxattr,
1341 .removexattr = gfs2_removexattr,
1342 .fiemap = gfs2_fiemap,
1343};
1344
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d9864c418..42e8d23bc047 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
38 38
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/mm.h>
41#include <linux/spinlock.h> 42#include <linux/spinlock.h>
42#include <linux/completion.h> 43#include <linux/completion.h>
43#include <linux/buffer_head.h> 44#include <linux/buffer_head.h>
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list);
77static atomic_t qd_lru_count = ATOMIC_INIT(0); 78static atomic_t qd_lru_count = ATOMIC_INIT(0);
78static DEFINE_SPINLOCK(qd_lru_lock); 79static DEFINE_SPINLOCK(qd_lru_lock);
79 80
80int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 81int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
81{ 82{
82 struct gfs2_quota_data *qd; 83 struct gfs2_quota_data *qd;
83 struct gfs2_sbd *sdp; 84 struct gfs2_sbd *sdp;
85 int nr_to_scan = sc->nr_to_scan;
84 86
85 if (nr == 0) 87 if (nr_to_scan == 0)
86 goto out; 88 goto out;
87 89
88 if (!(gfp_mask & __GFP_FS)) 90 if (!(sc->gfp_mask & __GFP_FS))
89 return -1; 91 return -1;
90 92
91 spin_lock(&qd_lru_lock); 93 spin_lock(&qd_lru_lock);
92 while (nr && !list_empty(&qd_lru_list)) { 94 while (nr_to_scan && !list_empty(&qd_lru_list)) {
93 qd = list_entry(qd_lru_list.next, 95 qd = list_entry(qd_lru_list.next,
94 struct gfs2_quota_data, qd_reclaim); 96 struct gfs2_quota_data, qd_reclaim);
95 sdp = qd->qd_gl->gl_sbd; 97 sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
110 spin_unlock(&qd_lru_lock); 112 spin_unlock(&qd_lru_lock);
111 kmem_cache_free(gfs2_quotad_cachep, qd); 113 kmem_cache_free(gfs2_quotad_cachep, qd);
112 spin_lock(&qd_lru_lock); 114 spin_lock(&qd_lru_lock);
113 nr--; 115 nr_to_scan--;
114 } 116 }
115 spin_unlock(&qd_lru_lock); 117 spin_unlock(&qd_lru_lock);
116 118
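/*
 * Illustrative sketch (not kernel code): the interface change this hunk
 * applies, which also appears below in fs/inode.c and fs/mbcache.c.
 * Shrinker callbacks stop taking (nr, gfp_mask) as separate arguments
 * and receive them bundled in struct shrink_control:
 *
 *      old: int shrink(struct shrinker *s, int nr, gfp_t gfp_mask);
 *      new: int shrink(struct shrinker *s, struct shrink_control *sc);
 *
 * A minimal userspace model of the new shape:
 */
#include <stdio.h>

struct shrink_control_model {
        unsigned int gfp_mask;
        int nr_to_scan;
};

static int shrink_cache(struct shrink_control_model *sc)
{
        int freed = 0;

        while (sc->nr_to_scan-- > 0)
                freed++;        /* stand-in for reclaiming one cached object */
        return freed;
}

int main(void)
{
        struct shrink_control_model sc = { .gfp_mask = 0, .nr_to_scan = 3 };

        printf("freed %d objects\n", shrink_cache(&sc));
        return 0;
}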
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236ca48bd..90bf1c302a98 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
12 12
13struct gfs2_inode; 13struct gfs2_inode;
14struct gfs2_sbd; 14struct gfs2_sbd;
15struct shrink_control;
15 16
16#define NO_QUOTA_CHANGE ((u32)-1) 17#define NO_QUOTA_CHANGE ((u32)-1)
17 18
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
51 return ret; 52 return ret;
52} 53}
53 54
54extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); 55extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
56 struct shrink_control *sc);
55extern const struct quotactl_ops gfs2_quotactl_ops; 57extern const struct quotactl_ops gfs2_quotactl_ops;
56 58
57#endif /* __QUOTA_DOT_H__ */ 59#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6fcae8469f6d..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
78 78
79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, 79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
80 unsigned char *buf2, unsigned int offset, 80 unsigned char *buf2, unsigned int offset,
81 unsigned int buflen, u32 block, 81 struct gfs2_bitmap *bi, u32 block,
82 unsigned char new_state) 82 unsigned char new_state)
83{ 83{
84 unsigned char *byte1, *byte2, *end, cur_state; 84 unsigned char *byte1, *byte2, *end, cur_state;
85 unsigned int buflen = bi->bi_len;
85 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 86 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
86 87
87 byte1 = buf1 + offset + (block / GFS2_NBBY); 88 byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 93 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
93 94
94 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 95 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
96 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
97 "new_state=%d\n",
98 (unsigned long long)block, cur_state, new_state);
99 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
100 (unsigned long long)rgd->rd_addr,
101 (unsigned long)bi->bi_start);
102 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
103 (unsigned long)bi->bi_offset,
104 (unsigned long)bi->bi_len);
105 dump_stack();
95 gfs2_consist_rgrpd(rgd); 106 gfs2_consist_rgrpd(rgd);
96 return; 107 return;
97 } 108 }
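/*
 * Illustrative sketch (not GFS2 code): the two-bit-per-block bitmap
 * arithmetic that gfs2_setbit() above is built on. GFS2 packs four
 * block states into each byte (GFS2_NBBY == 4), two bits per block
 * (GFS2_BIT_SIZE == 2), masked with GFS2_BIT_MASK == 0x3.
 */
#include <stdio.h>

#define GFS2_NBBY       4
#define GFS2_BIT_SIZE   2
#define GFS2_BIT_MASK   0x3

static unsigned int get_state(const unsigned char *buf, unsigned int block)
{
        unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;

        return (buf[block / GFS2_NBBY] >> bit) & GFS2_BIT_MASK;
}

static void set_state(unsigned char *buf, unsigned int block,
                      unsigned int state)
{
        unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
        unsigned char *byte = &buf[block / GFS2_NBBY];

        /* Merge the new two-bit field without disturbing its neighbours. */
        *byte ^= (*byte ^ (unsigned char)(state << bit)) &
                 (GFS2_BIT_MASK << bit);
}

int main(void)
{
        unsigned char bitmap[2] = { 0 };

        set_state(bitmap, 5, 1);        /* mark block 5 as state 1 (in use) */
        printf("block 5 state = %u\n", get_state(bitmap, 5));
        return 0;
}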
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
381 392
382 if (gl) { 393 if (gl) {
383 gl->gl_object = NULL; 394 gl->gl_object = NULL;
395 gfs2_glock_add_to_lru(gl);
384 gfs2_glock_put(gl); 396 gfs2_glock_put(gl);
385 } 397 }
386 398
@@ -1365,7 +1377,7 @@ skip:
1365 1377
1366 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1378 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1367 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1379 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1368 bi->bi_len, blk, new_state); 1380 bi, blk, new_state);
1369 goal = blk; 1381 goal = blk;
1370 while (*n < elen) { 1382 while (*n < elen) {
1371 goal++; 1383 goal++;
@@ -1375,7 +1387,7 @@ skip:
1375 GFS2_BLKST_FREE) 1387 GFS2_BLKST_FREE)
1376 break; 1388 break;
1377 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1389 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1378 bi->bi_len, goal, new_state); 1390 bi, goal, new_state);
1379 (*n)++; 1391 (*n)++;
1380 } 1392 }
1381out: 1393out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1432 } 1444 }
1433 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1445 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1434 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1446 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1435 bi->bi_len, buf_blk, new_state); 1447 bi, buf_blk, new_state);
1436 } 1448 }
1437 1449
1438 return rgd; 1450 return rgd;
@@ -1617,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1629 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1630
1619 gfs2_trans_add_rg(rgd); 1631 gfs2_trans_add_rg(rgd);
1632
1633 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen);
1620} 1636}
1621 1637
1622/** 1638/**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9f28e66dad1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h> 24#include <linux/wait.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
700 mutex_unlock(&sdp->sd_freeze_lock); 701 mutex_unlock(&sdp->sd_freeze_lock);
701} 702}
702 703
704void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
705{
706 struct gfs2_dinode *str = buf;
707
708 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
709 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
710 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
711 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
712 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
713 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
714 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
715 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
716 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
717 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
718 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
719 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
720 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
721 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
722
723 str->di_goal_meta = cpu_to_be64(ip->i_goal);
724 str->di_goal_data = cpu_to_be64(ip->i_goal);
725 str->di_generation = cpu_to_be64(ip->i_generation);
726
727 str->di_flags = cpu_to_be32(ip->i_diskflags);
728 str->di_height = cpu_to_be16(ip->i_height);
729 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
730 !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
731 GFS2_FORMAT_DE : 0);
732 str->di_depth = cpu_to_be16(ip->i_depth);
733 str->di_entries = cpu_to_be32(ip->i_entries);
734
735 str->di_eattr = cpu_to_be64(ip->i_eattr);
736 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
737 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
738 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
739}
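/*
 * Illustrative sketch (not kernel code): gfs2_dinode_out() above is a
 * field-by-field host-to-big-endian serialisation of the in-core inode.
 * The byte swap below matches cpu_to_be32() on a little-endian host; on
 * a big-endian host cpu_to_be32() is the identity.
 */
#include <inttypes.h>
#include <stdio.h>

static uint32_t swap32(uint32_t v)
{
        return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
               ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
}

int main(void)
{
        uint32_t mode = 0100644;                /* S_IFREG | 0644 */
        uint32_t di_mode = swap32(mode);        /* on-disk big-endian form */

        printf("host 0x%08" PRIx32 " -> disk 0x%08" PRIx32 "\n",
               mode, di_mode);
        return 0;
}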
703 740
704/** 741/**
705 * gfs2_write_inode - Make sure the inode is stable on the disk 742 * gfs2_write_inode - Make sure the inode is stable on the disk
706 * @inode: The inode 743 * @inode: The inode
707 * @sync: synchronous write flag 744 * @wbc: The writeback control structure
708 * 745 *
709 * Returns: errno 746 * Returns: errno
710 */ 747 */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
713{ 750{
714 struct gfs2_inode *ip = GFS2_I(inode); 751 struct gfs2_inode *ip = GFS2_I(inode);
715 struct gfs2_sbd *sdp = GFS2_SB(inode); 752 struct gfs2_sbd *sdp = GFS2_SB(inode);
753 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
754 struct backing_dev_info *bdi = metamapping->backing_dev_info;
716 struct gfs2_holder gh; 755 struct gfs2_holder gh;
717 struct buffer_head *bh; 756 struct buffer_head *bh;
718 struct timespec atime; 757 struct timespec atime;
719 struct gfs2_dinode *di; 758 struct gfs2_dinode *di;
720 int ret = 0; 759 int ret = -EAGAIN;
721 760
722 /* Check this is a "normal" inode, etc */ 761 /* Skip timestamp update, if this is from a memalloc */
723 if (current->flags & PF_MEMALLOC) 762 if (current->flags & PF_MEMALLOC)
724 return 0; 763 goto do_flush;
725 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 764 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
726 if (ret) 765 if (ret)
727 goto do_flush; 766 goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
745do_flush: 784do_flush:
746 if (wbc->sync_mode == WB_SYNC_ALL) 785 if (wbc->sync_mode == WB_SYNC_ALL)
747 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 786 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
787 filemap_fdatawrite(metamapping);
788 if (bdi->dirty_exceeded)
789 gfs2_ail1_flush(sdp, wbc);
790 if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
791 ret = filemap_fdatawait(metamapping);
792 if (ret)
793 mark_inode_dirty_sync(inode);
748 return ret; 794 return ret;
749} 795}
750 796
@@ -874,8 +920,9 @@ restart:
874 920
875static int gfs2_sync_fs(struct super_block *sb, int wait) 921static int gfs2_sync_fs(struct super_block *sb, int wait)
876{ 922{
877 if (wait && sb->s_fs_info) 923 struct gfs2_sbd *sdp = sb->s_fs_info;
878 gfs2_log_flush(sb->s_fs_info, NULL); 924 if (wait && sdp)
925 gfs2_log_flush(sdp, NULL);
879 return 0; 926 return 0;
880} 927}
881 928
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1308 return 0; 1355 return 0;
1309} 1356}
1310 1357
1358static void gfs2_final_release_pages(struct gfs2_inode *ip)
1359{
1360 struct inode *inode = &ip->i_inode;
1361 struct gfs2_glock *gl = ip->i_gl;
1362
1363 truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
1364 truncate_inode_pages(&inode->i_data, 0);
1365
1366 if (atomic_read(&gl->gl_revokes) == 0) {
1367 clear_bit(GLF_LFLUSH, &gl->gl_flags);
1368 clear_bit(GLF_DIRTY, &gl->gl_flags);
1369 }
1370}
1371
1372static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1373{
1374 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1375 struct gfs2_alloc *al;
1376 struct gfs2_rgrpd *rgd;
1377 int error;
1378
1379 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1380 gfs2_consist_inode(ip);
1381 return -EIO;
1382 }
1383
1384 al = gfs2_alloc_get(ip);
1385 if (!al)
1386 return -ENOMEM;
1387
1388 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1389 if (error)
1390 goto out;
1391
1392 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1393 if (error)
1394 goto out_qs;
1395
1396 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1397 if (!rgd) {
1398 gfs2_consist_inode(ip);
1399 error = -EIO;
1400 goto out_rindex_relse;
1401 }
1402
1403 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1404 &al->al_rgd_gh);
1405 if (error)
1406 goto out_rindex_relse;
1407
1408 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1409 sdp->sd_jdesc->jd_blocks);
1410 if (error)
1411 goto out_rg_gunlock;
1412
1413 gfs2_free_di(rgd, ip);
1414
1415 gfs2_final_release_pages(ip);
1416
1417 gfs2_trans_end(sdp);
1418
1419out_rg_gunlock:
1420 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1421out_rindex_relse:
1422 gfs2_glock_dq_uninit(&al->al_ri_gh);
1423out_qs:
1424 gfs2_quota_unhold(ip);
1425out:
1426 gfs2_alloc_put(ip);
1427 return error;
1428}
1429
1311/* 1430/*
1312 * We have to (at the moment) hold the inodes main lock to cover 1431 * We have to (at the moment) hold the inodes main lock to cover
1313 * the gap between unlocking the shared lock on the iopen lock and 1432 * the gap between unlocking the shared lock on the iopen lock and
@@ -1371,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
1371 } 1490 }
1372 1491
1373 error = gfs2_dinode_dealloc(ip); 1492 error = gfs2_dinode_dealloc(ip);
1374 if (error) 1493 goto out_unlock;
1375 goto out_unlock;
1376 1494
1377out_truncate: 1495out_truncate:
1378 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1496 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1379 if (error) 1497 if (error)
1380 goto out_unlock; 1498 goto out_unlock;
1381 /* Needs to be done before glock release & also in a transaction */ 1499 gfs2_final_release_pages(ip);
1382 truncate_inode_pages(&inode->i_data, 0);
1383 gfs2_trans_end(sdp); 1500 gfs2_trans_end(sdp);
1384 1501
1385out_unlock: 1502out_unlock:
@@ -1394,6 +1511,7 @@ out:
1394 end_writeback(inode); 1511 end_writeback(inode);
1395 1512
1396 ip->i_gl->gl_object = NULL; 1513 ip->i_gl->gl_object = NULL;
1514 gfs2_glock_add_to_lru(ip->i_gl);
1397 gfs2_glock_put(ip->i_gl); 1515 gfs2_glock_put(ip->i_gl);
1398 ip->i_gl = NULL; 1516 ip->i_gl = NULL;
1399 if (ip->i_iopen_gh.gh_gl) { 1517 if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
81 81
82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) 82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
83{ 83{
84 const u8 *uuid = sdp->sd_sb.sb_uuid; 84 struct super_block *s = sdp->sd_vfs;
85 const u8 *uuid = s->s_uuid;
85 buf[0] = '\0'; 86 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 87 if (!gfs2_uuid_valid(uuid))
87 return 0; 88 return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
616 struct kobj_uevent_env *env) 617 struct kobj_uevent_env *env)
617{ 618{
618 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 619 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
619 const u8 *uuid = sdp->sd_sb.sb_uuid; 620 struct super_block *s = sdp->sd_vfs;
621 const u8 *uuid = s->s_uuid;
620 622
621 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 623 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
622 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 624 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h>
13#include "incore.h" 14#include "incore.h"
14#include "glock.h" 15#include "glock.h"
15 16
@@ -40,7 +41,9 @@
40 {(1UL << GLF_REPLY_PENDING), "r" }, \ 41 {(1UL << GLF_REPLY_PENDING), "r" }, \
41 {(1UL << GLF_INITIAL), "I" }, \ 42 {(1UL << GLF_INITIAL), "I" }, \
42 {(1UL << GLF_FROZEN), "F" }, \ 43 {(1UL << GLF_FROZEN), "F" }, \
43 {(1UL << GLF_QUEUED), "q" }) 44 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" })
44 47
45#ifndef NUMPTY 48#ifndef NUMPTY
46#define NUMPTY 49#define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
94 __entry->new_state = glock_trace_state(new_state); 97 __entry->new_state = glock_trace_state(new_state);
95 __entry->tgt_state = glock_trace_state(gl->gl_target); 98 __entry->tgt_state = glock_trace_state(gl->gl_target);
96 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 99 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
97 __entry->flags = gl->gl_flags; 100 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
98 ), 101 ),
99 102
100 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", 103 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
127 __entry->gltype = gl->gl_name.ln_type; 130 __entry->gltype = gl->gl_name.ln_type;
128 __entry->glnum = gl->gl_name.ln_number; 131 __entry->glnum = gl->gl_name.ln_number;
129 __entry->cur_state = glock_trace_state(gl->gl_state); 132 __entry->cur_state = glock_trace_state(gl->gl_state);
130 __entry->flags = gl->gl_flags; 133 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
131 ), 134 ),
132 135
133 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", 136 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
161 __entry->glnum = gl->gl_name.ln_number; 164 __entry->glnum = gl->gl_name.ln_number;
162 __entry->cur_state = glock_trace_state(gl->gl_state); 165 __entry->cur_state = glock_trace_state(gl->gl_state);
163 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 166 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
164 __entry->flags = gl->gl_flags; 167 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
165 ), 168 ),
166 169
167 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", 170 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
318 MINOR(__entry->dev), __entry->blocks) 321 MINOR(__entry->dev), __entry->blocks)
319); 322);
320 323
324/* Writing back the AIL */
325TRACE_EVENT(gfs2_ail_flush,
326
327 TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
328
329 TP_ARGS(sdp, wbc, start),
330
331 TP_STRUCT__entry(
332 __field( dev_t, dev )
333 __field( int, start )
334 __field( int, sync_mode )
335 __field( long, nr_to_write )
336 ),
337
338 TP_fast_assign(
339 __entry->dev = sdp->sd_vfs->s_dev;
340 __entry->start = start;
341 __entry->sync_mode = wbc->sync_mode;
342 __entry->nr_to_write = wbc->nr_to_write;
343 ),
344
345 TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
346 MINOR(__entry->dev), __entry->start ? "start" : "end",
347 __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
348 __entry->nr_to_write)
349);
350
321/* Section 3 - bmap 351/* Section 3 - bmap
322 * 352 *
323 * Objectives: 353 * Objectives:
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index b4d70b13be92..1cb70cdba2c1 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -253,6 +253,9 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
253 struct inode *inode = dentry->d_inode; 253 struct inode *inode = dentry->d_inode;
254 int res; 254 int res;
255 255
256 if (S_ISDIR(inode->i_mode))
257 dentry_unhash(dentry);
258
256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2) 259 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
257 return -ENOTEMPTY; 260 return -ENOTEMPTY;
258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 261 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
@@ -283,6 +286,9 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
283 286
284 /* Unlink destination if it already exists */ 287 /* Unlink destination if it already exists */
285 if (new_dentry->d_inode) { 288 if (new_dentry->d_inode) {
289 if (S_ISDIR(new_dentry->d_inode->i_mode))
290 dentry_unhash(new_dentry);
291
286 res = hfs_remove(new_dir, new_dentry); 292 res = hfs_remove(new_dir, new_dentry);
287 if (res) 293 if (res)
288 return res; 294 return res;
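/*
 * Illustrative sketch (not filesystem code): the dentry_unhash() hunks
 * in this and the following directory-operation patches all follow one
 * pattern. The VFS no longer unhashes the victim dentry before
 * rmdir/rename, so filesystems that relied on that now do it
 * themselves: unconditionally for rmdir (the victim is always a
 * directory), and only for directory victims in rename. A userspace
 * model of the two call sites:
 */
#include <stdio.h>

struct dentry_model { int hashed; int is_dir; };

static void dentry_unhash_model(struct dentry_model *d)
{
        d->hashed = 0;  /* drop from the lookup hash */
}

static int rmdir_model(struct dentry_model *victim)
{
        dentry_unhash_model(victim);
        /* ... filesystem-specific emptiness check and removal ... */
        return 0;
}

static int rename_model(struct dentry_model *target)
{
        if (target && target->is_dir)
                dentry_unhash_model(target);
        /* ... filesystem-specific rename work ... */
        return 0;
}

int main(void)
{
        struct dentry_model dir = { 1, 1 }, file = { 1, 0 };

        rename_model(&file);    /* non-directory target stays hashed */
        rmdir_model(&dir);      /* directory victim is unhashed */
        printf("file.hashed=%d dir.hashed=%d\n", file.hashed, dir.hashed);
        return 0;
}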
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4df5059c25da..b28835091dd0 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -370,6 +370,8 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
370 struct inode *inode = dentry->d_inode; 370 struct inode *inode = dentry->d_inode;
371 int res; 371 int res;
372 372
373 dentry_unhash(dentry);
374
373 if (inode->i_size != 2) 375 if (inode->i_size != 2)
374 return -ENOTEMPTY; 376 return -ENOTEMPTY;
375 377
@@ -467,10 +469,12 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
467 469
468 /* Unlink destination if it already exists */ 470 /* Unlink destination if it already exists */
469 if (new_dentry->d_inode) { 471 if (new_dentry->d_inode) {
470 if (S_ISDIR(new_dentry->d_inode->i_mode)) 472 if (S_ISDIR(new_dentry->d_inode->i_mode)) {
473 dentry_unhash(new_dentry);
471 res = hfsplus_rmdir(new_dir, new_dentry); 474 res = hfsplus_rmdir(new_dir, new_dentry);
472 else 475 } else {
473 res = hfsplus_unlink(new_dir, new_dentry); 476 res = hfsplus_unlink(new_dir, new_dentry);
477 }
474 if (res) 478 if (res)
475 return res; 479 return res;
476 } 480 }
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2638c834ed28..e6816b9e6903 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -683,6 +683,8 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
683 char *file; 683 char *file;
684 int err; 684 int err;
685 685
686 dentry_unhash(dentry);
687
686 if ((file = dentry_name(dentry)) == NULL) 688 if ((file = dentry_name(dentry)) == NULL)
687 return -ENOMEM; 689 return -ENOMEM;
688 err = do_rmdir(file); 690 err = do_rmdir(file);
@@ -736,6 +738,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
736 char *from_name, *to_name; 738 char *from_name, *to_name;
737 int err; 739 int err;
738 740
741 if (to->d_inode && S_ISDIR(to->d_inode->i_mode))
742 dentry_unhash(to);
743
739 if ((from_name = dentry_name(from)) == NULL) 744 if ((from_name = dentry_name(from)) == NULL)
740 return -ENOMEM; 745 return -ENOMEM;
741 if ((to_name = dentry_name(to)) == NULL) { 746 if ((to_name = dentry_name(to)) == NULL) {
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 1f05839c27a7..ff0ce21c0867 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -395,7 +395,6 @@ again:
395 395
396 dentry_unhash(dentry); 396 dentry_unhash(dentry);
397 if (!d_unhashed(dentry)) { 397 if (!d_unhashed(dentry)) {
398 dput(dentry);
399 hpfs_unlock(dir->i_sb); 398 hpfs_unlock(dir->i_sb);
400 return -ENOSPC; 399 return -ENOSPC;
401 } 400 }
@@ -403,7 +402,6 @@ again:
403 !S_ISREG(inode->i_mode) || 402 !S_ISREG(inode->i_mode) ||
404 get_write_access(inode)) { 403 get_write_access(inode)) {
405 d_rehash(dentry); 404 d_rehash(dentry);
406 dput(dentry);
407 } else { 405 } else {
408 struct iattr newattrs; 406 struct iattr newattrs;
409 /*printk("HPFS: truncating file before delete.\n");*/ 407 /*printk("HPFS: truncating file before delete.\n");*/
@@ -411,7 +409,6 @@ again:
411 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 409 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
412 err = notify_change(dentry, &newattrs); 410 err = notify_change(dentry, &newattrs);
413 put_write_access(inode); 411 put_write_access(inode);
414 dput(dentry);
415 if (!err) 412 if (!err)
416 goto again; 413 goto again;
417 } 414 }
@@ -442,6 +439,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
442 int err; 439 int err;
443 int r; 440 int r;
444 441
442 dentry_unhash(dentry);
443
445 hpfs_adjust_length(name, &len); 444 hpfs_adjust_length(name, &len);
446 hpfs_lock(dir->i_sb); 445 hpfs_lock(dir->i_sb);
447 err = -ENOENT; 446 err = -ENOENT;
@@ -535,6 +534,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
535 struct buffer_head *bh; 534 struct buffer_head *bh;
536 struct fnode *fnode; 535 struct fnode *fnode;
537 int err; 536 int err;
537
538 if (new_inode && S_ISDIR(new_inode->i_mode))
539 dentry_unhash(new_dentry);
540
538 if ((err = hpfs_chk_name(new_name, &new_len))) return err; 541 if ((err = hpfs_chk_name(new_name, &new_len))) return err;
539 err = 0; 542 err = 0;
540 hpfs_adjust_length(old_name, &old_len); 543 hpfs_adjust_length(old_name, &old_len);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1cd03ff..7aafeb8fa300 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
412 pgoff = offset >> PAGE_SHIFT; 412 pgoff = offset >> PAGE_SHIFT;
413 413
414 i_size_write(inode, offset); 414 i_size_write(inode, offset);
415 spin_lock(&mapping->i_mmap_lock); 415 mutex_lock(&mapping->i_mmap_mutex);
416 if (!prio_tree_empty(&mapping->i_mmap)) 416 if (!prio_tree_empty(&mapping->i_mmap))
417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
418 spin_unlock(&mapping->i_mmap_lock); 418 mutex_unlock(&mapping->i_mmap_mutex);
419 truncate_hugepages(inode, offset); 419 truncate_hugepages(inode, offset);
420 return 0; 420 return 0;
421} 421}
@@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void)
921 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 921 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
922} 922}
923 923
924struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, 924struct file *hugetlb_file_setup(const char *name, size_t size,
925 vm_flags_t acctflag,
925 struct user_struct **user, int creat_flags) 926 struct user_struct **user, int creat_flags)
926{ 927{
927 int error = -ENOMEM; 928 int error = -ENOMEM;
diff --git a/fs/inode.c b/fs/inode.c
index 33c963d08ab4..990d284877a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/prefetch.h>
27#include <linux/ima.h> 28#include <linux/ima.h>
28#include <linux/cred.h> 29#include <linux/cred.h>
29#include "internal.h" 30#include "internal.h"
@@ -325,12 +326,11 @@ void address_space_init_once(struct address_space *mapping)
325 memset(mapping, 0, sizeof(*mapping)); 326 memset(mapping, 0, sizeof(*mapping));
326 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); 327 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
327 spin_lock_init(&mapping->tree_lock); 328 spin_lock_init(&mapping->tree_lock);
328 spin_lock_init(&mapping->i_mmap_lock); 329 mutex_init(&mapping->i_mmap_mutex);
329 INIT_LIST_HEAD(&mapping->private_list); 330 INIT_LIST_HEAD(&mapping->private_list);
330 spin_lock_init(&mapping->private_lock); 331 spin_lock_init(&mapping->private_lock);
331 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); 332 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
332 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); 333 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
333 mutex_init(&mapping->unmap_mutex);
334} 334}
335EXPORT_SYMBOL(address_space_init_once); 335EXPORT_SYMBOL(address_space_init_once);
336 336
@@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan)
751 * This function is passed the number of inodes to scan, and it returns the 751 * This function is passed the number of inodes to scan, and it returns the
752 * total number of remaining possibly-reclaimable inodes. 752 * total number of remaining possibly-reclaimable inodes.
753 */ 753 */
754static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 754static int shrink_icache_memory(struct shrinker *shrink,
755 struct shrink_control *sc)
755{ 756{
757 int nr = sc->nr_to_scan;
758 gfp_t gfp_mask = sc->gfp_mask;
759
756 if (nr) { 760 if (nr) {
757 /* 761 /*
758 * Nasty deadlock avoidance. We may hold various FS locks, 762 * Nasty deadlock avoidance. We may hold various FS locks,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 69b180459463..72ffa974b0b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal)
302 * all outstanding updates to complete. 302 * all outstanding updates to complete.
303 */ 303 */
304 304
305#ifdef COMMIT_STATS
306 spin_lock(&journal->j_list_lock);
307 summarise_journal_usage(journal);
308 spin_unlock(&journal->j_list_lock);
309#endif
310
311 /* Do we need to erase the effects of a prior journal_flush? */ 305 /* Do we need to erase the effects of a prior journal_flush? */
312 if (journal->j_flags & JFS_FLUSHED) { 306 if (journal->j_flags & JFS_FLUSHED) {
313 jbd_debug(3, "super block updated\n"); 307 jbd_debug(3, "super block updated\n");
@@ -722,8 +716,13 @@ wait_for_iobuf:
722 required. */ 716 required. */
723 JBUFFER_TRACE(jh, "file as BJ_Forget"); 717 JBUFFER_TRACE(jh, "file as BJ_Forget");
724 journal_file_buffer(jh, commit_transaction, BJ_Forget); 718 journal_file_buffer(jh, commit_transaction, BJ_Forget);
725 /* Wake up any transactions which were waiting for this 719 /*
726 IO to complete */ 720 * Wake up any transactions which were waiting for this
721 * IO to complete. The barrier must be here so that changes
722 * by journal_file_buffer() take effect before wake_up_bit()
723 * does the waitqueue check.
724 */
725 smp_mb();
727 wake_up_bit(&bh->b_state, BH_Unshadow); 726 wake_up_bit(&bh->b_state, BH_Unshadow);
728 JBUFFER_TRACE(jh, "brelse shadowed buffer"); 727 JBUFFER_TRACE(jh, "brelse shadowed buffer");
729 __brelse(bh); 728 __brelse(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b3713afaaa9e..e2d4285fbe90 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal)
437int __log_start_commit(journal_t *journal, tid_t target) 437int __log_start_commit(journal_t *journal, tid_t target)
438{ 438{
439 /* 439 /*
440 * Are we already doing a recent enough commit? 440 * The only transaction we can possibly wait upon is the
441 * currently running transaction (if it exists). Otherwise,
442 * the target tid must be an old one.
441 */ 443 */
442 if (!tid_geq(journal->j_commit_request, target)) { 444 if (journal->j_running_transaction &&
445 journal->j_running_transaction->t_tid == target) {
443 /* 446 /*
444 * We want a new commit: OK, mark the request and wakeup the 447 * We want a new commit: OK, mark the request and wakeup the
445 * commit thread. We do _not_ do the commit ourselves. 448 * commit thread. We do _not_ do the commit ourselves.
@@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target)
451 journal->j_commit_sequence); 454 journal->j_commit_sequence);
452 wake_up(&journal->j_wait_commit); 455 wake_up(&journal->j_wait_commit);
453 return 1; 456 return 1;
454 } 457 } else if (!tid_geq(journal->j_commit_request, target))
458 /* This should never happen, but if it does, preserve
459 the evidence before kjournald goes into a loop and
460 increments j_commit_sequence beyond all recognition. */
461 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
462 journal->j_commit_request, journal->j_commit_sequence,
463 target, journal->j_running_transaction ?
464 journal->j_running_transaction->t_tid : 0);
455 return 0; 465 return 0;
456} 466}
457 467
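/*
 * Illustrative sketch (not kernel code): tid_geq(), used by the
 * __log_start_commit() hunk above and its jbd2 counterpart below, is a
 * wrap-safe sequence comparison. Transaction ids are free-running
 * 32-bit counters, so "x >= y" is evaluated on the signed difference
 * rather than on the raw values.
 */
#include <stdio.h>

typedef unsigned int tid_t;

static int tid_geq(tid_t x, tid_t y)
{
        int difference = (int)(x - y);

        return difference >= 0;
}

int main(void)
{
        printf("%d\n", tid_geq(5, 3));                   /* 1: plainly newer */
        printf("%d\n", tid_geq(0x00000002, 0xfffffffe)); /* 1: newer across wrap */
        printf("%d\n", tid_geq(3, 5));                   /* 0: older */
        return 0;
}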
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d2319651b2..f7ee81a065da 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks)
266 * This function is visible to journal users (like ext3fs), so is not 266 * This function is visible to journal users (like ext3fs), so is not
267 * called with the journal already locked. 267 * called with the journal already locked.
268 * 268 *
269 * Return a pointer to a newly allocated handle, or NULL on failure 269 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
270 * on failure.
270 */ 271 */
271handle_t *journal_start(journal_t *journal, int nblocks) 272handle_t *journal_start(journal_t *journal, int nblocks)
272{ 273{
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6e28000a4b21..7f21cf3aaf92 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal,
219 ret = err; 219 ret = err;
220 spin_lock(&journal->j_list_lock); 220 spin_lock(&journal->j_list_lock);
221 J_ASSERT(jinode->i_transaction == commit_transaction); 221 J_ASSERT(jinode->i_transaction == commit_transaction);
222 commit_transaction->t_flushed_data_blocks = 1;
223 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); 222 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
224 smp_mb__after_clear_bit(); 223 smp_mb__after_clear_bit();
225 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 224 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -338,12 +337,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
338 * all outstanding updates to complete. 337 * all outstanding updates to complete.
339 */ 338 */
340 339
341#ifdef COMMIT_STATS
342 spin_lock(&journal->j_list_lock);
343 summarise_journal_usage(journal);
344 spin_unlock(&journal->j_list_lock);
345#endif
346
347 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 340 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
348 if (journal->j_flags & JBD2_FLUSHED) { 341 if (journal->j_flags & JBD2_FLUSHED) {
349 jbd_debug(3, "super block updated\n"); 342 jbd_debug(3, "super block updated\n");
@@ -678,12 +671,16 @@ start_journal_io:
678 err = 0; 671 err = 0;
679 } 672 }
680 673
674 write_lock(&journal->j_state_lock);
675 J_ASSERT(commit_transaction->t_state == T_COMMIT);
676 commit_transaction->t_state = T_COMMIT_DFLUSH;
677 write_unlock(&journal->j_state_lock);
681 /* 678 /*
682 * If the journal is not located on the file system device, 679 * If the journal is not located on the file system device,
683 * then we must flush the file system device before we issue 680 * then we must flush the file system device before we issue
684 * the commit record 681 * the commit record
685 */ 682 */
686 if (commit_transaction->t_flushed_data_blocks && 683 if (commit_transaction->t_need_data_flush &&
687 (journal->j_fs_dev != journal->j_dev) && 684 (journal->j_fs_dev != journal->j_dev) &&
688 (journal->j_flags & JBD2_BARRIER)) 685 (journal->j_flags & JBD2_BARRIER))
689 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 686 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
@@ -760,8 +757,13 @@ wait_for_iobuf:
760 required. */ 757 required. */
761 JBUFFER_TRACE(jh, "file as BJ_Forget"); 758 JBUFFER_TRACE(jh, "file as BJ_Forget");
762 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); 759 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
763 /* Wake up any transactions which were waiting for this 760 /*
764 IO to complete */ 761 * Wake up any transactions which were waiting for this IO to
762 * complete. The barrier must be here so that changes by
763 * jbd2_journal_file_buffer() take effect before wake_up_bit()
764 * does the waitqueue check.
765 */
766 smp_mb();
765 wake_up_bit(&bh->b_state, BH_Unshadow); 767 wake_up_bit(&bh->b_state, BH_Unshadow);
766 JBUFFER_TRACE(jh, "brelse shadowed buffer"); 768 JBUFFER_TRACE(jh, "brelse shadowed buffer");
767 __brelse(bh); 769 __brelse(bh);
@@ -800,6 +802,10 @@ wait_for_iobuf:
800 jbd2_journal_abort(journal, err); 802 jbd2_journal_abort(journal, err);
801 803
802 jbd_debug(3, "JBD: commit phase 5\n"); 804 jbd_debug(3, "JBD: commit phase 5\n");
805 write_lock(&journal->j_state_lock);
806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
807 commit_transaction->t_state = T_COMMIT_JFLUSH;
808 write_unlock(&journal->j_state_lock);
803 809
804 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 810 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
805 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 811 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -955,7 +961,7 @@ restart_loop:
955 961
956 jbd_debug(3, "JBD: commit phase 7\n"); 962 jbd_debug(3, "JBD: commit phase 7\n");
957 963
958 J_ASSERT(commit_transaction->t_state == T_COMMIT); 964 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
959 965
960 commit_transaction->t_start = jiffies; 966 commit_transaction->t_start = jiffies;
961 stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, 967 stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e0ec3db1c395..9a7826990304 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -479,9 +479,12 @@ int __jbd2_log_space_left(journal_t *journal)
479int __jbd2_log_start_commit(journal_t *journal, tid_t target) 479int __jbd2_log_start_commit(journal_t *journal, tid_t target)
480{ 480{
481 /* 481 /*
482 * Are we already doing a recent enough commit? 482 * The only transaction we can possibly wait upon is the
483 * currently running transaction (if it exists). Otherwise,
484 * the target tid must be an old one.
483 */ 485 */
484 if (!tid_geq(journal->j_commit_request, target)) { 486 if (journal->j_running_transaction &&
487 journal->j_running_transaction->t_tid == target) {
485 /* 488 /*
486 * We want a new commit: OK, mark the request and wakeup the 489 * We want a new commit: OK, mark the request and wakeup the
487 * commit thread. We do _not_ do the commit ourselves. 490 * commit thread. We do _not_ do the commit ourselves.
@@ -493,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
493 journal->j_commit_sequence); 496 journal->j_commit_sequence);
494 wake_up(&journal->j_wait_commit); 497 wake_up(&journal->j_wait_commit);
495 return 1; 498 return 1;
496 } 499 } else if (!tid_geq(journal->j_commit_request, target))
500 /* This should never happen, but if it does, preserve
501 the evidence before kjournald goes into a loop and
502 increments j_commit_sequence beyond all recognition. */
503 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
504 journal->j_commit_request,
505 journal->j_commit_sequence,
506 target, journal->j_running_transaction ?
507 journal->j_running_transaction->t_tid : 0);
497 return 0; 508 return 0;
498} 509}
499 510
@@ -577,6 +588,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
577} 588}
578 589
579/* 590/*
591 * Return 1 if a given transaction has not yet sent barrier request
592 * connected with a transaction commit. If 0 is returned, transaction
593 * may or may not have sent the barrier. Used to avoid sending barrier
594 * twice in common cases.
595 */
596int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
597{
598 int ret = 0;
599 transaction_t *commit_trans;
600
601 if (!(journal->j_flags & JBD2_BARRIER))
602 return 0;
603 read_lock(&journal->j_state_lock);
604 /* Transaction already committed? */
605 if (tid_geq(journal->j_commit_sequence, tid))
606 goto out;
607 commit_trans = journal->j_committing_transaction;
608 if (!commit_trans || commit_trans->t_tid != tid) {
609 ret = 1;
610 goto out;
611 }
612 /*
613 * Transaction is being committed and we already proceeded to
614 * submitting a flush to fs partition?
615 */
616 if (journal->j_fs_dev != journal->j_dev) {
617 if (!commit_trans->t_need_data_flush ||
618 commit_trans->t_state >= T_COMMIT_DFLUSH)
619 goto out;
620 } else {
621 if (commit_trans->t_state >= T_COMMIT_JFLUSH)
622 goto out;
623 }
624 ret = 1;
625out:
626 read_unlock(&journal->j_state_lock);
627 return ret;
628}
629EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
630
631/*
580 * Wait for a specified commit to complete. 632 * Wait for a specified commit to complete.
581 * The caller may not hold the journal lock. 633 * The caller may not hold the journal lock.
582 */ 634 */
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 05fa77a23711..3eec82d32fd4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
82 */ 82 */
83 83
84/* 84/*
85 * Update transiaction's maximum wait time, if debugging is enabled. 85 * Update transaction's maximum wait time, if debugging is enabled.
86 * 86 *
87 * In order for t_max_wait to be reliable, it must be protected by a 87 * In order for t_max_wait to be reliable, it must be protected by a
88 * lock. But doing so will mean that start_this_handle() can not be 88 * lock. But doing so will mean that start_this_handle() can not be
@@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
91 * means that maximum wait time reported by the jbd2_run_stats 91 * means that maximum wait time reported by the jbd2_run_stats
92 * tracepoint will always be zero. 92 * tracepoint will always be zero.
93 */ 93 */
94static inline void update_t_max_wait(transaction_t *transaction) 94static inline void update_t_max_wait(transaction_t *transaction,
95 unsigned long ts)
95{ 96{
96#ifdef CONFIG_JBD2_DEBUG 97#ifdef CONFIG_JBD2_DEBUG
97 unsigned long ts = jiffies;
98
99 if (jbd2_journal_enable_debug && 98 if (jbd2_journal_enable_debug &&
100 time_after(transaction->t_start, ts)) { 99 time_after(transaction->t_start, ts)) {
101 ts = jbd2_time_diff(ts, transaction->t_start); 100 ts = jbd2_time_diff(ts, transaction->t_start);
@@ -121,6 +120,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
121 tid_t tid; 120 tid_t tid;
122 int needed, need_to_start; 121 int needed, need_to_start;
123 int nblocks = handle->h_buffer_credits; 122 int nblocks = handle->h_buffer_credits;
123 unsigned long ts = jiffies;
124 124
125 if (nblocks > journal->j_max_transaction_buffers) { 125 if (nblocks > journal->j_max_transaction_buffers) {
126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -271,7 +271,7 @@ repeat:
271 /* OK, account for the buffers that this operation expects to 271 /* OK, account for the buffers that this operation expects to
272 * use and add the handle to the running transaction. 272 * use and add the handle to the running transaction.
273 */ 273 */
274 update_t_max_wait(transaction); 274 update_t_max_wait(transaction, ts);
275 handle->h_transaction = transaction; 275 handle->h_transaction = transaction;
276 atomic_inc(&transaction->t_updates); 276 atomic_inc(&transaction->t_updates);
277 atomic_inc(&transaction->t_handle_count); 277 atomic_inc(&transaction->t_handle_count);
@@ -316,7 +316,8 @@ static handle_t *new_handle(int nblocks)
316 * This function is visible to journal users (like ext3fs), so is not 316 * This function is visible to journal users (like ext3fs), so is not
317 * called with the journal already locked. 317 * called with the journal already locked.
318 * 318 *
319 * Return a pointer to a newly allocated handle, or NULL on failure 319 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
320 * on failure.
320 */ 321 */
321handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) 322handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
322{ 323{
@@ -921,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
921 */ 922 */
922 JBUFFER_TRACE(jh, "cancelling revoke"); 923 JBUFFER_TRACE(jh, "cancelling revoke");
923 jbd2_journal_cancel_revoke(handle, jh); 924 jbd2_journal_cancel_revoke(handle, jh);
924 jbd2_journal_put_journal_head(jh);
925out: 925out:
926 jbd2_journal_put_journal_head(jh);
926 return err; 927 return err;
927} 928}
928 929
@@ -2147,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
2147 jinode->i_next_transaction == transaction) 2148 jinode->i_next_transaction == transaction)
2148 goto done; 2149 goto done;
2149 2150
2151 /*
2152 * We only ever set this variable to 1 so the test is safe. Since
2153 * t_need_data_flush is likely to be set, we do the test to save some
2154 * cacheline bouncing
2155 */
2156 if (!transaction->t_need_data_flush)
2157 transaction->t_need_data_flush = 1;
2150 /* On some different transaction's list - should be 2158 /* On some different transaction's list - should be
2151 * the committing one */ 2159 * the committing one */
2152 if (jinode->i_transaction) { 2160 if (jinode->i_transaction) {
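/*
 * Illustrative sketch (not kernel code): the test-before-set idiom from
 * the t_need_data_flush hunk above. For a shared flag that only ever
 * goes 0 -> 1, reading before writing keeps the cache line in shared
 * state on the common path; only the first setter takes it exclusive.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int need_flush;

static void mark_needs_flush(void)
{
        if (!atomic_load_explicit(&need_flush, memory_order_relaxed))
                atomic_store_explicit(&need_flush, 1, memory_order_relaxed);
}

int main(void)
{
        mark_needs_flush();     /* first caller dirties the cache line */
        mark_needs_flush();     /* later callers only read it */
        printf("need_flush = %d\n", atomic_load(&need_flush));
        return 0;
}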
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 82faddd1f321..05f73328b28b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -609,6 +609,8 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
609 int ret; 609 int ret;
610 uint32_t now = get_seconds(); 610 uint32_t now = get_seconds();
611 611
612 dentry_unhash(dentry);
613
612 for (fd = f->dents ; fd; fd = fd->next) { 614 for (fd = f->dents ; fd; fd = fd->next) {
613 if (fd->ino) 615 if (fd->ino)
614 return -ENOTEMPTY; 616 return -ENOTEMPTY;
@@ -784,6 +786,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
784 uint8_t type; 786 uint8_t type;
785 uint32_t now; 787 uint32_t now;
786 788
789 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
790 dentry_unhash(new_dentry);
791
787 /* The VFS will check for us and prevent trying to rename a 792 /* The VFS will check for us and prevent trying to rename a
788 * file over a directory and vice versa, but if it's a directory, 793 * file over a directory and vice versa, but if it's a directory,
789 * the VFS can't check whether the victim is empty. The filesystem 794 * the VFS can't check whether the victim is empty. The filesystem
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index eaaf2b511e89..865df16a6cf3 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -360,6 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
360 360
361 jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); 361 jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
362 362
363 dentry_unhash(dentry);
364
363 /* Init inode for quota operations. */ 365 /* Init inode for quota operations. */
364 dquot_initialize(dip); 366 dquot_initialize(dip);
365 dquot_initialize(ip); 367 dquot_initialize(ip);
@@ -1095,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1095 jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, 1097 jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
1096 new_dentry->d_name.name); 1098 new_dentry->d_name.name);
1097 1099
1100 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
1101 dentry_unhash(new_dentry);
1102
1098 dquot_initialize(old_dir); 1103 dquot_initialize(old_dir);
1099 dquot_initialize(new_dir); 1104 dquot_initialize(new_dir);
1100 1105
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/prefetch.h>
13 14
14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
15 16
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 9ed89d1663f8..f34c9cde9e94 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -273,6 +273,8 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
273{ 273{
274 struct inode *inode = dentry->d_inode; 274 struct inode *inode = dentry->d_inode;
275 275
276 dentry_unhash(dentry);
277
276 if (!logfs_empty_dir(inode)) 278 if (!logfs_empty_dir(inode))
277 return -ENOTEMPTY; 279 return -ENOTEMPTY;
278 280
@@ -622,6 +624,9 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry,
622 loff_t pos; 624 loff_t pos;
623 int err; 625 int err;
624 626
627 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
628 dentry_unhash(new_dentry);
629
625 /* 1. locate source dd */ 630 /* 1. locate source dd */
626 err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); 631 err = logfs_get_dd(old_dir, old_dentry, &dd, &pos);
627 if (err) 632 if (err)
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9e22085231b3..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -481,7 +481,7 @@ static int inode_write_alias(struct super_block *sb,
481 val = inode_val0(inode); 481 val = inode_val0(inode);
482 break; 482 break;
483 case INODE_USED_OFS: 483 case INODE_USED_OFS:
484 val = cpu_to_be64(li->li_used_bytes);; 484 val = cpu_to_be64(li->li_used_bytes);
485 break; 485 break;
486 case INODE_SIZE_OFS: 486 case INODE_SIZE_OFS:
487 val = cpu_to_be64(i_size_read(inode)); 487 val = cpu_to_be64(i_size_read(inode));
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be06555..8c32ef3ba88e 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock);
90 * What the mbcache registers as to get shrunk dynamically. 90 * What the mbcache registers as to get shrunk dynamically.
91 */ 91 */
92 92
93static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); 93static int mb_cache_shrink_fn(struct shrinker *shrink,
94 struct shrink_control *sc);
94 95
95static struct shrinker mb_cache_shrinker = { 96static struct shrinker mb_cache_shrinker = {
96 .shrink = mb_cache_shrink_fn, 97 .shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@ forget:
  * gets low.
  *
  * @shrink: (ignored)
- * @nr_to_scan: Number of objects to scan
- * @gfp_mask: (ignored)
+ * @sc: shrink_control passed from reclaim
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
 {
         LIST_HEAD(free_list);
         struct mb_cache *cache;
         struct mb_cache_entry *entry, *tmp;
         int count = 0;
+        int nr_to_scan = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
 
         mb_debug("trying to free %d entries", nr_to_scan);
         spin_lock(&mb_cache_spinlock);
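The mbcache conversion above shows the new shrinker calling convention this series introduces: the (nr_to_scan, gfp_mask) argument pair is replaced by a single struct shrink_control, which callbacks unpack locally. A stand-alone model of that shape (the struct below is a stand-in mirroring only the two fields this hunk reads):

#include <stdio.h>

typedef unsigned gfp_t;

struct shrink_control {
        gfp_t gfp_mask;
        unsigned long nr_to_scan;
};

struct shrinker;        /* opaque, as in the callback's first argument */

static int cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
        unsigned long nr_to_scan = sc->nr_to_scan;   /* unpack, as the patch does */
        gfp_t gfp_mask = sc->gfp_mask;

        printf("scan %lu entries (gfp=%#x)\n", nr_to_scan, gfp_mask);
        return 0;       /* would return the number of cached objects */
}

int main(void)
{
        struct shrink_control sc = { .gfp_mask = 0xd0, .nr_to_scan = 128 };

        return cache_shrink(NULL, &sc);
}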
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 6e6777f1b4b2..f60aed8db9c4 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -168,6 +168,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
         struct inode * inode = dentry->d_inode;
         int err = -ENOTEMPTY;
 
+        dentry_unhash(dentry);
+
         if (minix_empty_dir(inode)) {
                 err = minix_unlink(dir, dentry);
                 if (!err) {
@@ -190,6 +192,9 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
         struct minix_dir_entry * old_de;
         int err = -ENOENT;
 
+        if (new_inode && S_ISDIR(new_inode->i_mode))
+                dentry_unhash(new_dentry);
+
         old_de = minix_find_entry(old_dentry, &old_page);
         if (!old_de)
                 goto out;
diff --git a/fs/mpage.c b/fs/mpage.c
index 0afc809e46e0..fdfae9fa98cd 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -27,6 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/cleancache.h>
 
 /*
  * I/O completion handler for multipage BIOs.
@@ -271,6 +272,12 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
                 SetPageMappedToDisk(page);
         }
 
+        if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
+            cleancache_get_page(page) == 0) {
+                SetPageUptodate(page);
+                goto confused;
+        }
+
         /*
          * This page will go to BIO.  Do we need to send this BIO off first?
          */
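The do_mpage_readpage() hook above gives cleancache a chance to satisfy a read before any bio is built: a return of 0 from cleancache_get_page() means the page was filled from the second-chance cache. A minimal stand-alone model of that control flow (the _model function stands in for the real hook, which always misses when no backend is registered):

#include <stdio.h>

static int cleancache_get_page_model(void *page)
{
        (void)page;
        return -1;      /* no cleancache backend: always miss */
}

static void read_page(void *page, int fully_mapped, int blocks_per_page,
                      int uptodate)
{
        if (fully_mapped && blocks_per_page == 1 && !uptodate &&
            cleancache_get_page_model(page) == 0) {
                puts("page filled from cleancache, no bio needed");
                return;
        }
        puts("submitting real block I/O");
}

int main(void)
{
        char page[4096];

        read_page(page, 1, 1, 0);
        return 0;
}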
diff --git a/fs/namei.c b/fs/namei.c
index e3c4f112ebf7..2358b326b221 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -391,79 +391,28 @@ void path_put(struct path *path)
 }
 EXPORT_SYMBOL(path_put);
 
-/**
- * nameidata_drop_rcu - drop this nameidata out of rcu-walk
- * @nd: nameidata pathwalk data to drop
- * Returns: 0 on success, -ECHILD on failure
- *
+/*
  * Path walking has 2 modes, rcu-walk and ref-walk (see
- * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
- * to drop out of rcu-walk mode and take normal reference counts on dentries
- * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
- * refcounts at the last known good point before rcu-walk got stuck, so
- * ref-walk may continue from there. If this is not successful (eg. a seqcount
- * has changed), then failure is returned and path walk restarts from the
- * beginning in ref-walk mode.
- *
- * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
- * ref-walk. Must be called from rcu-walk context.
+ * Documentation/filesystems/path-lookup.txt). In situations when we can't
+ * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
+ * normal reference counts on dentries and vfsmounts to transition to rcu-walk
+ * mode. Refcounts are grabbed at the last known good point before rcu-walk
+ * got stuck, so ref-walk may continue from there. If this is not successful
+ * (eg. a seqcount has changed), then failure is returned and it's up to caller
+ * to restart the path walk from the beginning in ref-walk mode.
  */
-static int nameidata_drop_rcu(struct nameidata *nd)
-{
-        struct fs_struct *fs = current->fs;
-        struct dentry *dentry = nd->path.dentry;
-        int want_root = 0;
-
-        BUG_ON(!(nd->flags & LOOKUP_RCU));
-        if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
-                want_root = 1;
-                spin_lock(&fs->lock);
-                if (nd->root.mnt != fs->root.mnt ||
-                    nd->root.dentry != fs->root.dentry)
-                        goto err_root;
-        }
-        spin_lock(&dentry->d_lock);
-        if (!__d_rcu_to_refcount(dentry, nd->seq))
-                goto err;
-        BUG_ON(nd->inode != dentry->d_inode);
-        spin_unlock(&dentry->d_lock);
-        if (want_root) {
-                path_get(&nd->root);
-                spin_unlock(&fs->lock);
-        }
-        mntget(nd->path.mnt);
-
-        rcu_read_unlock();
-        br_read_unlock(vfsmount_lock);
-        nd->flags &= ~LOOKUP_RCU;
-        return 0;
-err:
-        spin_unlock(&dentry->d_lock);
-err_root:
-        if (want_root)
-                spin_unlock(&fs->lock);
-        return -ECHILD;
-}
-
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
-{
-        if (nd->flags & LOOKUP_RCU)
-                return nameidata_drop_rcu(nd);
-        return 0;
-}
 
 /**
- * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
- * @nd: nameidata pathwalk data to drop
- * @dentry: dentry to drop
+ * unlazy_walk - try to switch to ref-walk mode.
+ * @nd: nameidata pathwalk data
+ * @dentry: child of nd->path.dentry or NULL
  * Returns: 0 on success, -ECHILD on failure
  *
- * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
- * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
- * @nd. Must be called from rcu-walk context.
+ * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
+ * for ref-walk mode.  @dentry must be a path found by a do_lookup call on
+ * @nd or NULL.  Must be called from rcu-walk context.
  */
-static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
+static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 {
         struct fs_struct *fs = current->fs;
         struct dentry *parent = nd->path.dentry;
@@ -478,18 +427,25 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
                 goto err_root;
         }
         spin_lock(&parent->d_lock);
-        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-        if (!__d_rcu_to_refcount(dentry, nd->seq))
-                goto err;
-        /*
-         * If the sequence check on the child dentry passed, then the child has
-         * not been removed from its parent. This means the parent dentry must
-         * be valid and able to take a reference at this point.
-         */
-        BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-        BUG_ON(!parent->d_count);
-        parent->d_count++;
-        spin_unlock(&dentry->d_lock);
+        if (!dentry) {
+                if (!__d_rcu_to_refcount(parent, nd->seq))
+                        goto err_parent;
+                BUG_ON(nd->inode != parent->d_inode);
+        } else {
+                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+                if (!__d_rcu_to_refcount(dentry, nd->seq))
+                        goto err_child;
+                /*
+                 * If the sequence check on the child dentry passed, then
+                 * the child has not been removed from its parent. This
+                 * means the parent dentry must be valid and able to take
+                 * a reference at this point.
+                 */
+                BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
+                BUG_ON(!parent->d_count);
+                parent->d_count++;
+                spin_unlock(&dentry->d_lock);
+        }
         spin_unlock(&parent->d_lock);
         if (want_root) {
                 path_get(&nd->root);
@@ -501,8 +457,10 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
         br_read_unlock(vfsmount_lock);
         nd->flags &= ~LOOKUP_RCU;
         return 0;
-err:
+
+err_child:
         spin_unlock(&dentry->d_lock);
+err_parent:
         spin_unlock(&parent->d_lock);
 err_root:
         if (want_root)
@@ -510,59 +468,6 @@ err_root:
         return -ECHILD;
 }
 
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
-{
-        if (nd->flags & LOOKUP_RCU) {
-                if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
-                        nd->flags &= ~LOOKUP_RCU;
-                        if (!(nd->flags & LOOKUP_ROOT))
-                                nd->root.mnt = NULL;
-                        rcu_read_unlock();
-                        br_read_unlock(vfsmount_lock);
-                        return -ECHILD;
-                }
-        }
-        return 0;
-}
-
-/**
- * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
- * @nd: nameidata pathwalk data to drop
- * Returns: 0 on success, -ECHILD on failure
- *
- * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
- * nd->path should be the final element of the lookup, so nd->root is discarded.
- * Must be called from rcu-walk context.
- */
-static int nameidata_drop_rcu_last(struct nameidata *nd)
-{
-        struct dentry *dentry = nd->path.dentry;
-
-        BUG_ON(!(nd->flags & LOOKUP_RCU));
-        nd->flags &= ~LOOKUP_RCU;
-        if (!(nd->flags & LOOKUP_ROOT))
-                nd->root.mnt = NULL;
-        spin_lock(&dentry->d_lock);
-        if (!__d_rcu_to_refcount(dentry, nd->seq))
-                goto err_unlock;
-        BUG_ON(nd->inode != dentry->d_inode);
-        spin_unlock(&dentry->d_lock);
-
-        mntget(nd->path.mnt);
-
-        rcu_read_unlock();
-        br_read_unlock(vfsmount_lock);
-
-        return 0;
-
-err_unlock:
-        spin_unlock(&dentry->d_lock);
-        rcu_read_unlock();
-        br_read_unlock(vfsmount_lock);
-        return -ECHILD;
-}
-
 /**
  * release_open_intent - free up open intent resources
  * @nd: pointer to nameidata
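Taken together, the namei.c hunks above collapse three rcu-walk exit helpers (nameidata_drop_rcu, nameidata_dentry_drop_rcu, nameidata_drop_rcu_last) into unlazy_walk(), whose optional second argument selects between legitimizing nd->path alone or a child dentry as well. A stand-alone model of that calling convention (stand-in types, not the kernel's):

#include <stdio.h>

struct nameidata { int rcu; };
struct dentry { const char *name; };

static int unlazy_walk_model(struct nameidata *nd, struct dentry *child)
{
        if (child)
                printf("legitimize nd->path and child '%s'\n", child->name);
        else
                printf("legitimize nd->path only\n");
        nd->rcu = 0;    /* leave rcu-walk on success */
        return 0;       /* -ECHILD on a failed seqcount check */
}

int main(void)
{
        struct nameidata nd = { .rcu = 1 };
        struct dentry d = { .name = "etc" };

        unlazy_walk_model(&nd, NULL);   /* old nameidata_drop_rcu() case */
        unlazy_walk_model(&nd, &d);     /* old nameidata_dentry_drop_rcu() case */
        return 0;
}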
@@ -606,26 +511,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
         return dentry;
 }
 
-/*
- * handle_reval_path - force revalidation of a dentry
- *
- * In some situations the path walking code will trust dentries without
- * revalidating them. This causes problems for filesystems that depend on
- * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
- * (which indicates that it's possible for the dentry to go stale), force
- * a d_revalidate call before proceeding.
+/**
+ * complete_walk - successful completion of path walk
+ * @nd:  pointer nameidata
  *
- * Returns 0 if the revalidation was successful. If the revalidation fails,
- * either return the error returned by d_revalidate or -ESTALE if the
- * revalidation it just returned 0. If d_revalidate returns 0, we attempt to
- * invalidate the dentry. It's up to the caller to handle putting references
- * to the path if necessary.
+ * If we had been in RCU mode, drop out of it and legitimize nd->path.
+ * Revalidate the final result, unless we'd already done that during
+ * the path walk or the filesystem doesn't ask for it.  Return 0 on
+ * success, -error on failure.  In case of failure caller does not
+ * need to drop nd->path.
  */
-static inline int handle_reval_path(struct nameidata *nd)
+static int complete_walk(struct nameidata *nd)
 {
         struct dentry *dentry = nd->path.dentry;
         int status;
 
+        if (nd->flags & LOOKUP_RCU) {
+                nd->flags &= ~LOOKUP_RCU;
+                if (!(nd->flags & LOOKUP_ROOT))
+                        nd->root.mnt = NULL;
+                spin_lock(&dentry->d_lock);
+                if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
+                        spin_unlock(&dentry->d_lock);
+                        rcu_read_unlock();
+                        br_read_unlock(vfsmount_lock);
+                        return -ECHILD;
+                }
+                BUG_ON(nd->inode != dentry->d_inode);
+                spin_unlock(&dentry->d_lock);
+                mntget(nd->path.mnt);
+                rcu_read_unlock();
+                br_read_unlock(vfsmount_lock);
+        }
+
         if (likely(!(nd->flags & LOOKUP_JUMPED)))
                 return 0;
 
@@ -643,6 +561,7 @@ static inline int handle_reval_path(struct nameidata *nd)
         if (!status)
                 status = -ESTALE;
 
+        path_put(&nd->path);
         return status;
 }
 
@@ -1241,13 +1160,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
                 if (likely(__follow_mount_rcu(nd, path, inode, false)))
                         return 0;
 unlazy:
-                if (dentry) {
-                        if (nameidata_dentry_drop_rcu(nd, dentry))
-                                return -ECHILD;
-                } else {
-                        if (nameidata_drop_rcu(nd))
-                                return -ECHILD;
-                }
+                if (unlazy_walk(nd, dentry))
+                        return -ECHILD;
         } else {
                 dentry = __d_lookup(parent, name);
         }
@@ -1303,7 +1217,7 @@ static inline int may_lookup(struct nameidata *nd)
                 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
                 if (err != -ECHILD)
                         return err;
-                if (nameidata_drop_rcu(nd))
+                if (unlazy_walk(nd, NULL))
                         return -ECHILD;
         }
         return exec_permission(nd->inode, 0);
@@ -1357,8 +1271,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
                 return -ENOENT;
         }
         if (unlikely(inode->i_op->follow_link) && follow) {
-                if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
-                        return -ECHILD;
+                if (nd->flags & LOOKUP_RCU) {
+                        if (unlikely(unlazy_walk(nd, path->dentry))) {
+                                terminate_walk(nd);
+                                return -ECHILD;
+                        }
+                }
                 BUG_ON(inode != path->dentry->d_inode);
                 return 1;
         }
@@ -1378,12 +1296,12 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 {
         int res;
 
-        BUG_ON(nd->depth >= MAX_NESTED_LINKS);
         if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
                 path_put_conditional(path, nd);
                 path_put(&nd->path);
                 return -ELOOP;
         }
+        BUG_ON(nd->depth >= MAX_NESTED_LINKS);
 
         nd->depth++;
         current->link_count++;
@@ -1657,18 +1575,8 @@ static int path_lookupat(int dfd, const char *name,
                 }
         }
 
-        if (nd->flags & LOOKUP_RCU) {
-                /* went all way through without dropping RCU */
-                BUG_ON(err);
-                if (nameidata_drop_rcu_last(nd))
-                        err = -ECHILD;
-        }
-
-        if (!err) {
-                err = handle_reval_path(nd);
-                if (err)
-                        path_put(&nd->path);
-        }
+        if (!err)
+                err = complete_walk(nd);
 
         if (!err && nd->flags & LOOKUP_DIRECTORY) {
                 if (!nd->inode->i_op->lookup) {
@@ -2134,13 +2042,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
                 return ERR_PTR(error);
                 /* fallthrough */
         case LAST_ROOT:
-                if (nd->flags & LOOKUP_RCU) {
-                        if (nameidata_drop_rcu_last(nd))
-                                return ERR_PTR(-ECHILD);
-                }
-                error = handle_reval_path(nd);
+                error = complete_walk(nd);
                 if (error)
-                        goto exit;
+                        return ERR_PTR(error);
                 audit_inode(pathname, nd->path.dentry);
                 if (open_flag & O_CREAT) {
                         error = -EISDIR;
@@ -2148,10 +2052,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
                 }
                 goto ok;
         case LAST_BIND:
-                /* can't be RCU mode here */
-                error = handle_reval_path(nd);
+                error = complete_walk(nd);
                 if (error)
-                        goto exit;
+                        return ERR_PTR(error);
                 audit_inode(pathname, dir);
                 goto ok;
         }
@@ -2170,10 +2073,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
         if (error)      /* symlink */
                 return NULL;
         /* sayonara */
-        if (nd->flags & LOOKUP_RCU) {
-                if (nameidata_drop_rcu_last(nd))
-                        return ERR_PTR(-ECHILD);
-        }
+        error = complete_walk(nd);
+        if (error)
+                return ERR_PTR(-ECHILD);
 
         error = -ENOTDIR;
         if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2185,11 +2087,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
         }
 
         /* create side of things */
-
-        if (nd->flags & LOOKUP_RCU) {
-                if (nameidata_drop_rcu_last(nd))
-                        return ERR_PTR(-ECHILD);
-        }
+        error = complete_walk(nd);
+        if (error)
+                return ERR_PTR(error);
 
         audit_inode(pathname, dir);
         error = -EISDIR;
@@ -2629,10 +2529,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
 }
 
 /*
- * We try to drop the dentry early: we should have
- * a usage count of 2 if we're the only user of this
- * dentry, and if that is true (possibly after pruning
- * the dcache), then we drop the dentry now.
+ * The dentry_unhash() helper will try to drop the dentry early: we
+ * should have a usage count of 2 if we're the only user of this
+ * dentry, and if that is true (possibly after pruning the dcache),
+ * then we drop the dentry now.
  *
  * A low-level filesystem can, if it choses, legally
  * do a
@@ -2645,10 +2545,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
  */
 void dentry_unhash(struct dentry *dentry)
 {
-        dget(dentry);
         shrink_dcache_parent(dentry);
         spin_lock(&dentry->d_lock);
-        if (dentry->d_count == 2)
+        if (dentry->d_count == 1)
                 __d_drop(dentry);
         spin_unlock(&dentry->d_lock);
 }
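The d_count threshold in dentry_unhash() drops from 2 to 1 because the helper's dget() (and the callers' matching dput()) disappears: with no extra reference taken up front, a victim dentry used by nobody else now shows a count of 1. A stand-alone arithmetic model of the check (not kernel code):

#include <stdio.h>

struct dentry { int d_count; int hashed; };

static void dentry_unhash_model(struct dentry *d)
{
        if (d->d_count == 1)    /* sole user: safe to drop from the hash */
                d->hashed = 0;
}

int main(void)
{
        struct dentry d = { .d_count = 1, .hashed = 1 };

        dentry_unhash_model(&d);
        printf("hashed=%d\n", d.hashed);        /* 0: dropped */

        d = (struct dentry){ .d_count = 2, .hashed = 1 };
        dentry_unhash_model(&d);
        printf("hashed=%d\n", d.hashed);        /* 1: someone else holds a ref */
        return 0;
}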
@@ -2664,25 +2563,26 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
                 return -EPERM;
 
         mutex_lock(&dentry->d_inode->i_mutex);
-        dentry_unhash(dentry);
+
+        error = -EBUSY;
         if (d_mountpoint(dentry))
-                error = -EBUSY;
-        else {
-                error = security_inode_rmdir(dir, dentry);
-                if (!error) {
-                        error = dir->i_op->rmdir(dir, dentry);
-                        if (!error) {
-                                dentry->d_inode->i_flags |= S_DEAD;
-                                dont_mount(dentry);
-                        }
-                }
-        }
+                goto out;
+
+        error = security_inode_rmdir(dir, dentry);
+        if (error)
+                goto out;
+
+        error = dir->i_op->rmdir(dir, dentry);
+        if (error)
+                goto out;
+
+        dentry->d_inode->i_flags |= S_DEAD;
+        dont_mount(dentry);
+
+out:
         mutex_unlock(&dentry->d_inode->i_mutex);
-        if (!error) {
+        if (!error)
                 d_delete(dentry);
-        }
-        dput(dentry);
-
         return error;
 }
 
@@ -3053,12 +2953,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  * HOWEVER, it relies on the assumption that any object with ->lookup()
  * has no more than 1 dentry.  If "hybrid" objects will ever appear,
  * we'd better make sure that there's no link(2) for them.
- * d) some filesystems don't support opened-but-unlinked directories,
- *    either because of layout or because they are not ready to deal with
- *    all cases correctly. The latter will be fixed (taking this sort of
- *    stuff into VFS), but the former is not going away. Solution: the same
- *    trick as in rmdir().
- * e) conversion from fhandle to dentry may come in the wrong moment - when
+ * d) conversion from fhandle to dentry may come in the wrong moment - when
  *    we are removing the target. Solution: we will have to grab ->i_mutex
  *    in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
  *    ->i_mutex on parents, which works but leads to some truly excessive
@@ -3068,7 +2963,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry)
 {
         int error = 0;
-        struct inode *target;
+        struct inode *target = new_dentry->d_inode;
 
         /*
          * If we are going to change the parent - check write permissions,
@@ -3084,26 +2979,24 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
         if (error)
                 return error;
 
-        target = new_dentry->d_inode;
         if (target)
                 mutex_lock(&target->i_mutex);
-        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
-                error = -EBUSY;
-        else {
-                if (target)
-                        dentry_unhash(new_dentry);
-                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-        }
+
+        error = -EBUSY;
+        if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
+                goto out;
+
+        error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+        if (error)
+                goto out;
+
         if (target) {
-                if (!error) {
-                        target->i_flags |= S_DEAD;
-                        dont_mount(new_dentry);
-                }
-                mutex_unlock(&target->i_mutex);
-                if (d_unhashed(new_dentry))
-                        d_rehash(new_dentry);
-                dput(new_dentry);
+                target->i_flags |= S_DEAD;
+                dont_mount(new_dentry);
         }
+out:
+        if (target)
+                mutex_unlock(&target->i_mutex);
         if (!error)
                 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                         d_move(old_dentry,new_dentry);
@@ -3113,7 +3006,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                             struct inode *new_dir, struct dentry *new_dentry)
 {
-        struct inode *target;
+        struct inode *target = new_dentry->d_inode;
         int error;
 
         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3121,19 +3014,22 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                 return error;
 
         dget(new_dentry);
-        target = new_dentry->d_inode;
         if (target)
                 mutex_lock(&target->i_mutex);
+
+        error = -EBUSY;
         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
-                error = -EBUSY;
-        else
-                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-        if (!error) {
-                if (target)
-                        dont_mount(new_dentry);
-                if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
-                        d_move(old_dentry, new_dentry);
-        }
+                goto out;
+
+        error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+        if (error)
+                goto out;
+
+        if (target)
+                dont_mount(new_dentry);
+        if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
+                d_move(old_dentry, new_dentry);
+out:
         if (target)
                 mutex_unlock(&target->i_mutex);
         dput(new_dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index d99bcf59e4c2..fe59bd145d21 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1695,7 +1695,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 
 static int flags_to_propagation_type(int flags)
 {
-        int type = flags & ~MS_REC;
+        int type = flags & ~(MS_REC | MS_SILENT);
 
         /* Fail if any non-propagation flags are set */
         if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f6946bb5cb55..e3e646b06404 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -1033,6 +1033,8 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
         DPRINTK("ncp_rmdir: removing %s/%s\n",
                 dentry->d_parent->d_name.name, dentry->d_name.name);
 
+        dentry_unhash(dentry);
+
         error = -EBUSY;
         if (!d_unhashed(dentry))
                 goto out;
@@ -1139,6 +1141,9 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
                 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
                 new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
 
+        if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
+                dentry_unhash(new_dentry);
+
         ncp_age_dentry(server, old_dentry);
         ncp_age_dentry(server, new_dentry);
 
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0250e4ce4893..202f370526a7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 #endif
         struct ncp_entry_info finfo;
 
-        data.wdog_pid = NULL;
+        memset(&data, 0, sizeof(data));
         server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
         if (!server)
                 return -ENOMEM;
@@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
                 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
 
                 data.flags = md->flags;
-                data.int_flags = 0;
                 data.mounted_uid = md->mounted_uid;
                 data.wdog_pid = find_get_pid(md->wdog_pid);
                 data.ncp_fd = md->ncp_fd;
@@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
                 data.file_mode = md->file_mode;
                 data.dir_mode = md->dir_mode;
                 data.info_fd = -1;
-                data.mounted_vol[0] = 0;
         }
                 break;
         default:
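Replacing the piecemeal field assignments in ncp_fill_super() with one memset() above guarantees that every member of the mount-data struct, padding included, starts zeroed, so the option-parsing cases only have to fill in what the caller supplied. A stand-alone model (the struct here is a stand-in, not ncp_mount_data_kernel's real layout):

#include <stdio.h>
#include <string.h>

struct mount_data_model {
        int flags;
        int int_flags;
        int info_fd;
        char mounted_vol[20];
};

int main(void)
{
        struct mount_data_model data;

        memset(&data, 0, sizeof(data));         /* replaces per-field zeroing */
        data.info_fd = -1;                      /* only non-zero defaults remain */

        printf("int_flags=%d vol='%s'\n", data.int_flags, data.mounted_vol);
        return 0;
}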
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index a7c07b44b100..e5d71b27a5b0 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -16,6 +16,7 @@
 #include <linux/mman.h>
 #include <linux/string.h>
 #include <linux/fcntl.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
          * -- wli
          */
         count_vm_event(PGMAJFAULT);
+        mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
         return VM_FAULT_MAJOR;
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672216c8..424e47773a84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head)
         }
 }
 
-int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink,
+                              struct shrink_control *sc)
 {
         LIST_HEAD(head);
         struct nfs_inode *nfsi, *next;
         struct nfs_access_entry *cache;
+        int nr_to_scan = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
 
         if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
                 return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce885dd..2df6ca7b5898 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp,
 
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
-                                     int nr_to_scan, gfp_t gfp_mask);
+                                     struct shrink_control *sc);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 5232d3e8fb2f..a2e2402b2afb 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,7 +8,7 @@
  * Statistsics for the reply cache
  * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
  *        statistics for filehandle lookup
- * io <bytes-read> <bytes-writtten>
+ * io <bytes-read> <bytes-written>
  *        statistics for IO throughput
  * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
  *        time (seconds) when nfsd thread usage above thresholds
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index f7684483785e..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
 void nilfs_palloc_commit_alloc_entry(struct inode *inode,
                                      struct nilfs_palloc_req *req)
 {
-        nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
-        nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
+        mark_buffer_dirty(req->pr_bitmap_bh);
+        mark_buffer_dirty(req->pr_desc_bh);
         nilfs_mdt_mark_dirty(inode);
 
         brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
         kunmap(req->pr_bitmap_bh->b_page);
         kunmap(req->pr_desc_bh->b_page);
 
-        nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
-        nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
+        mark_buffer_dirty(req->pr_desc_bh);
+        mark_buffer_dirty(req->pr_bitmap_bh);
         nilfs_mdt_mark_dirty(inode);
 
         brelse(req->pr_bitmap_bh);
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
         kunmap(bitmap_bh->b_page);
         kunmap(desc_bh->b_page);
 
-        nilfs_mdt_mark_buffer_dirty(desc_bh);
-        nilfs_mdt_mark_buffer_dirty(bitmap_bh);
+        mark_buffer_dirty(desc_bh);
+        mark_buffer_dirty(bitmap_bh);
         nilfs_mdt_mark_dirty(inode);
 
         brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
 
 struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
 {
-        return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
+        struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
+
+        return nilfs->ns_dat;
 }
 
 static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
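This bmap.c hunk is the first of many nilfs2 changes that retire the NILFS_I_NILFS() helper in favor of the generic VFS back pointer inode->i_sb->s_fs_info (and i_sb->s_bdev for the block device). A stand-alone model of the accessor shape (stand-in types, not the kernel's):

#include <stdio.h>

struct the_nilfs { const char *ns_dat; };
struct super_block { void *s_fs_info; };
struct inode { struct super_block *i_sb; };

static const char *get_dat(struct inode *inode)
{
        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

        return nilfs->ns_dat;   /* same object the old helper reached */
}

int main(void)
{
        struct the_nilfs nilfs = { .ns_dat = "DAT inode" };
        struct super_block sb = { .s_fs_info = &nilfs };
        struct inode inode = { .i_sb = &sb };

        printf("%s\n", get_dat(&inode));
        return 0;
}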
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
 #include "page.h"
 #include "btnode.h"
 
-void nilfs_btnode_cache_init(struct address_space *btnc,
-                             struct backing_dev_info *bdi)
-{
-        nilfs_mapping_init(btnc, bdi);
-}
-
 void nilfs_btnode_cache_clear(struct address_space *btnc)
 {
         invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
                 BUG();
         }
         memset(bh->b_data, 0, 1 << inode->i_blkbits);
-        bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+        bh->b_bdev = inode->i_sb->s_bdev;
         bh->b_blocknr = blocknr;
         set_buffer_mapped(bh);
         set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
         if (pblocknr == 0) {
                 pblocknr = blocknr;
                 if (inode->i_ino != NILFS_DAT_INO) {
-                        struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
+                        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
 
                         /* blocknr is a virtual block number */
-                        err = nilfs_dat_translate(dat, blocknr, &pblocknr);
+                        err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
+                                                  &pblocknr);
                         if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
                                 brelse(bh);
                                 goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
                 goto found;
         }
         set_buffer_mapped(bh);
-        bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+        bh->b_bdev = inode->i_sb->s_bdev;
         bh->b_blocknr = pblocknr; /* set block address for read */
         bh->b_end_io = end_buffer_read_sync;
         get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
259 "invalid oldkey %lld (newkey=%lld)", 254 "invalid oldkey %lld (newkey=%lld)",
260 (unsigned long long)oldkey, 255 (unsigned long long)oldkey,
261 (unsigned long long)newkey); 256 (unsigned long long)newkey);
262 nilfs_btnode_mark_dirty(obh); 257 mark_buffer_dirty(obh);
263 258
264 spin_lock_irq(&btnc->tree_lock); 259 spin_lock_irq(&btnc->tree_lock);
265 radix_tree_delete(&btnc->page_tree, oldkey); 260 radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
                 unlock_page(opage);
         } else {
                 nilfs_copy_buffer(nbh, obh);
-                nilfs_btnode_mark_dirty(nbh);
+                mark_buffer_dirty(nbh);
 
                 nbh->b_blocknr = newkey;
                 ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
         struct buffer_head *newbh;
 };
 
-void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
 void nilfs_btnode_cache_clear(struct address_space *);
 struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
                                               __u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
 void nilfs_btnode_abort_change_key(struct address_space *,
                                    struct nilfs_btnode_chkey_ctxt *);
 
-#define nilfs_btnode_mark_dirty(bh)        nilfs_mark_buffer_dirty(bh)
-
-
 #endif  /* _NILFS_BTNODE_H */
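With the nilfs_btnode_mark_dirty() alias deleted above (and nilfs_mdt_mark_buffer_dirty() retired likewise), the remaining nilfs2 hunks in this series all reduce to one call-site shape: test the dirty bit, then use the generic buffer-cache helper. A stand-alone model of that shape (stand-in types, not fs/buffer.c):

#include <stdio.h>

struct buffer_head { int dirty; };

static int buffer_dirty(struct buffer_head *bh) { return bh->dirty; }
static void mark_buffer_dirty(struct buffer_head *bh) { bh->dirty = 1; }

int main(void)
{
        struct buffer_head bh = { .dirty = 0 };

        if (!buffer_dirty(&bh))         /* the guard kept at every call site */
                mark_buffer_dirty(&bh);

        printf("dirty=%d\n", bh.dirty);
        return 0;
}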
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
                         nilfs_btree_get_nonroot_node(path, level),
                         path[level].bp_index, key);
                 if (!buffer_dirty(path[level].bp_bh))
-                        nilfs_btnode_mark_dirty(path[level].bp_bh);
+                        mark_buffer_dirty(path[level].bp_bh);
         } while ((path[level].bp_index == 0) &&
                  (++level < nilfs_btree_height(btree) - 1));
         }
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
         nilfs_btree_node_insert(node, path[level].bp_index,
                                 *keyp, *ptrp, ncblk);
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
 
         if (path[level].bp_index == 0)
                 nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         nilfs_btree_promote_key(btree, path, level + 1,
                                 nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         path[level + 1].bp_index++;
         nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         newkey = nilfs_btree_node_get_key(right, 0);
         newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
         nilfs_btree_node_set_level(root, level + 1);
 
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         path[level].bp_bh = path[level].bp_sib_bh;
         path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
         nilfs_btree_node_delete(node, path[level].bp_index,
                                 keyp, ptrp, ncblk);
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
         if (path[level].bp_index == 0)
                 nilfs_btree_promote_key(btree, path, level + 1,
                                         nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         nilfs_btree_promote_key(btree, path, level + 1,
                                 nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         path[level + 1].bp_index++;
         nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
 
         nilfs_btnode_delete(path[level].bp_bh);
         path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
 
         if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
 
         nilfs_btnode_delete(path[level].bp_sib_bh);
         path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
         nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
         nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
         if (!buffer_dirty(bh))
-                nilfs_btnode_mark_dirty(bh);
+                mark_buffer_dirty(bh);
         if (!nilfs_bmap_dirty(btree))
                 nilfs_bmap_set_dirty(btree);
 
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
 {
         while ((++level < nilfs_btree_height(btree) - 1) &&
                !buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
 
         return 0;
 }
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
         }
 
         if (!buffer_dirty(bh))
-                nilfs_btnode_mark_dirty(bh);
+                mark_buffer_dirty(bh);
         brelse(bh);
         if (!nilfs_bmap_dirty(btree))
                 nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
                 if (!nilfs_cpfile_is_in_first(cpfile, cno))
                         nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
                                                                  kaddr, 1);
-                nilfs_mdt_mark_buffer_dirty(cp_bh);
+                mark_buffer_dirty(cp_bh);
 
                 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
                 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
                                                        kaddr);
                 le64_add_cpu(&header->ch_ncheckpoints, 1);
                 kunmap_atomic(kaddr, KM_USER0);
-                nilfs_mdt_mark_buffer_dirty(header_bh);
+                mark_buffer_dirty(header_bh);
                 nilfs_mdt_mark_dirty(cpfile);
         }
 
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                 }
                 if (nicps > 0) {
                         tnicps += nicps;
-                        nilfs_mdt_mark_buffer_dirty(cp_bh);
+                        mark_buffer_dirty(cp_bh);
                         nilfs_mdt_mark_dirty(cpfile);
                         if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
                                 count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
                                                        kaddr);
                 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
-                nilfs_mdt_mark_buffer_dirty(header_bh);
+                mark_buffer_dirty(header_bh);
                 nilfs_mdt_mark_dirty(cpfile);
                 kunmap_atomic(kaddr, KM_USER0);
         }
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
         le64_add_cpu(&header->ch_nsnapshots, 1);
         kunmap_atomic(kaddr, KM_USER0);
 
-        nilfs_mdt_mark_buffer_dirty(prev_bh);
-        nilfs_mdt_mark_buffer_dirty(curr_bh);
-        nilfs_mdt_mark_buffer_dirty(cp_bh);
-        nilfs_mdt_mark_buffer_dirty(header_bh);
+        mark_buffer_dirty(prev_bh);
+        mark_buffer_dirty(curr_bh);
+        mark_buffer_dirty(cp_bh);
+        mark_buffer_dirty(header_bh);
         nilfs_mdt_mark_dirty(cpfile);
 
         brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
         le64_add_cpu(&header->ch_nsnapshots, -1);
         kunmap_atomic(kaddr, KM_USER0);
 
-        nilfs_mdt_mark_buffer_dirty(next_bh);
-        nilfs_mdt_mark_buffer_dirty(prev_bh);
-        nilfs_mdt_mark_buffer_dirty(cp_bh);
-        nilfs_mdt_mark_buffer_dirty(header_bh);
+        mark_buffer_dirty(next_bh);
+        mark_buffer_dirty(prev_bh);
+        mark_buffer_dirty(cp_bh);
+        mark_buffer_dirty(header_bh);
         nilfs_mdt_mark_dirty(cpfile);
 
         brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
 static void nilfs_dat_commit_entry(struct inode *dat,
                                    struct nilfs_palloc_req *req)
 {
-        nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
+        mark_buffer_dirty(req->pr_entry_bh);
         nilfs_mdt_mark_dirty(dat);
         brelse(req->pr_entry_bh);
 }
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
         entry->de_blocknr = cpu_to_le64(blocknr);
         kunmap_atomic(kaddr, KM_USER0);
 
-        nilfs_mdt_mark_buffer_dirty(entry_bh);
+        mark_buffer_dirty(entry_bh);
         nilfs_mdt_mark_dirty(dat);
 
         brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         nilfs_transaction_commit(inode->i_sb);
 
  mapped:
-        SetPageChecked(page);
         wait_on_page_writeback(page);
         return VM_FAULT_LOCKED;
 }
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
 #include "dat.h"
 #include "ifile.h"
 
-static const struct address_space_operations def_gcinode_aops = {
-};
-
 /*
  * nilfs_gccache_submit_read_data() - add data buffer and submit read request
  * @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
                 goto out;
 
         if (pbn == 0) {
-                struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
-                                        /* use original dat, not gc dat. */
-                err = nilfs_dat_translate(dat_inode, vbn, &pbn);
+                struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+                err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
                 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
                         brelse(bh);
                         goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
         }
 
         if (!buffer_mapped(bh)) {
-                bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+                bh->b_bdev = inode->i_sb->s_bdev;
                 set_buffer_mapped(bh);
         }
         bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
         if (buffer_dirty(bh))
                 return -EEXIST;
 
-        if (buffer_nilfs_node(bh)) {
-                if (nilfs_btree_broken_node_block(bh)) {
-                        clear_buffer_uptodate(bh);
-                        return -EIO;
-                }
-                nilfs_btnode_mark_dirty(bh);
-        } else {
-                nilfs_mark_buffer_dirty(bh);
+        if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
+                clear_buffer_uptodate(bh);
+                return -EIO;
         }
+        mark_buffer_dirty(bh);
         return 0;
 }
 
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
 
         inode->i_mode = S_IFREG;
         mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
-        inode->i_mapping->a_ops = &def_gcinode_aops;
+        inode->i_mapping->a_ops = &empty_aops;
         inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
 
         ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
                 return ret;
         }
         nilfs_palloc_commit_alloc_entry(ifile, &req);
-        nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+        mark_buffer_dirty(req.pr_entry_bh);
         nilfs_mdt_mark_dirty(ifile);
         *out_ino = (ino_t)req.pr_entry_nr;
         *out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
         raw_inode->i_flags = 0;
         kunmap_atomic(kaddr, KM_USER0);
 
-        nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+        mark_buffer_dirty(req.pr_entry_bh);
         brelse(req.pr_entry_bh);
 
         nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
                     struct buffer_head *bh_result, int create)
 {
         struct nilfs_inode_info *ii = NILFS_I(inode);
+        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
         __u64 blknum = 0;
         int err = 0, ret;
-        struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
         unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
 
-        down_read(&NILFS_MDT(dat)->mi_sem);
+        down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
         ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
-        up_read(&NILFS_MDT(dat)->mi_sem);
+        up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
         if (ret >= 0) {  /* found */
                 map_bh(bh_result, inode->i_sb, blknum);
                 if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
596 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 596 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
597 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 597 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
598 598
599 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
600 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
601
602 /* zero-fill unused portion in the case of super root block */
603 raw_inode->i_xattr = 0;
604 raw_inode->i_pad = 0;
605 memset((void *)raw_inode + sizeof(*raw_inode), 0,
606 nilfs->ns_inode_size - sizeof(*raw_inode));
607 }
608
599 if (has_bmap) 609 if (has_bmap)
600 nilfs_bmap_write(ii->i_bmap, raw_inode); 610 nilfs_bmap_write(ii->i_bmap, raw_inode);
601 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 611 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
872 return -EINVAL; /* NILFS_I_DIRTY may remain for 882 return -EINVAL; /* NILFS_I_DIRTY may remain for
873 freeing inode */ 883 freeing inode */
874 } 884 }
875 list_del(&ii->i_dirty); 885 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
876 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
877 set_bit(NILFS_I_QUEUED, &ii->i_state); 886 set_bit(NILFS_I_QUEUED, &ii->i_state);
878 } 887 }
879 spin_unlock(&nilfs->ns_inode_lock); 888 spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
892 return err; 901 return err;
893 } 902 }
894 nilfs_update_inode(inode, ibh); 903 nilfs_update_inode(inode, ibh);
895 nilfs_mdt_mark_buffer_dirty(ibh); 904 mark_buffer_dirty(ibh);
896 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 905 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
897 brelse(ibh); 906 brelse(ibh);
898 return 0; 907 return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
931int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 940int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
932 __u64 start, __u64 len) 941 __u64 start, __u64 len)
933{ 942{
934 struct the_nilfs *nilfs = NILFS_I_NILFS(inode); 943 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
935 __u64 logical = 0, phys = 0, size = 0; 944 __u64 logical = 0, phys = 0, size = 0;
936 __u32 flags = 0; 945 __u32 flags = 0;
937 loff_t isize; 946 loff_t isize;
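
The zero-fill added to nilfs_write_inode_common() matters because the root metadata files (DAT, cpfile, sufile) are serialized into the super root block in slots of ns_inode_size bytes, which can exceed sizeof(struct nilfs_inode). A hedged worked example: if a volume were formatted with ns_inode_size = 256 while struct nilfs_inode occupies 128 bytes, the memset clears the trailing 128 bytes of each slot, so stale block contents can no longer leak into the super root or perturb its checksum.
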
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
698 return 0; 698 return 0;
699} 699}
700 700
701static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
702 void __user *argp)
703{
704 __u64 newsize;
705 int ret = -EPERM;
706
707 if (!capable(CAP_SYS_ADMIN))
708 goto out;
709
710 ret = mnt_want_write(filp->f_path.mnt);
711 if (ret)
712 goto out;
713
714 ret = -EFAULT;
715 if (copy_from_user(&newsize, argp, sizeof(newsize)))
716 goto out_drop_write;
717
718 ret = nilfs_resize_fs(inode->i_sb, newsize);
719
720out_drop_write:
721 mnt_drop_write(filp->f_path.mnt);
722out:
723 return ret;
724}
725
726static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
727{
728 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
729 __u64 range[2];
730 __u64 minseg, maxseg;
731 unsigned long segbytes;
732 int ret = -EPERM;
733
734 if (!capable(CAP_SYS_ADMIN))
735 goto out;
736
737 ret = -EFAULT;
738 if (copy_from_user(range, argp, sizeof(__u64[2])))
739 goto out;
740
741 ret = -ERANGE;
742 if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
743 goto out;
744
745 segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
746
747 minseg = range[0] + segbytes - 1;
748 do_div(minseg, segbytes);
749 maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
750 do_div(maxseg, segbytes);
751 maxseg--;
752
753 ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
754out:
755 return ret;
756}
757
701static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, 758static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
702 unsigned int cmd, void __user *argp, 759 unsigned int cmd, void __user *argp,
703 size_t membsz, 760 size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
763 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); 820 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
764 case NILFS_IOCTL_SYNC: 821 case NILFS_IOCTL_SYNC:
765 return nilfs_ioctl_sync(inode, filp, cmd, argp); 822 return nilfs_ioctl_sync(inode, filp, cmd, argp);
823 case NILFS_IOCTL_RESIZE:
824 return nilfs_ioctl_resize(inode, filp, argp);
825 case NILFS_IOCTL_SET_ALLOC_RANGE:
826 return nilfs_ioctl_set_alloc_range(inode, argp);
766 default: 827 default:
767 return -ENOTTY; 828 return -ENOTTY;
768 } 829 }
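
Both new commands take plain __u64 payloads, so driving them from user space is direct. A hedged sketch; the command macro is assumed to come from <linux/nilfs2_fs.h>, where this series defines it, and the mount point is illustrative:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/nilfs2_fs.h>	/* NILFS_IOCTL_RESIZE (assumed) */

	int main(void)
	{
		uint64_t newsize = 8ULL << 30;		/* shrink to 8 GiB */
		int fd = open("/mnt/nilfs", O_RDONLY);	/* any file on the mount */
		int ret = 1;

		if (fd < 0 || ioctl(fd, NILFS_IOCTL_RESIZE, &newsize) < 0)
			perror("NILFS_IOCTL_RESIZE");
		else
			ret = 0;
		if (fd >= 0)
			close(fd);
		return ret;
	}

Note the rounding in nilfs_ioctl_set_alloc_range(): the lower byte bound is rounded up to a whole segment (range[0] + segbytes - 1, then do_div), while the upper bound is derived from NILFS_SB2_OFFSET_BYTES(range[1]) and rounded down, keeping the area reserved for the secondary super block out of the allocatable range.
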
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr, KM_USER0);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 nilfs_mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 70 nilfs_mdt_mark_dirty(inode);
71 return 0; 71 return 0;
72} 72}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
355 err = nilfs_mdt_read_block(inode, block, 0, &bh); 355 err = nilfs_mdt_read_block(inode, block, 0, &bh);
356 if (unlikely(err)) 356 if (unlikely(err))
357 return err; 357 return err;
358 nilfs_mark_buffer_dirty(bh); 358 mark_buffer_dirty(bh);
359 nilfs_mdt_mark_dirty(inode); 359 nilfs_mdt_mark_dirty(inode);
360 brelse(bh); 360 brelse(bh);
361 return 0; 361 return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
450 450
451 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
452 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
453 nilfs_mapping_init(&shadow->frozen_data, bdi); 453 nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
454 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi); 455 nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
456 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
457 return 0; 457 return 0;
458} 458}
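
The extra inode argument threaded through nilfs_mapping_init() here is the same fix as in page.c below: the shadow caches used for rollback (frozen_data, frozen_btnodes) previously had mapping->host == NULL, so their buffers could only be dirtied through the private nilfs helper. Passing the owning metadata inode gives them a valid host and lets the generic buffer-dirtying path cover shadow buffers as well.
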
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
64 return inode->i_private; 64 return inode->i_private;
65} 65}
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{
69 return inode->i_sb->s_fs_info;
70}
71
72/* Default GFP flags using highmem */ 67/* Default GFP flags using highmem */
73#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 68#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
74 69
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
93struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, 88struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
94 struct buffer_head *bh); 89 struct buffer_head *bh);
95 90
96#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
97
98static inline void nilfs_mdt_mark_dirty(struct inode *inode) 91static inline void nilfs_mdt_mark_dirty(struct inode *inode)
99{ 92{
100 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) 93 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
108 101
109static inline __u64 nilfs_mdt_cno(struct inode *inode) 102static inline __u64 nilfs_mdt_cno(struct inode *inode)
110{ 103{
111 return NILFS_I_NILFS(inode)->ns_cno; 104 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 105}
113 106
114#define nilfs_mdt_bgl_lock(inode, bg) \ 107#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 546849b3e88f..1102a5fbb744 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -334,6 +334,8 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
334 struct nilfs_transaction_info ti; 334 struct nilfs_transaction_info ti;
335 int err; 335 int err;
336 336
337 dentry_unhash(dentry);
338
337 err = nilfs_transaction_begin(dir->i_sb, &ti, 0); 339 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
338 if (err) 340 if (err)
339 return err; 341 return err;
@@ -369,6 +371,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
369 struct nilfs_transaction_info ti; 371 struct nilfs_transaction_info ti;
370 int err; 372 int err;
371 373
374 if (new_inode && S_ISDIR(new_inode->i_mode))
375 dentry_unhash(new_dentry);
376
372 err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); 377 err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
373 if (unlikely(err)) 378 if (unlikely(err))
374 return err; 379 return err;
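
The dentry_unhash() calls added here track a VFS change from this cycle: the unconditional unhash was pushed out of the generic vfs_rmdir()/vfs_rename() paths and into the filesystems that still depend on it, which is why nilfs_rename() only unhashes when the target of the rename is an existing directory that may be overwritten.
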
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
80 return &ii->vfs_inode; 80 return &ii->vfs_inode;
81} 81}
82 82
83static inline struct inode *NILFS_AS_I(struct address_space *mapping)
84{
85 return (mapping->host) ? :
86 container_of(mapping, struct inode, i_data);
87}
88
89/* 83/*
90 * Dynamic state flags of NILFS on-memory inode (i_state) 84 * Dynamic state flags of NILFS on-memory inode (i_state)
91 */ 85 */
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
298 int flip); 292 int flip);
299int nilfs_commit_super(struct super_block *sb, int flag); 293int nilfs_commit_super(struct super_block *sb, int flag);
300int nilfs_cleanup_super(struct super_block *sb); 294int nilfs_cleanup_super(struct super_block *sb);
295int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, 296int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
302 struct nilfs_root **root); 297 struct nilfs_root **root);
303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 298int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
37 37
38#define NILFS_BUFFER_INHERENT_BITS \ 38#define NILFS_BUFFER_INHERENT_BITS \
39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
41 (1UL << BH_NILFS_Checked))
42 41
43static struct buffer_head * 42static struct buffer_head *
44__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 43__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
59 return bh; 58 return bh;
60} 59}
61 60
62/*
63 * Since the page cache of B-tree node pages or data page cache of pseudo
64 * inodes does not have a valid mapping->host pointer, calling
65 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
66 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
67 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
68 */
69void nilfs_mark_buffer_dirty(struct buffer_head *bh)
70{
71 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
72 __set_page_dirty_nobuffers(bh->b_page);
73}
74
75struct buffer_head *nilfs_grab_buffer(struct inode *inode, 61struct buffer_head *nilfs_grab_buffer(struct inode *inode,
76 struct address_space *mapping, 62 struct address_space *mapping,
77 unsigned long blkoff, 63 unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
183void nilfs_page_bug(struct page *page) 169void nilfs_page_bug(struct page *page)
184{ 170{
185 struct address_space *m; 171 struct address_space *m;
186 unsigned long ino = 0; 172 unsigned long ino;
187 173
188 if (unlikely(!page)) { 174 if (unlikely(!page)) {
189 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); 175 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
191 } 177 }
192 178
193 m = page->mapping; 179 m = page->mapping;
194 if (m) { 180 ino = m ? m->host->i_ino : 0;
195 struct inode *inode = NILFS_AS_I(m); 181
196 if (inode != NULL)
197 ino = inode->i_ino;
198 }
199 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " 182 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
200 "mapping=%p ino=%lu\n", 183 "mapping=%p ino=%lu\n",
201 page, atomic_read(&page->_count), 184 page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
217} 200}
218 201
219/** 202/**
220 * nilfs_alloc_private_page - allocate a private page with buffer heads
221 *
222 * Return Value: On success, a pointer to the allocated page is returned.
223 * On error, NULL is returned.
224 */
225struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
226 unsigned long state)
227{
228 struct buffer_head *bh, *head, *tail;
229 struct page *page;
230
231 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
232 if (unlikely(!page))
233 return NULL;
234
235 lock_page(page);
236 head = alloc_page_buffers(page, size, 0);
237 if (unlikely(!head)) {
238 unlock_page(page);
239 __free_page(page);
240 return NULL;
241 }
242
243 bh = head;
244 do {
245 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
246 tail = bh;
247 bh->b_bdev = bdev;
248 bh = bh->b_this_page;
249 } while (bh);
250
251 tail->b_this_page = head;
252 attach_page_buffers(page, head);
253
254 return page;
255}
256
257void nilfs_free_private_page(struct page *page)
258{
259 BUG_ON(!PageLocked(page));
260 BUG_ON(page->mapping);
261
262 if (page_has_buffers(page) && !try_to_free_buffers(page))
263 NILFS_PAGE_BUG(page, "failed to free page");
264
265 unlock_page(page);
266 __free_page(page);
267}
268
269/**
270 * nilfs_copy_page -- copy the page with buffers 203 * nilfs_copy_page -- copy the page with buffers
271 * @dst: destination page 204 * @dst: destination page
272 * @src: source page 205 * @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 425 return nc;
493} 426}
494 427
495void nilfs_mapping_init(struct address_space *mapping, 428void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
496 struct backing_dev_info *bdi) 429 struct backing_dev_info *bdi)
497{ 430{
498 mapping->host = NULL; 431 mapping->host = inode;
499 mapping->flags = 0; 432 mapping->flags = 0;
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 433 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 434 mapping->assoc_mapping = NULL;
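
Giving every private mapping a real ->host inode is what makes the removal of nilfs_mark_buffer_dirty() safe. The generic helper, abridged from fs/buffer.c of this era (a sketch, not the verbatim source):

	void mark_buffer_dirty(struct buffer_head *bh)
	{
		if (!test_set_buffer_dirty(bh)) {
			struct page *page = bh->b_page;

			if (!TestSetPageDirty(page)) {
				struct address_space *mapping = page_mapping(page);

				/* Ends in __mark_inode_dirty(mapping->host, ...);
				 * the old NULL ->host of the btnode/gc caches made
				 * that oops, hence the private helper removed above.
				 */
				if (mapping)
					__set_page_dirty(page, mapping, 0);
			}
		}
	}

With ->host valid everywhere, the generic path correctly dirties the buffer, the page and the owning inode, and the workaround can go.
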
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
38 BH_NILFS_Redirected, 38 BH_NILFS_Redirected,
39}; 39};
40 40
41BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
42BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ 41BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
43BUFFER_FNS(NILFS_Volatile, nilfs_volatile) 42BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
44BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ 43BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
45BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ 44BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
46 45
47 46
48void nilfs_mark_buffer_dirty(struct buffer_head *bh);
49int __nilfs_clear_page_dirty(struct page *); 47int __nilfs_clear_page_dirty(struct page *);
50 48
51struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, 49struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
54void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); 52void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
55int nilfs_page_buffers_clean(struct page *); 53int nilfs_page_buffers_clean(struct page *);
56void nilfs_page_bug(struct page *); 54void nilfs_page_bug(struct page *);
57struct page *nilfs_alloc_private_page(struct block_device *, int,
58 unsigned long);
59void nilfs_free_private_page(struct page *);
60 55
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 56int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 57void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 58void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 59void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
65 struct backing_dev_info *bdi); 60 struct backing_dev_info *bdi);
66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 61unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
67unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 62unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
387static void dispose_recovery_list(struct list_head *head) 387static void dispose_recovery_list(struct list_head *head)
388{ 388{
389 while (!list_empty(head)) { 389 while (!list_empty(head)) {
390 struct nilfs_recovery_block *rb 390 struct nilfs_recovery_block *rb;
391 = list_entry(head->next, 391
392 struct nilfs_recovery_block, list); 392 rb = list_first_entry(head, struct nilfs_recovery_block, list);
393 list_del(&rb->list); 393 list_del(&rb->list);
394 kfree(rb); 394 kfree(rb);
395 } 395 }
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
416void nilfs_dispose_segment_list(struct list_head *head) 416void nilfs_dispose_segment_list(struct list_head *head)
417{ 417{
418 while (!list_empty(head)) { 418 while (!list_empty(head)) {
419 struct nilfs_segment_entry *ent 419 struct nilfs_segment_entry *ent;
420 = list_entry(head->next, 420
421 struct nilfs_segment_entry, list); 421 ent = list_first_entry(head, struct nilfs_segment_entry, list);
422 list_del(&ent->list); 422 list_del(&ent->list);
423 kfree(ent); 423 kfree(ent);
424 } 424 }
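
The list_first_entry() conversions above are pure sugar. From <linux/list.h> of this era:

	#define list_first_entry(ptr, type, member) \
		list_entry((ptr)->next, type, member)

so the rewritten statement is exactly the old list_entry(head->next, ...) expression, merely stating "take the first element" directly.
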
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed) 239 u32 seed)
240{ 240{
241 struct nilfs_super_root *raw_sr; 241 struct nilfs_super_root *raw_sr;
242 struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
243 unsigned srsize;
242 u32 crc; 244 u32 crc;
243 245
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; 246 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
247 srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
245 crc = crc32_le(seed, 248 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), 249 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); 250 srsize - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc); 251 raw_sr->sr_sum = cpu_to_le32(crc);
249} 252}
250 253
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
254 257
255 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { 258 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
256 list_del_init(&bh->b_assoc_buffers); 259 list_del_init(&bh->b_assoc_buffers);
257 if (buffer_nilfs_allocated(bh)) {
258 struct page *clone_page = bh->b_page;
259
260 /* remove clone page */
261 brelse(bh);
262 page_cache_release(clone_page); /* for each bh */
263 if (page_count(clone_page) <= 2) {
264 lock_page(clone_page);
265 nilfs_free_private_page(clone_page);
266 }
267 continue;
268 }
269 brelse(bh); 260 brelse(bh);
270 } 261 }
271} 262}
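
The CRC change follows from NILFS_SR_BYTES becoming a function of the on-disk inode size instead of a sizeof() constant. Assuming the macro layout introduced by this series, NILFS_SR_BYTES(isz) is the 16-byte super root header (sr_sum, sr_bytes, sr_flags, sr_nongc_ctime) plus three inode slots for the DAT, cpfile and sufile, i.e. 16 + 3 * isz. A worked example: with the default isz = 128 the checksummed span is 16 + 384 = 400 bytes, and in a 4096-byte block the memset added to nilfs_segctor_fill_in_super_root() below zeroes the remaining 3696 bytes, so the unchecksummed tail stays deterministic.
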
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
655 if (unlikely(page->index > last)) 655 if (unlikely(page->index > last))
656 break; 656 break;
657 657
658 if (mapping->host) { 658 lock_page(page);
659 lock_page(page); 659 if (!page_has_buffers(page))
660 if (!page_has_buffers(page)) 660 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
661 create_empty_buffers(page, 661 unlock_page(page);
662 1 << inode->i_blkbits, 0);
663 unlock_page(page);
664 }
665 662
666 bh = head = page_buffers(page); 663 bh = head = page_buffers(page);
667 do { 664 do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
809 /* The following code is duplicated with cpfile. But, it is 806 /* The following code is duplicated with cpfile. But, it is
810 needed to collect the checkpoint even if it was not newly 807 needed to collect the checkpoint even if it was not newly
811 created */ 808 created */
812 nilfs_mdt_mark_buffer_dirty(bh_cp); 809 mark_buffer_dirty(bh_cp);
813 nilfs_mdt_mark_dirty(nilfs->ns_cpfile); 810 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
814 nilfs_cpfile_put_checkpoint( 811 nilfs_cpfile_put_checkpoint(
815 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 812 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
889{ 886{
890 struct buffer_head *bh_sr; 887 struct buffer_head *bh_sr;
891 struct nilfs_super_root *raw_sr; 888 struct nilfs_super_root *raw_sr;
892 unsigned isz = nilfs->ns_inode_size; 889 unsigned isz, srsz;
893 890
894 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 891 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
895 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 892 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
893 isz = nilfs->ns_inode_size;
894 srsz = NILFS_SR_BYTES(isz);
896 895
897 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 896 raw_sr->sr_bytes = cpu_to_le16(srsz);
898 raw_sr->sr_nongc_ctime 897 raw_sr->sr_nongc_ctime
899 = cpu_to_le64(nilfs_doing_gc() ? 898 = cpu_to_le64(nilfs_doing_gc() ?
900 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 899 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
906 NILFS_SR_CPFILE_OFFSET(isz), 1); 905 NILFS_SR_CPFILE_OFFSET(isz), 1);
907 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
908 NILFS_SR_SUFILE_OFFSET(isz), 1); 907 NILFS_SR_SUFILE_OFFSET(isz), 1);
908 memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
909} 909}
910 910
911static void nilfs_redirty_inodes(struct list_head *head) 911static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
954 954
955 dispose_buffers: 955 dispose_buffers:
956 while (!list_empty(listp)) { 956 while (!list_empty(listp)) {
957 bh = list_entry(listp->next, struct buffer_head, 957 bh = list_first_entry(listp, struct buffer_head,
958 b_assoc_buffers); 958 b_assoc_buffers);
959 list_del_init(&bh->b_assoc_buffers); 959 list_del_init(&bh->b_assoc_buffers);
960 brelse(bh); 960 brelse(bh);
961 } 961 }
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1500 nblocks = le32_to_cpu(finfo->fi_nblocks); 1500 nblocks = le32_to_cpu(finfo->fi_nblocks);
1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1502 1502
1503 if (buffer_nilfs_node(bh)) 1503 inode = bh->b_page->mapping->host;
1504 inode = NILFS_BTNC_I(bh->b_page->mapping);
1505 else
1506 inode = NILFS_AS_I(bh->b_page->mapping);
1507 1504
1508 if (mode == SC_LSEG_DSYNC) 1505 if (mode == SC_LSEG_DSYNC)
1509 sc_op = &nilfs_sc_dsync_ops; 1506 sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1556 return 0; 1553 return 0;
1557} 1554}
1558 1555
1559static int 1556static void nilfs_begin_page_io(struct page *page)
1560nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1561{
1562 struct page *clone_page;
1563 struct buffer_head *bh, *head, *bh2;
1564 void *kaddr;
1565
1566 bh = head = page_buffers(page);
1567
1568 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1569 if (unlikely(!clone_page))
1570 return -ENOMEM;
1571
1572 bh2 = page_buffers(clone_page);
1573 kaddr = kmap_atomic(page, KM_USER0);
1574 do {
1575 if (list_empty(&bh->b_assoc_buffers))
1576 continue;
1577 get_bh(bh2);
1578 page_cache_get(clone_page); /* for each bh */
1579 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1580 bh2->b_blocknr = bh->b_blocknr;
1581 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1582 list_add_tail(&bh->b_assoc_buffers, out);
1583 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1584 kunmap_atomic(kaddr, KM_USER0);
1585
1586 if (!TestSetPageWriteback(clone_page))
1587 account_page_writeback(clone_page);
1588 unlock_page(clone_page);
1589
1590 return 0;
1591}
1592
1593static int nilfs_test_page_to_be_frozen(struct page *page)
1594{
1595 struct address_space *mapping = page->mapping;
1596
1597 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1598 return 0;
1599
1600 if (page_mapped(page)) {
1601 ClearPageChecked(page);
1602 return 1;
1603 }
1604 return PageChecked(page);
1605}
1606
1607static int nilfs_begin_page_io(struct page *page, struct list_head *out)
1608{ 1557{
1609 if (!page || PageWriteback(page)) 1558 if (!page || PageWriteback(page))
1610 /* For split b-tree node pages, this function may be called 1559 /* For split b-tree node pages, this function may be called
1611 twice. We ignore the 2nd or later calls by this check. */ 1560 twice. We ignore the 2nd or later calls by this check. */
1612 return 0; 1561 return;
1613 1562
1614 lock_page(page); 1563 lock_page(page);
1615 clear_page_dirty_for_io(page); 1564 clear_page_dirty_for_io(page);
1616 set_page_writeback(page); 1565 set_page_writeback(page);
1617 unlock_page(page); 1566 unlock_page(page);
1618
1619 if (nilfs_test_page_to_be_frozen(page)) {
1620 int err = nilfs_copy_replace_page_buffers(page, out);
1621 if (unlikely(err))
1622 return err;
1623 }
1624 return 0;
1625} 1567}
1626 1568
1627static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, 1569static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1628 struct page **failed_page)
1629{ 1570{
1630 struct nilfs_segment_buffer *segbuf; 1571 struct nilfs_segment_buffer *segbuf;
1631 struct page *bd_page = NULL, *fs_page = NULL; 1572 struct page *bd_page = NULL, *fs_page = NULL;
1632 struct list_head *list = &sci->sc_copied_buffers;
1633 int err;
1634 1573
1635 *failed_page = NULL;
1636 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1574 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1637 struct buffer_head *bh; 1575 struct buffer_head *bh;
1638 1576
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1662 break; 1600 break;
1663 } 1601 }
1664 if (bh->b_page != fs_page) { 1602 if (bh->b_page != fs_page) {
1665 err = nilfs_begin_page_io(fs_page, list); 1603 nilfs_begin_page_io(fs_page);
1666 if (unlikely(err)) {
1667 *failed_page = fs_page;
1668 goto out;
1669 }
1670 fs_page = bh->b_page; 1604 fs_page = bh->b_page;
1671 } 1605 }
1672 } 1606 }
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1677 set_page_writeback(bd_page); 1611 set_page_writeback(bd_page);
1678 unlock_page(bd_page); 1612 unlock_page(bd_page);
1679 } 1613 }
1680 err = nilfs_begin_page_io(fs_page, list); 1614 nilfs_begin_page_io(fs_page);
1681 if (unlikely(err))
1682 *failed_page = fs_page;
1683 out:
1684 return err;
1685} 1615}
1686 1616
1687static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1617static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1694 return ret; 1624 return ret;
1695} 1625}
1696 1626
1697static void __nilfs_end_page_io(struct page *page, int err)
1698{
1699 if (!err) {
1700 if (!nilfs_page_buffers_clean(page))
1701 __set_page_dirty_nobuffers(page);
1702 ClearPageError(page);
1703 } else {
1704 __set_page_dirty_nobuffers(page);
1705 SetPageError(page);
1706 }
1707
1708 if (buffer_nilfs_allocated(page_buffers(page))) {
1709 if (TestClearPageWriteback(page))
1710 dec_zone_page_state(page, NR_WRITEBACK);
1711 } else
1712 end_page_writeback(page);
1713}
1714
1715static void nilfs_end_page_io(struct page *page, int err) 1627static void nilfs_end_page_io(struct page *page, int err)
1716{ 1628{
1717 if (!page) 1629 if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
1738 return; 1650 return;
1739 } 1651 }
1740 1652
1741 __nilfs_end_page_io(page, err); 1653 if (!err) {
1742} 1654 if (!nilfs_page_buffers_clean(page))
1743 1655 __set_page_dirty_nobuffers(page);
1744static void nilfs_clear_copied_buffers(struct list_head *list, int err) 1656 ClearPageError(page);
1745{ 1657 } else {
1746 struct buffer_head *bh, *head; 1658 __set_page_dirty_nobuffers(page);
1747 struct page *page; 1659 SetPageError(page);
1748
1749 while (!list_empty(list)) {
1750 bh = list_entry(list->next, struct buffer_head,
1751 b_assoc_buffers);
1752 page = bh->b_page;
1753 page_cache_get(page);
1754 head = bh = page_buffers(page);
1755 do {
1756 if (!list_empty(&bh->b_assoc_buffers)) {
1757 list_del_init(&bh->b_assoc_buffers);
1758 if (!err) {
1759 set_buffer_uptodate(bh);
1760 clear_buffer_dirty(bh);
1761 clear_buffer_delay(bh);
1762 clear_buffer_nilfs_volatile(bh);
1763 }
1764 brelse(bh); /* for b_assoc_buffers */
1765 }
1766 } while ((bh = bh->b_this_page) != head);
1767
1768 __nilfs_end_page_io(page, err);
1769 page_cache_release(page);
1770 } 1660 }
1661
1662 end_page_writeback(page);
1771} 1663}
1772 1664
1773static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1665static void nilfs_abort_logs(struct list_head *logs, int err)
1774 int err)
1775{ 1666{
1776 struct nilfs_segment_buffer *segbuf; 1667 struct nilfs_segment_buffer *segbuf;
1777 struct page *bd_page = NULL, *fs_page = NULL; 1668 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1801 } 1692 }
1802 if (bh->b_page != fs_page) { 1693 if (bh->b_page != fs_page) {
1803 nilfs_end_page_io(fs_page, err); 1694 nilfs_end_page_io(fs_page, err);
1804 if (fs_page && fs_page == failed_page)
1805 return;
1806 fs_page = bh->b_page; 1695 fs_page = bh->b_page;
1807 } 1696 }
1808 } 1697 }
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1821 1710
1822 list_splice_tail_init(&sci->sc_write_logs, &logs); 1711 list_splice_tail_init(&sci->sc_write_logs, &logs);
1823 ret = nilfs_wait_on_logs(&logs); 1712 ret = nilfs_wait_on_logs(&logs);
1824 nilfs_abort_logs(&logs, NULL, ret ? : err); 1713 nilfs_abort_logs(&logs, ret ? : err);
1825 1714
1826 list_splice_tail_init(&sci->sc_segbufs, &logs); 1715 list_splice_tail_init(&sci->sc_segbufs, &logs);
1827 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1716 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1828 nilfs_free_incomplete_logs(&logs, nilfs); 1717 nilfs_free_incomplete_logs(&logs, nilfs);
1829 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1830 1718
1831 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1719 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1832 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1720 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1920 1808
1921 nilfs_end_page_io(fs_page, 0); 1809 nilfs_end_page_io(fs_page, 0);
1922 1810
1923 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
1924
1925 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1811 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1926 1812
1927 if (nilfs_doing_gc()) 1813 if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1979 "failed to get inode block.\n"); 1865 "failed to get inode block.\n");
1980 return err; 1866 return err;
1981 } 1867 }
1982 nilfs_mdt_mark_buffer_dirty(ibh); 1868 mark_buffer_dirty(ibh);
1983 nilfs_mdt_mark_dirty(ifile); 1869 nilfs_mdt_mark_dirty(ifile);
1984 spin_lock(&nilfs->ns_inode_lock); 1870 spin_lock(&nilfs->ns_inode_lock);
1985 if (likely(!ii->i_bh)) 1871 if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1991 1877
1992 clear_bit(NILFS_I_QUEUED, &ii->i_state); 1878 clear_bit(NILFS_I_QUEUED, &ii->i_state);
1993 set_bit(NILFS_I_BUSY, &ii->i_state); 1879 set_bit(NILFS_I_BUSY, &ii->i_state);
1994 list_del(&ii->i_dirty); 1880 list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
1996 } 1881 }
1997 spin_unlock(&nilfs->ns_inode_lock); 1882 spin_unlock(&nilfs->ns_inode_lock);
1998 1883
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2014 clear_bit(NILFS_I_BUSY, &ii->i_state); 1899 clear_bit(NILFS_I_BUSY, &ii->i_state);
2015 brelse(ii->i_bh); 1900 brelse(ii->i_bh);
2016 ii->i_bh = NULL; 1901 ii->i_bh = NULL;
2017 list_del(&ii->i_dirty); 1902 list_move_tail(&ii->i_dirty, &ti->ti_garbage);
2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2019 } 1903 }
2020 spin_unlock(&nilfs->ns_inode_lock); 1904 spin_unlock(&nilfs->ns_inode_lock);
2021} 1905}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 1910static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2027{ 1911{
2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1912 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2029 struct page *failed_page;
2030 int err; 1913 int err;
2031 1914
2032 sci->sc_stage.scnt = NILFS_ST_INIT; 1915 sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2081 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 1964 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2082 1965
2083 /* Write partial segments */ 1966 /* Write partial segments */
2084 err = nilfs_segctor_prepare_write(sci, &failed_page); 1967 nilfs_segctor_prepare_write(sci);
2085 if (err) {
2086 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2087 goto failed_to_write;
2088 }
2089 1968
2090 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 1969 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2091 nilfs->ns_crc_seed); 1970 nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2687 INIT_LIST_HEAD(&sci->sc_segbufs); 2566 INIT_LIST_HEAD(&sci->sc_segbufs);
2688 INIT_LIST_HEAD(&sci->sc_write_logs); 2567 INIT_LIST_HEAD(&sci->sc_write_logs);
2689 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2568 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2690 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2691 init_timer(&sci->sc_timer); 2569 init_timer(&sci->sc_timer);
2692 2570
2693 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2571 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2741 if (flag || !nilfs_segctor_confirm(sci)) 2619 if (flag || !nilfs_segctor_confirm(sci))
2742 nilfs_segctor_write_out(sci); 2620 nilfs_segctor_write_out(sci);
2743 2621
2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2745
2746 if (!list_empty(&sci->sc_dirty_files)) { 2622 if (!list_empty(&sci->sc_dirty_files)) {
2747 nilfs_warning(sci->sc_super, __func__, 2623 nilfs_warning(sci->sc_super, __func__,
2748 "dirty file(s) after the final construction\n"); 2624 "dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 94 * @sc_gc_inodes: List of GC inodes having blocks to be written
95 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
96 * @sc_freesegs: array of segment numbers to be freed 95 * @sc_freesegs: array of segment numbers to be freed
97 * @sc_nfreesegs: number of segments on @sc_freesegs 96 * @sc_nfreesegs: number of segments on @sc_freesegs
98 * @sc_dsync_inode: inode whose data pages are written for a sync operation 97 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
136 135
137 struct list_head sc_dirty_files; 136 struct list_head sc_dirty_files;
138 struct list_head sc_gc_inodes; 137 struct list_head sc_gc_inodes;
139 struct list_head sc_copied_buffers;
140 138
141 __u64 *sc_freesegs; 139 __u64 *sc_freesegs;
142 size_t sc_nfreesegs; 140 size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
33 33
34struct nilfs_sufile_info { 34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi; 35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs; 36 unsigned long ncleansegs;/* number of clean segments */
37 __u64 allocmin; /* lower limit of allocatable segment range */
38 __u64 allocmax; /* upper limit of allocatable segment range */
37}; 39};
38 40
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) 41static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
96 create, NULL, bhp); 98 create, NULL, bhp);
97} 99}
98 100
101static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
102 __u64 segnum)
103{
104 return nilfs_mdt_delete_block(sufile,
105 nilfs_sufile_get_blkoff(sufile, segnum));
106}
107
99static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, 108static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
100 u64 ncleanadd, u64 ndirtyadd) 109 u64 ncleanadd, u64 ndirtyadd)
101{ 110{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
108 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
109 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr, KM_USER0);
110 119
111 nilfs_mdt_mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
112} 121}
113 122
114/** 123/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
248} 257}
249 258
250/** 259/**
260 * nilfs_sufile_set_alloc_range - limit the range of segments to be allocated
261 * @sufile: inode of segment usage file
262 * @start: minimum segment number of allocatable region (inclusive)
263 * @end: maximum segment number of allocatable region (inclusive)
264 *
265 * Return Value: On success, 0 is returned. On error, one of the
266 * following negative error codes is returned.
267 *
268 * %-ERANGE - invalid segment region
269 */
270int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
271{
272 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
273 __u64 nsegs;
274 int ret = -ERANGE;
275
276 down_write(&NILFS_MDT(sufile)->mi_sem);
277 nsegs = nilfs_sufile_get_nsegments(sufile);
278
279 if (start <= end && end < nsegs) {
280 sui->allocmin = start;
281 sui->allocmax = end;
282 ret = 0;
283 }
284 up_write(&NILFS_MDT(sufile)->mi_sem);
285 return ret;
286}
287
288/**
251 * nilfs_sufile_alloc - allocate a segment 289 * nilfs_sufile_alloc - allocate a segment
252 * @sufile: inode of segment usage file 290 * @sufile: inode of segment usage file
253 * @segnump: pointer to segment number 291 * @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
269 struct buffer_head *header_bh, *su_bh; 307 struct buffer_head *header_bh, *su_bh;
270 struct nilfs_sufile_header *header; 308 struct nilfs_sufile_header *header;
271 struct nilfs_segment_usage *su; 309 struct nilfs_segment_usage *su;
310 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
272 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 311 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
273 __u64 segnum, maxsegnum, last_alloc; 312 __u64 segnum, maxsegnum, last_alloc;
274 void *kaddr; 313 void *kaddr;
275 unsigned long nsegments, ncleansegs, nsus; 314 unsigned long nsegments, ncleansegs, nsus, cnt;
276 int ret, i, j; 315 int ret, j;
277 316
278 down_write(&NILFS_MDT(sufile)->mi_sem); 317 down_write(&NILFS_MDT(sufile)->mi_sem);
279 318
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
287 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr, KM_USER0);
288 327
289 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax;
290 segnum = last_alloc + 1; 330 segnum = last_alloc + 1;
291 maxsegnum = nsegments - 1; 331 if (segnum < sui->allocmin || segnum > sui->allocmax)
292 for (i = 0; i < nsegments; i += nsus) { 332 segnum = sui->allocmin;
293 if (segnum >= nsegments) { 333
294 /* wrap around */ 334 for (cnt = 0; cnt < nsegments; cnt += nsus) {
295 segnum = 0; 335 if (segnum > maxsegnum) {
296 maxsegnum = last_alloc; 336 if (cnt < sui->allocmax - sui->allocmin + 1) {
337 /*
338 * Wrap around within the limited region;
339 * if allocation started from
340 * sui->allocmin, this never happens.
341 */
342 segnum = sui->allocmin;
343 maxsegnum = last_alloc;
344 } else if (segnum > sui->allocmin &&
345 sui->allocmax + 1 < nsegments) {
346 segnum = sui->allocmax + 1;
347 maxsegnum = nsegments - 1;
348 } else if (sui->allocmin > 0) {
349 segnum = 0;
350 maxsegnum = sui->allocmin - 1;
351 } else {
352 break; /* never happens */
353 }
297 } 354 }
298 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 355 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
299 &su_bh); 356 &su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
320 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr, KM_USER0);
321 378
322 NILFS_SUI(sufile)->ncleansegs--; 379 sui->ncleansegs--;
323 nilfs_mdt_mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
324 nilfs_mdt_mark_buffer_dirty(su_bh); 381 mark_buffer_dirty(su_bh);
325 nilfs_mdt_mark_dirty(sufile); 382 nilfs_mdt_mark_dirty(sufile);
326 brelse(su_bh); 383 brelse(su_bh);
327 *segnump = segnum; 384 *segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
364 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
366 423
367 nilfs_mdt_mark_buffer_dirty(su_bh); 424 mark_buffer_dirty(su_bh);
368 nilfs_mdt_mark_dirty(sufile); 425 nilfs_mdt_mark_dirty(sufile);
369} 426}
370 427
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
397 454
398 nilfs_mdt_mark_buffer_dirty(su_bh); 455 mark_buffer_dirty(su_bh);
399 nilfs_mdt_mark_dirty(sufile); 456 nilfs_mdt_mark_dirty(sufile);
400} 457}
401 458
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
421 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
422 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
423 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr, KM_USER0);
424 nilfs_mdt_mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
425 482
426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++; 484 NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
441 498
442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); 499 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
443 if (!ret) { 500 if (!ret) {
444 nilfs_mdt_mark_buffer_dirty(bh); 501 mark_buffer_dirty(bh);
445 nilfs_mdt_mark_dirty(sufile); 502 nilfs_mdt_mark_dirty(sufile);
446 brelse(bh); 503 brelse(bh);
447 } 504 }
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
476 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr, KM_USER0);
478 535
479 nilfs_mdt_mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
481 brelse(bh); 538 brelse(bh);
482 539
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
505{ 562{
506 struct buffer_head *header_bh; 563 struct buffer_head *header_bh;
507 struct nilfs_sufile_header *header; 564 struct nilfs_sufile_header *header;
508 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 565 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
509 void *kaddr; 566 void *kaddr;
510 int ret; 567 int ret;
511 568
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
555 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--; 613 NILFS_SUI(sufile)->ncleansegs--;
557 } 614 }
558 nilfs_mdt_mark_buffer_dirty(su_bh); 615 mark_buffer_dirty(su_bh);
559 nilfs_mdt_mark_dirty(sufile); 616 nilfs_mdt_mark_dirty(sufile);
560} 617}
561 618
562/** 619/**
620 * nilfs_sufile_truncate_range - truncate range of segment array
621 * @sufile: inode of segment usage file
622 * @start: start segment number (inclusive)
623 * @end: end segment number (inclusive)
624 *
625 * Return Value: On success, 0 is returned. On error, one of the
626 * following negative error codes is returned.
627 *
628 * %-EIO - I/O error.
629 *
630 * %-ENOMEM - Insufficient amount of memory available.
631 *
632 * %-EINVAL - Invalid number of segments specified
633 *
634 * %-EBUSY - Dirty or active segments are present in the range
635 */
636static int nilfs_sufile_truncate_range(struct inode *sufile,
637 __u64 start, __u64 end)
638{
639 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
640 struct buffer_head *header_bh;
641 struct buffer_head *su_bh;
642 struct nilfs_segment_usage *su, *su2;
643 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
644 unsigned long segusages_per_block;
645 unsigned long nsegs, ncleaned;
646 __u64 segnum;
647 void *kaddr;
648 ssize_t n, nc;
649 int ret;
650 int j;
651
652 nsegs = nilfs_sufile_get_nsegments(sufile);
653
654 ret = -EINVAL;
655 if (start > end || start >= nsegs)
656 goto out;
657
658 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
659 if (ret < 0)
660 goto out;
661
662 segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
663 ncleaned = 0;
664
665 for (segnum = start; segnum <= end; segnum += n) {
666 n = min_t(unsigned long,
667 segusages_per_block -
668 nilfs_sufile_get_offset(sufile, segnum),
669 end - segnum + 1);
670 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
671 &su_bh);
672 if (ret < 0) {
673 if (ret != -ENOENT)
674 goto out_header;
675 /* hole */
676 continue;
677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr);
681 su2 = su;
682 for (j = 0; j < n; j++, su = (void *)su + susz) {
683 if ((le32_to_cpu(su->su_flags) &
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0);
688 brelse(su_bh);
689 goto out_header;
690 }
691 }
692 nc = 0;
693 for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
694 if (nilfs_segment_usage_error(su)) {
695 nilfs_segment_usage_set_clean(su);
696 nc++;
697 }
698 }
699 kunmap_atomic(kaddr, KM_USER0);
700 if (nc > 0) {
701 mark_buffer_dirty(su_bh);
702 ncleaned += nc;
703 }
704 brelse(su_bh);
705
706 if (n == segusages_per_block) {
707 /* make hole */
708 nilfs_sufile_delete_segment_usage_block(sufile, segnum);
709 }
710 }
711 ret = 0;
712
713out_header:
714 if (ncleaned > 0) {
715 NILFS_SUI(sufile)->ncleansegs += ncleaned;
716 nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
717 nilfs_mdt_mark_dirty(sufile);
718 }
719 brelse(header_bh);
720out:
721 return ret;
722}
723
724/**
725 * nilfs_sufile_resize - resize segment array
726 * @sufile: inode of segment usage file
727 * @newnsegs: new number of segments
728 *
729 * Return Value: On success, 0 is returned. On error, one of the
730 * following negative error codes is returned.
731 *
732 * %-EIO - I/O error.
733 *
734 * %-ENOMEM - Insufficient amount of memory available.
735 *
736 * %-ENOSPC - Enough free space is not left for shrinking
737 *
738 * %-EBUSY - Dirty or active segments exist in the region to be truncated
739 */
740int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
741{
742 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
743 struct buffer_head *header_bh;
744 struct nilfs_sufile_header *header;
745 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
746 void *kaddr;
747 unsigned long nsegs, nrsvsegs;
748 int ret = 0;
749
750 down_write(&NILFS_MDT(sufile)->mi_sem);
751
752 nsegs = nilfs_sufile_get_nsegments(sufile);
753 if (nsegs == newnsegs)
754 goto out;
755
756 ret = -ENOSPC;
757 nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
758 if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
759 goto out;
760
761 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
762 if (ret < 0)
763 goto out;
764
765 if (newnsegs > nsegs) {
766 sui->ncleansegs += newnsegs - nsegs;
767 } else /* newnsegs < nsegs */ {
768 ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
769 if (ret < 0)
770 goto out_header;
771
772 sui->ncleansegs -= nsegs - newnsegs;
773 }
774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0);
779
780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile);
782 nilfs_set_nsegments(nilfs, newnsegs);
783
784out_header:
785 brelse(header_bh);
786out:
787 up_write(&NILFS_MDT(sufile)->mi_sem);
788 return ret;
789}
790
791/**
563 * nilfs_sufile_get_suinfo - 792 * nilfs_sufile_get_suinfo -
564 * @sufile: inode of segment usage file 793 * @sufile: inode of segment usage file
565 * @segnum: segment number to start looking 794 * @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
583 struct nilfs_segment_usage *su; 812 struct nilfs_segment_usage *su;
584 struct nilfs_suinfo *si = buf; 813 struct nilfs_suinfo *si = buf;
585 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 814 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
586 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 815 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
587 void *kaddr; 816 void *kaddr;
588 unsigned long nsegs, segusages_per_block; 817 unsigned long nsegs, segusages_per_block;
589 ssize_t n; 818 ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
679 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr, KM_USER0);
680 brelse(header_bh); 909 brelse(header_bh);
681 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
912 sui->allocmin = 0;
913
682 unlock_new_inode(sufile); 914 unlock_new_inode(sufile);
683 out: 915 out:
684 *inodep = sufile; 916 *inodep = sufile;
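
With allocmin/allocmax in place, nilfs_sufile_alloc() scans in a fixed window order. A worked example, assuming nsegments = 100, allocmin = 20, allocmax = 59 and last_alloc = 34: the search starts at segment 35 with maxsegnum = 59; if 35..59 are all in use it wraps inside the limited region to 20..34; only once the whole region has been scanned does it spill to the segments above allocmax (60..99), and segments below allocmin are tried only when no region above allocmax exists. nilfs_sufile_resize() builds on the same bookkeeping: shrinking fails with -ENOSPC unless the segments being cut off plus the new reserve, nilfs_nrsvsegs(nilfs, newnsegs), fit within the current clean-segment count, and nilfs_sufile_truncate_range() punches fully covered segment-usage blocks out as holes via nilfs_mdt_delete_block().
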
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
31 31
32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
33{ 33{
34 return NILFS_I_NILFS(sufile)->ns_nsegments; 34 return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); 37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38 38
39int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
39int nilfs_sufile_alloc(struct inode *, __u64 *); 40int nilfs_sufile_alloc(struct inode *, __u64 *);
40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); 41int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, 42int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
62 struct buffer_head *); 63 struct buffer_head *);
63 64
65int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
64int nilfs_sufile_read(struct super_block *sb, size_t susize, 66int nilfs_sufile_read(struct super_block *sb, size_t susize,
65 struct nilfs_inode *raw_inode, struct inode **inodep); 67 struct nilfs_inode *raw_inode, struct inode **inodep);
66 68
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
56#include "btnode.h" 56#include "btnode.h"
57#include "page.h" 57#include "page.h"
58#include "cpfile.h" 58#include "cpfile.h"
59#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
59#include "ifile.h" 60#include "ifile.h"
60#include "dat.h" 61#include "dat.h"
61#include "segment.h" 62#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
165 ii->i_state = 0; 166 ii->i_state = 0;
166 ii->i_cno = 0; 167 ii->i_cno = 0;
167 ii->vfs_inode.i_version = 1; 168 ii->vfs_inode.i_version = 1;
168 nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); 169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
169 return &ii->vfs_inode; 170 return &ii->vfs_inode;
170} 171}
171 172
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
347 return ret; 348 return ret;
348} 349}
349 350
351/**
352 * nilfs_move_2nd_super - relocate secondary super block
353 * @sb: super block instance
354 * @sb2off: new offset of the secondary super block (in bytes)
355 */
356static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
357{
358 struct the_nilfs *nilfs = sb->s_fs_info;
359 struct buffer_head *nsbh;
360 struct nilfs_super_block *nsbp;
361 sector_t blocknr, newblocknr;
362 unsigned long offset;
363 int sb2i = -1; /* array index of the secondary superblock */
364 int ret = 0;
365
366 /* nilfs->ns_sem must be locked by the caller. */
367 if (nilfs->ns_sbh[1] &&
368 nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
369 sb2i = 1;
370 blocknr = nilfs->ns_sbh[1]->b_blocknr;
371 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
372 sb2i = 0;
373 blocknr = nilfs->ns_sbh[0]->b_blocknr;
374 }
375 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
376 goto out; /* super block location is unchanged */
377
378 /* Get new super block buffer */
379 newblocknr = sb2off >> nilfs->ns_blocksize_bits;
380 offset = sb2off & (nilfs->ns_blocksize - 1);
381 nsbh = sb_getblk(sb, newblocknr);
382 if (!nsbh) {
383 printk(KERN_WARNING
384 "NILFS warning: unable to move secondary superblock "
385 "to block %llu\n", (unsigned long long)newblocknr);
386 ret = -EIO;
387 goto out;
388 }
389 nsbp = (void *)nsbh->b_data + offset;
390 memset(nsbp, 0, nilfs->ns_blocksize);
391
392 if (sb2i >= 0) {
393 memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
394 brelse(nilfs->ns_sbh[sb2i]);
395 nilfs->ns_sbh[sb2i] = nsbh;
396 nilfs->ns_sbp[sb2i] = nsbp;
397 } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
398 /* secondary super block will be restored to index 1 */
399 nilfs->ns_sbh[1] = nsbh;
400 nilfs->ns_sbp[1] = nsbp;
401 } else {
402 brelse(nsbh);
403 }
404out:
405 return ret;
406}
407
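For reference, the block/offset split computed in nilfs_move_2nd_super works out as follows; a worked sketch with illustrative values (the 4 KiB alignment of NILFS_SB2_OFFSET_BYTES is my assumption from nilfs2_fs.h, not something this hunk shows):

	/*
	 * sb2off = 0x3FFFF000 (4 KiB aligned), blocksize = 4096
	 * (ns_blocksize_bits = 12):
	 *   newblocknr = 0x3FFFF000 >> 12        = 0x3FFFF
	 *   offset     = 0x3FFFF000 & (4096 - 1) = 0
	 * With 1 KiB blocks (bits = 10) the same sb2off yields
	 * newblocknr = 0xFFFFC and again offset = 0.
	 */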
408/**
409 * nilfs_resize_fs - resize the filesystem
410 * @sb: super block instance
411 * @newsize: new size of the filesystem (in bytes)
412 */
413int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
414{
415 struct the_nilfs *nilfs = sb->s_fs_info;
416 struct nilfs_super_block **sbp;
417 __u64 devsize, newnsegs;
418 loff_t sb2off;
419 int ret;
420
421 ret = -ERANGE;
422 devsize = i_size_read(sb->s_bdev->bd_inode);
423 if (newsize > devsize)
424 goto out;
425
426 /*
427 * Write lock is required to protect some functions depending
428 * on the number of segments, the number of reserved segments,
429 * and so forth.
430 */
431 down_write(&nilfs->ns_segctor_sem);
432
433 sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
434 newnsegs = sb2off >> nilfs->ns_blocksize_bits;
435 do_div(newnsegs, nilfs->ns_blocks_per_segment);
436
437 ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
438 up_write(&nilfs->ns_segctor_sem);
439 if (ret < 0)
440 goto out;
441
442 ret = nilfs_construct_segment(sb);
443 if (ret < 0)
444 goto out;
445
446 down_write(&nilfs->ns_sem);
447 nilfs_move_2nd_super(sb, sb2off);
448 ret = -EIO;
449 sbp = nilfs_prepare_super(sb, 0);
450 if (likely(sbp)) {
451 nilfs_set_log_cursor(sbp[0], nilfs);
452 /*
453 * Drop NILFS_RESIZE_FS flag for compatibility with
454 * mount-time resize which may be implemented in a
455 * future release.
456 */
457 sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
458 ~NILFS_RESIZE_FS);
459 sbp[0]->s_dev_size = cpu_to_le64(newsize);
460 sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
461 if (sbp[1])
462 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
463 ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
464 }
465 up_write(&nilfs->ns_sem);
466
467 /*
468 * Reset the range of allocatable segments last. This order
469 * is important in the case of expansion because the secondary
470 * superblock must be protected from log write until migration
471 * completes.
472 */
473 if (!ret)
474 nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
475out:
476 return ret;
477}
478
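To see what nilfs_resize_fs does with the numbers, here is a worked example with made-up geometry (it assumes NILFS_SB2_OFFSET_BYTES(s) expands to (((s) >> 12) - 1) << 12, per nilfs2_fs.h):

	/*
	 * newsize = 1 GiB (0x40000000), blocksize = 4 KiB,
	 * ns_blocks_per_segment = 2048:
	 *   sb2off   = ((0x40000000 >> 12) - 1) << 12 = 0x3FFFF000
	 *   newnsegs = (0x3FFFF000 >> 12) / 2048      = 127
	 * Only the 127 segments lying wholly below the relocated
	 * secondary super block stay allocatable after the resize.
	 */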
350static void nilfs_put_super(struct super_block *sb) 479static void nilfs_put_super(struct super_block *sb)
351{ 480{
352 struct the_nilfs *nilfs = sb->s_fs_info; 481 struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
363 return res; 363 return res;
364} 364}
365 365
366/**
367 * nilfs_nrsvsegs - calculate the number of reserved segments
368 * @nilfs: nilfs object
369 * @nsegs: total number of segments
370 */
371unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
372{
373 return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
374 DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
375 100));
376}
377
378void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
379{
380 nilfs->ns_nsegments = nsegs;
381 nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
382}
383
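A quick arithmetic check of the reserve formula (assuming NILFS_MIN_NRSVSEGS is 8, its value in nilfs2_fs.h as far as I can tell):

	/*
	 * nsegs = 1000, ns_r_segments_percentage = 5:
	 *   max(8, DIV_ROUND_UP(1000 * 5, 100)) = max(8, 50) = 50
	 * nsegs = 100, same percentage:
	 *   max(8, DIV_ROUND_UP(100 * 5, 100))  = max(8, 5)  = 8
	 * so very small devices still keep at least 8 reserved segments.
	 */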
366static int nilfs_store_disk_layout(struct the_nilfs *nilfs, 384static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
367 struct nilfs_super_block *sbp) 385 struct nilfs_super_block *sbp)
368{ 386{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
389 } 407 }
390 408
391 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
392 nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
393 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
394 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
395 nilfs->ns_nrsvsegs = 412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
396 max_t(unsigned long, NILFS_MIN_NRSVSEGS,
397 DIV_ROUND_UP(nilfs->ns_nsegments *
398 nilfs->ns_r_segments_percentage, 100));
399 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
400 return 0; 414 return 0;
401} 415}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
268void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
271unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
272void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 273int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 274int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 275struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index d8a0313e99e6..f17e58b32989 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -30,6 +30,7 @@ ocfs2-objs := \
30 namei.o \ 30 namei.o \
31 refcounttree.o \ 31 refcounttree.o \
32 reservations.o \ 32 reservations.o \
33 move_extents.o \
33 resize.o \ 34 resize.o \
34 slot_map.o \ 35 slot_map.o \
35 suballoc.o \ 36 suballoc.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 48aa9c7401c7..ed553c60de82 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -29,6 +29,7 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/swap.h> 30#include <linux/swap.h>
31#include <linux/quotaops.h> 31#include <linux/quotaops.h>
32#include <linux/blkdev.h>
32 33
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
34 35
@@ -7184,3 +7185,168 @@ out_commit:
7184out: 7185out:
7185 return ret; 7186 return ret;
7186} 7187}
7188
7189static int ocfs2_trim_extent(struct super_block *sb,
7190 struct ocfs2_group_desc *gd,
7191 u32 start, u32 count)
7192{
7193 u64 discard, bcount;
7194
7195 bcount = ocfs2_clusters_to_blocks(sb, count);
7196 discard = le64_to_cpu(gd->bg_blkno) +
7197 ocfs2_clusters_to_blocks(sb, start);
7198
7199 trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
7200
7201 return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
7202}
7203
7204static int ocfs2_trim_group(struct super_block *sb,
7205 struct ocfs2_group_desc *gd,
7206 u32 start, u32 max, u32 minbits)
7207{
7208 int ret = 0, count = 0, next;
7209 void *bitmap = gd->bg_bitmap;
7210
7211 if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
7212 return 0;
7213
7214 trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
7215 start, max, minbits);
7216
7217 while (start < max) {
7218 start = ocfs2_find_next_zero_bit(bitmap, max, start);
7219 if (start >= max)
7220 break;
7221 next = ocfs2_find_next_bit(bitmap, max, start);
7222
7223 if ((next - start) >= minbits) {
7224 ret = ocfs2_trim_extent(sb, gd,
7225 start, next - start);
7226 if (ret < 0) {
7227 mlog_errno(ret);
7228 break;
7229 }
7230 count += next - start;
7231 }
7232 start = next + 1;
7233
7234 if (fatal_signal_pending(current)) {
7235 count = -ERESTARTSYS;
7236 break;
7237 }
7238
7239 if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
7240 break;
7241 }
7242
7243 if (ret < 0)
7244 count = ret;
7245
7246 return count;
7247}
7248
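One hypothetical pass through ocfs2_trim_group, to make the zero-run scan concrete (bit pattern invented for illustration):

	/*
	 * minbits = 3, bits 0..11 of bg_bitmap = 1 1 0 0 0 0 1 1 1 0 0 1
	 * (1 = allocated, 0 = free):
	 *   find_next_zero_bit -> 2, find_next_bit -> 6:
	 *     run of 4 >= 3, extent discarded, count = 4
	 *   find_next_zero_bit -> 9, find_next_bit -> 11:
	 *     run of 2 < 3, skipped
	 * The function returns 4, the number of clusters trimmed.
	 */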
7249int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7250{
7251 struct ocfs2_super *osb = OCFS2_SB(sb);
7252 u64 start, len, trimmed, first_group, last_group, group;
7253 int ret, cnt;
7254 u32 first_bit, last_bit, minlen;
7255 struct buffer_head *main_bm_bh = NULL;
7256 struct inode *main_bm_inode = NULL;
7257 struct buffer_head *gd_bh = NULL;
7258 struct ocfs2_dinode *main_bm;
7259 struct ocfs2_group_desc *gd = NULL;
7260
7261 start = range->start >> osb->s_clustersize_bits;
7262 len = range->len >> osb->s_clustersize_bits;
7263 minlen = range->minlen >> osb->s_clustersize_bits;
7264 trimmed = 0;
7265
7266 if (!len) {
7267 range->len = 0;
7268 return 0;
7269 }
7270
7271 if (minlen >= osb->bitmap_cpg)
7272 return -EINVAL;
7273
7274 main_bm_inode = ocfs2_get_system_file_inode(osb,
7275 GLOBAL_BITMAP_SYSTEM_INODE,
7276 OCFS2_INVALID_SLOT);
7277 if (!main_bm_inode) {
7278 ret = -EIO;
7279 mlog_errno(ret);
7280 goto out;
7281 }
7282
7283 mutex_lock(&main_bm_inode->i_mutex);
7284
7285 ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
7286 if (ret < 0) {
7287 mlog_errno(ret);
7288 goto out_mutex;
7289 }
7290 main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
7291
7292 if (start >= le32_to_cpu(main_bm->i_clusters)) {
7293 ret = -EINVAL;
7294 goto out_unlock;
7295 }
7296
7297 if (start + len > le32_to_cpu(main_bm->i_clusters))
7298 len = le32_to_cpu(main_bm->i_clusters) - start;
7299
7300 trace_ocfs2_trim_fs(start, len, minlen);
7301
7302 /* Determine first and last group to examine based on start and len */
7303 first_group = ocfs2_which_cluster_group(main_bm_inode, start);
7304 if (first_group == osb->first_cluster_group_blkno)
7305 first_bit = start;
7306 else
7307 first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
7308 last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
7309 last_bit = osb->bitmap_cpg;
7310
7311 for (group = first_group; group <= last_group;) {
7312 if (first_bit + len >= osb->bitmap_cpg)
7313 last_bit = osb->bitmap_cpg;
7314 else
7315 last_bit = first_bit + len;
7316
7317 ret = ocfs2_read_group_descriptor(main_bm_inode,
7318 main_bm, group,
7319 &gd_bh);
7320 if (ret < 0) {
7321 mlog_errno(ret);
7322 break;
7323 }
7324
7325 gd = (struct ocfs2_group_desc *)gd_bh->b_data;
7326 cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
7327 brelse(gd_bh);
7328 gd_bh = NULL;
7329 if (cnt < 0) {
7330 ret = cnt;
7331 mlog_errno(ret);
7332 break;
7333 }
7334
7335 trimmed += cnt;
7336 len -= osb->bitmap_cpg - first_bit;
7337 first_bit = 0;
7338 if (group == osb->first_cluster_group_blkno)
7339 group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
7340 else
7341 group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
7342 }
7343 range->len = trimmed * sb->s_blocksize;
7344out_unlock:
7345 ocfs2_inode_unlock(main_bm_inode, 0);
7346 brelse(main_bm_bh);
7347out_mutex:
7348 mutex_unlock(&main_bm_inode->i_mutex);
7349 iput(main_bm_inode);
7350out:
7351 return ret;
7352}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3bd08a03251c..ca381c584127 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
239 struct buffer_head **leaf_bh); 239 struct buffer_head **leaf_bh);
240int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); 240int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
241 241
242int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range);
242/* 243/*
243 * Helper function to look at the # of clusters in an extent record. 244 * Helper function to look at the # of clusters in an extent record.
244 */ 245 */
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index bc702dab5d1f..a4b07730b2e1 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -57,7 +57,6 @@ static struct kset *o2cb_kset;
57void o2cb_sys_shutdown(void) 57void o2cb_sys_shutdown(void)
58{ 58{
59 mlog_sys_shutdown(); 59 mlog_sys_shutdown();
60 sysfs_remove_link(NULL, "o2cb");
61 kset_unregister(o2cb_kset); 60 kset_unregister(o2cb_kset);
62} 61}
63 62
@@ -69,14 +68,6 @@ int o2cb_sys_init(void)
69 if (!o2cb_kset) 68 if (!o2cb_kset)
70 return -ENOMEM; 69 return -ENOMEM;
71 70
72 /*
73 * Create this symlink for backwards compatibility with old
74 * versions of ocfs2-tools which look for things in /sys/o2cb.
75 */
76 ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb");
77 if (ret)
78 goto error;
79
80 ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); 71 ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
81 if (ret) 72 if (ret)
82 goto error; 73 goto error;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4bdf7baee344..d602abb51b61 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -144,6 +144,7 @@ struct dlm_ctxt
144 wait_queue_head_t dlm_join_events; 144 wait_queue_head_t dlm_join_events;
145 unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 145 unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
146 unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 146 unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
147 unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
147 unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 148 unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
148 struct dlm_recovery_ctxt reco; 149 struct dlm_recovery_ctxt reco;
149 spinlock_t master_lock; 150 spinlock_t master_lock;
@@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb)
401 return 1; 402 return 1;
402} 403}
403 404
405static inline char *dlm_list_in_text(enum dlm_lockres_list idx)
406{
407 if (idx == DLM_GRANTED_LIST)
408 return "granted";
409 else if (idx == DLM_CONVERTING_LIST)
410 return "converting";
411 else if (idx == DLM_BLOCKED_LIST)
412 return "blocked";
413 else
414 return "unknown";
415}
416
404static inline struct list_head * 417static inline struct list_head *
405dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) 418dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
406{ 419{
@@ -448,6 +461,7 @@ enum {
448 DLM_FINALIZE_RECO_MSG = 518, 461 DLM_FINALIZE_RECO_MSG = 518,
449 DLM_QUERY_REGION = 519, 462 DLM_QUERY_REGION = 519,
450 DLM_QUERY_NODEINFO = 520, 463 DLM_QUERY_NODEINFO = 520,
464 DLM_BEGIN_EXIT_DOMAIN_MSG = 521,
451}; 465};
452 466
453struct dlm_reco_node_data 467struct dlm_reco_node_data
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 04a32be0aeb9..56f82cb912e3 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
756 buf + out, len - out); 756 buf + out, len - out);
757 out += snprintf(buf + out, len - out, "\n"); 757 out += snprintf(buf + out, len - out, "\n");
758 758
759 /* Exit Domain Map: xx xx xx */
760 out += snprintf(buf + out, len - out, "Exit Domain Map: ");
761 out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES,
762 buf + out, len - out);
763 out += snprintf(buf + out, len - out, "\n");
764
759 /* Live Map: xx xx xx */ 765 /* Live Map: xx xx xx */
760 out += snprintf(buf + out, len - out, "Live Map: "); 766 out += snprintf(buf + out, len - out, "Live Map: ");
761 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, 767 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 3b179d6cbde0..6ed6b95dcf93 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
132 * New in version 1.1: 132 * New in version 1.1:
133 * - Message DLM_QUERY_REGION added to support global heartbeat 133 * - Message DLM_QUERY_REGION added to support global heartbeat
134 * - Message DLM_QUERY_NODEINFO added to allow online node removes 134 * - Message DLM_QUERY_NODEINFO added to allow online node removes
135 * New in version 1.2:
136 * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain
135 */ 137 */
136static const struct dlm_protocol_version dlm_protocol = { 138static const struct dlm_protocol_version dlm_protocol = {
137 .pv_major = 1, 139 .pv_major = 1,
138 .pv_minor = 1, 140 .pv_minor = 2,
139}; 141};
140 142
141#define DLM_DOMAIN_BACKOFF_MS 200 143#define DLM_DOMAIN_BACKOFF_MS 200
@@ -449,14 +451,18 @@ redo_bucket:
449 dropped = dlm_empty_lockres(dlm, res); 451 dropped = dlm_empty_lockres(dlm, res);
450 452
451 spin_lock(&res->spinlock); 453 spin_lock(&res->spinlock);
452 __dlm_lockres_calc_usage(dlm, res); 454 if (dropped)
453 iter = res->hash_node.next; 455 __dlm_lockres_calc_usage(dlm, res);
456 else
457 iter = res->hash_node.next;
454 spin_unlock(&res->spinlock); 458 spin_unlock(&res->spinlock);
455 459
456 dlm_lockres_put(res); 460 dlm_lockres_put(res);
457 461
458 if (dropped) 462 if (dropped) {
463 cond_resched_lock(&dlm->spinlock);
459 goto redo_bucket; 464 goto redo_bucket;
465 }
460 } 466 }
461 cond_resched_lock(&dlm->spinlock); 467 cond_resched_lock(&dlm->spinlock);
462 num += n; 468 num += n;
@@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm)
486 return ret; 492 return ret;
487} 493}
488 494
495static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len,
496 void *data, void **ret_data)
497{
498 struct dlm_ctxt *dlm = data;
499 unsigned int node;
500 struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf;
501
502 if (!dlm_grab(dlm))
503 return 0;
504
505 node = exit_msg->node_idx;
506 mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node);
507
508 spin_lock(&dlm->spinlock);
509 set_bit(node, dlm->exit_domain_map);
510 spin_unlock(&dlm->spinlock);
511
512 dlm_put(dlm);
513
514 return 0;
515}
516
489static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) 517static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm)
490{ 518{
491 /* Yikes, a double spinlock! I need domain_lock for the dlm 519 /* Yikes, a double spinlock! I need domain_lock for the dlm
@@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
542 570
543 spin_lock(&dlm->spinlock); 571 spin_lock(&dlm->spinlock);
544 clear_bit(node, dlm->domain_map); 572 clear_bit(node, dlm->domain_map);
573 clear_bit(node, dlm->exit_domain_map);
545 __dlm_print_nodes(dlm); 574 __dlm_print_nodes(dlm);
546 575
547 /* notify anything attached to the heartbeat events */ 576 /* notify anything attached to the heartbeat events */
@@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
554 return 0; 583 return 0;
555} 584}
556 585
557static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, 586static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type,
558 unsigned int node) 587 unsigned int node)
559{ 588{
560 int status; 589 int status;
561 struct dlm_exit_domain leave_msg; 590 struct dlm_exit_domain leave_msg;
562 591
563 mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", 592 mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name,
564 node, dlm->name, dlm->node_num); 593 msg_type, node);
565 594
566 memset(&leave_msg, 0, sizeof(leave_msg)); 595 memset(&leave_msg, 0, sizeof(leave_msg));
567 leave_msg.node_idx = dlm->node_num; 596 leave_msg.node_idx = dlm->node_num;
568 597
569 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, 598 status = o2net_send_message(msg_type, dlm->key, &leave_msg,
570 &leave_msg, sizeof(leave_msg), node, 599 sizeof(leave_msg), node, NULL);
571 NULL);
572 if (status < 0) 600 if (status < 0)
573 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 601 mlog(ML_ERROR, "Error %d sending domain exit message %u "
574 "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node); 602 "to node %u on domain %s\n", status, msg_type, node,
575 mlog(0, "status return %d from o2net_send_message\n", status); 603 dlm->name);
576 604
577 return status; 605 return status;
578} 606}
579 607
608static void dlm_begin_exit_domain(struct dlm_ctxt *dlm)
609{
610 int node = -1;
611
612 /* Support for begin exit domain was added in 1.2 */
613 if (dlm->dlm_locking_proto.pv_major == 1 &&
614 dlm->dlm_locking_proto.pv_minor < 2)
615 return;
616
617 /*
618 * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely
619 * informational. Meaning if a node does not receive the message,
620 * so be it.
621 */
622 spin_lock(&dlm->spinlock);
623 while (1) {
624 node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1);
625 if (node >= O2NM_MAX_NODES)
626 break;
627 if (node == dlm->node_num)
628 continue;
629
630 spin_unlock(&dlm->spinlock);
631 dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node);
632 spin_lock(&dlm->spinlock);
633 }
634 spin_unlock(&dlm->spinlock);
635}
580 636
581static void dlm_leave_domain(struct dlm_ctxt *dlm) 637static void dlm_leave_domain(struct dlm_ctxt *dlm)
582{ 638{
@@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
602 658
603 clear_node = 1; 659 clear_node = 1;
604 660
605 status = dlm_send_one_domain_exit(dlm, node); 661 status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG,
662 node);
606 if (status < 0 && 663 if (status < 0 &&
607 status != -ENOPROTOOPT && 664 status != -ENOPROTOOPT &&
608 status != -ENOTCONN) { 665 status != -ENOTCONN) {
@@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
677 734
678 if (leave) { 735 if (leave) {
679 mlog(0, "shutting down domain %s\n", dlm->name); 736 mlog(0, "shutting down domain %s\n", dlm->name);
737 dlm_begin_exit_domain(dlm);
680 738
681 /* We changed dlm state, notify the thread */ 739 /* We changed dlm state, notify the thread */
682 dlm_kick_thread(dlm, NULL); 740 dlm_kick_thread(dlm, NULL);
@@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
909 * leftover join state. */ 967 * leftover join state. */
910 BUG_ON(dlm->joining_node != assert->node_idx); 968 BUG_ON(dlm->joining_node != assert->node_idx);
911 set_bit(assert->node_idx, dlm->domain_map); 969 set_bit(assert->node_idx, dlm->domain_map);
970 clear_bit(assert->node_idx, dlm->exit_domain_map);
912 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 971 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
913 972
914 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", 973 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
@@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
1793 if (status) 1852 if (status)
1794 goto bail; 1853 goto bail;
1795 1854
1855 status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key,
1856 sizeof(struct dlm_exit_domain),
1857 dlm_begin_exit_domain_handler,
1858 dlm, NULL, &dlm->dlm_domain_handlers);
1859 if (status)
1860 goto bail;
1861
1796bail: 1862bail:
1797 if (status) 1863 if (status)
1798 dlm_unregister_domain_handlers(dlm); 1864 dlm_unregister_domain_handlers(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 84d166328cf7..11eefb8c12e9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2339 dlm_lockres_put(res); 2339 dlm_lockres_put(res);
2340} 2340}
2341 2341
2342/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 2342/*
2343 * if not. If 0, numlocks is set to the number of locks in the lockres. 2343 * A migrateable resource is one that:
 2344 * 1. is locally mastered,
 2345 * 2. has zero local locks, and
 2346 * 3. has one or more non-local locks, or one or more references.
2347 * Returns 1 if yes, 0 if not.
2344 */ 2348 */
2345static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2349static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2346 struct dlm_lock_resource *res, 2350 struct dlm_lock_resource *res)
2347 int *numlocks,
2348 int *hasrefs)
2349{ 2351{
2350 int ret; 2352 enum dlm_lockres_list idx;
2351 int i; 2353 int nonlocal = 0, node_ref;
2352 int count = 0;
2353 struct list_head *queue; 2354 struct list_head *queue;
2354 struct dlm_lock *lock; 2355 struct dlm_lock *lock;
2356 u64 cookie;
2355 2357
2356 assert_spin_locked(&res->spinlock); 2358 assert_spin_locked(&res->spinlock);
2357 2359
2358 *numlocks = 0; 2360 if (res->owner != dlm->node_num)
2359 *hasrefs = 0; 2361 return 0;
2360
2361 ret = -EINVAL;
2362 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2363 mlog(0, "cannot migrate lockres with unknown owner!\n");
2364 goto leave;
2365 }
2366
2367 if (res->owner != dlm->node_num) {
2368 mlog(0, "cannot migrate lockres this node doesn't own!\n");
2369 goto leave;
2370 }
2371 2362
2372 ret = 0; 2363 for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
2373 queue = &res->granted; 2364 queue = dlm_list_idx_to_ptr(res, idx);
2374 for (i = 0; i < 3; i++) {
2375 list_for_each_entry(lock, queue, list) { 2365 list_for_each_entry(lock, queue, list) {
2376 ++count; 2366 if (lock->ml.node != dlm->node_num) {
2377 if (lock->ml.node == dlm->node_num) { 2367 nonlocal++;
2378 mlog(0, "found a lock owned by this node still " 2368 continue;
2379 "on the %s queue! will not migrate this "
2380 "lockres\n", (i == 0 ? "granted" :
2381 (i == 1 ? "converting" :
2382 "blocked")));
2383 ret = -ENOTEMPTY;
2384 goto leave;
2385 } 2369 }
2370 cookie = be64_to_cpu(lock->ml.cookie);
2371 mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on "
2372 "%s list\n", dlm->name, res->lockname.len,
2373 res->lockname.name,
2374 dlm_get_lock_cookie_node(cookie),
2375 dlm_get_lock_cookie_seq(cookie),
2376 dlm_list_in_text(idx));
2377 return 0;
2386 } 2378 }
2387 queue++;
2388 } 2379 }
2389 2380
2390 *numlocks = count; 2381 if (!nonlocal) {
2391 2382 node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2392 count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); 2383 if (node_ref >= O2NM_MAX_NODES)
2393 if (count < O2NM_MAX_NODES) 2384 return 0;
2394 *hasrefs = 1; 2385 }
2395 2386
2396 mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, 2387 mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len,
2397 res->lockname.len, res->lockname.name, *numlocks, *hasrefs); 2388 res->lockname.name);
2398 2389
2399leave: 2390 return 1;
2400 return ret;
2401} 2391}
2402 2392
2403/* 2393/*
@@ -2406,8 +2396,7 @@ leave:
2406 2396
2407 2397
2408static int dlm_migrate_lockres(struct dlm_ctxt *dlm, 2398static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2409 struct dlm_lock_resource *res, 2399 struct dlm_lock_resource *res, u8 target)
2410 u8 target)
2411{ 2400{
2412 struct dlm_master_list_entry *mle = NULL; 2401 struct dlm_master_list_entry *mle = NULL;
2413 struct dlm_master_list_entry *oldmle = NULL; 2402 struct dlm_master_list_entry *oldmle = NULL;
@@ -2416,37 +2405,20 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2416 const char *name; 2405 const char *name;
2417 unsigned int namelen; 2406 unsigned int namelen;
2418 int mle_added = 0; 2407 int mle_added = 0;
2419 int numlocks, hasrefs;
2420 int wake = 0; 2408 int wake = 0;
2421 2409
2422 if (!dlm_grab(dlm)) 2410 if (!dlm_grab(dlm))
2423 return -EINVAL; 2411 return -EINVAL;
2424 2412
2413 BUG_ON(target == O2NM_MAX_NODES);
2414
2425 name = res->lockname.name; 2415 name = res->lockname.name;
2426 namelen = res->lockname.len; 2416 namelen = res->lockname.len;
2427 2417
2428 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); 2418 mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
2429 2419 target);
2430 /*
2431 * ensure this lockres is a proper candidate for migration
2432 */
2433 spin_lock(&res->spinlock);
2434 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2435 if (ret < 0) {
2436 spin_unlock(&res->spinlock);
2437 goto leave;
2438 }
2439 spin_unlock(&res->spinlock);
2440
2441 /* no work to do */
2442 if (numlocks == 0 && !hasrefs)
2443 goto leave;
2444
2445 /*
2446 * preallocate up front
2447 * if this fails, abort
2448 */
2449 2420
2421 /* preallocate up front. if this fails, abort */
2450 ret = -ENOMEM; 2422 ret = -ENOMEM;
2451 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); 2423 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
2452 if (!mres) { 2424 if (!mres) {
@@ -2462,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2462 ret = 0; 2434 ret = 0;
2463 2435
2464 /* 2436 /*
2465 * find a node to migrate the lockres to
2466 */
2467
2468 spin_lock(&dlm->spinlock);
2469 /* pick a new node */
2470 if (!test_bit(target, dlm->domain_map) ||
2471 target >= O2NM_MAX_NODES) {
2472 target = dlm_pick_migration_target(dlm, res);
2473 }
2474 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2475 namelen, name, target);
2476
2477 if (target >= O2NM_MAX_NODES ||
2478 !test_bit(target, dlm->domain_map)) {
2479 /* target chosen is not alive */
2480 ret = -EINVAL;
2481 }
2482
2483 if (ret) {
2484 spin_unlock(&dlm->spinlock);
2485 goto fail;
2486 }
2487
2488 mlog(0, "continuing with target = %u\n", target);
2489
2490 /*
2491 * clear any existing master requests and 2437 * clear any existing master requests and
2492 * add the migration mle to the list 2438 * add the migration mle to the list
2493 */ 2439 */
2440 spin_lock(&dlm->spinlock);
2494 spin_lock(&dlm->master_lock); 2441 spin_lock(&dlm->master_lock);
2495 ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, 2442 ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
2496 namelen, target, dlm->node_num); 2443 namelen, target, dlm->node_num);
@@ -2531,6 +2478,7 @@ fail:
2531 dlm_put_mle(mle); 2478 dlm_put_mle(mle);
2532 } else if (mle) { 2479 } else if (mle) {
2533 kmem_cache_free(dlm_mle_cache, mle); 2480 kmem_cache_free(dlm_mle_cache, mle);
2481 mle = NULL;
2534 } 2482 }
2535 goto leave; 2483 goto leave;
2536 } 2484 }
@@ -2652,69 +2600,52 @@ leave:
2652 if (wake) 2600 if (wake)
2653 wake_up(&res->wq); 2601 wake_up(&res->wq);
2654 2602
2655 /* TODO: cleanup */
2656 if (mres) 2603 if (mres)
2657 free_page((unsigned long)mres); 2604 free_page((unsigned long)mres);
2658 2605
2659 dlm_put(dlm); 2606 dlm_put(dlm);
2660 2607
2661 mlog(0, "returning %d\n", ret); 2608 mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen,
2609 name, target, ret);
2662 return ret; 2610 return ret;
2663} 2611}
2664 2612
2665#define DLM_MIGRATION_RETRY_MS 100 2613#define DLM_MIGRATION_RETRY_MS 100
2666 2614
2667/* Should be called only after beginning the domain leave process. 2615/*
2616 * Should be called only after beginning the domain leave process.
2668 * There should not be any remaining locks on nonlocal lock resources, 2617 * There should not be any remaining locks on nonlocal lock resources,
2669 * and there should be no local locks left on locally mastered resources. 2618 * and there should be no local locks left on locally mastered resources.
2670 * 2619 *
2671 * Called with the dlm spinlock held, may drop it to do migration, but 2620 * Called with the dlm spinlock held, may drop it to do migration, but
2672 * will re-acquire before exit. 2621 * will re-acquire before exit.
2673 * 2622 *
2674 * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ 2623 * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped
2624 */
2675int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 2625int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2676{ 2626{
2677 int ret; 2627 int ret;
2678 int lock_dropped = 0; 2628 int lock_dropped = 0;
2679 int numlocks, hasrefs; 2629 u8 target = O2NM_MAX_NODES;
2630
2631 assert_spin_locked(&dlm->spinlock);
2680 2632
2681 spin_lock(&res->spinlock); 2633 spin_lock(&res->spinlock);
2682 if (res->owner != dlm->node_num) { 2634 if (dlm_is_lockres_migrateable(dlm, res))
2683 if (!__dlm_lockres_unused(res)) { 2635 target = dlm_pick_migration_target(dlm, res);
2684 mlog(ML_ERROR, "%s:%.*s: this node is not master, " 2636 spin_unlock(&res->spinlock);
2685 "trying to free this but locks remain\n",
2686 dlm->name, res->lockname.len, res->lockname.name);
2687 }
2688 spin_unlock(&res->spinlock);
2689 goto leave;
2690 }
2691 2637
2692 /* No need to migrate a lockres having no locks */ 2638 if (target == O2NM_MAX_NODES)
2693 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2694 if (ret >= 0 && numlocks == 0 && !hasrefs) {
2695 spin_unlock(&res->spinlock);
2696 goto leave; 2639 goto leave;
2697 }
2698 spin_unlock(&res->spinlock);
2699 2640
2700 /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ 2641 /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
2701 spin_unlock(&dlm->spinlock); 2642 spin_unlock(&dlm->spinlock);
2702 lock_dropped = 1; 2643 lock_dropped = 1;
2703 while (1) { 2644 ret = dlm_migrate_lockres(dlm, res, target);
2704 ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); 2645 if (ret)
2705 if (ret >= 0) 2646 mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
2706 break; 2647 dlm->name, res->lockname.len, res->lockname.name,
2707 if (ret == -ENOTEMPTY) { 2648 target, ret);
2708 mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
2709 res->lockname.len, res->lockname.name);
2710 BUG();
2711 }
2712
2713 mlog(0, "lockres %.*s: migrate failed, "
2714 "retrying\n", res->lockname.len,
2715 res->lockname.name);
2716 msleep(DLM_MIGRATION_RETRY_MS);
2717 }
2718 spin_lock(&dlm->spinlock); 2649 spin_lock(&dlm->spinlock);
2719leave: 2650leave:
2720 return lock_dropped; 2651 return lock_dropped;
@@ -2898,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2898 } 2829 }
2899} 2830}
2900 2831
2901/* for now this is not too intelligent. we will 2832/*
2902 * need stats to make this do the right thing. 2833 * Pick a node to migrate the lock resource to. This function selects a
2903 * this just finds the first lock on one of the 2834 * potential target based first on the locks and then on the refmap. It skips
2904 * queues and uses that node as the target. */ 2835 * nodes that are in the process of exiting the domain.
2836 */
2905static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, 2837static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2906 struct dlm_lock_resource *res) 2838 struct dlm_lock_resource *res)
2907{ 2839{
2908 int i; 2840 enum dlm_lockres_list idx;
2909 struct list_head *queue = &res->granted; 2841 struct list_head *queue = &res->granted;
2910 struct dlm_lock *lock; 2842 struct dlm_lock *lock;
2911 int nodenum; 2843 int noderef;
2844 u8 nodenum = O2NM_MAX_NODES;
2912 2845
2913 assert_spin_locked(&dlm->spinlock); 2846 assert_spin_locked(&dlm->spinlock);
2847 assert_spin_locked(&res->spinlock);
2914 2848
2915 spin_lock(&res->spinlock); 2849 /* Go through all the locks */
2916 for (i=0; i<3; i++) { 2850 for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
2851 queue = dlm_list_idx_to_ptr(res, idx);
2917 list_for_each_entry(lock, queue, list) { 2852 list_for_each_entry(lock, queue, list) {
2918 /* up to the caller to make sure this node 2853 if (lock->ml.node == dlm->node_num)
2919 * is alive */ 2854 continue;
2920 if (lock->ml.node != dlm->node_num) { 2855 if (test_bit(lock->ml.node, dlm->exit_domain_map))
2921 spin_unlock(&res->spinlock); 2856 continue;
2922 return lock->ml.node; 2857 nodenum = lock->ml.node;
2923 } 2858 goto bail;
2924 } 2859 }
2925 queue++;
2926 }
2927
2928 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2929 if (nodenum < O2NM_MAX_NODES) {
2930 spin_unlock(&res->spinlock);
2931 return nodenum;
2932 } 2860 }
2933 spin_unlock(&res->spinlock);
2934 mlog(0, "have not found a suitable target yet! checking domain map\n");
2935 2861
2936 /* ok now we're getting desperate. pick anyone alive. */ 2862 /* Go through the refmap */
2937 nodenum = -1; 2863 noderef = -1;
2938 while (1) { 2864 while (1) {
2939 nodenum = find_next_bit(dlm->domain_map, 2865 noderef = find_next_bit(res->refmap, O2NM_MAX_NODES,
2940 O2NM_MAX_NODES, nodenum+1); 2866 noderef + 1);
2941 mlog(0, "found %d in domain map\n", nodenum); 2867 if (noderef >= O2NM_MAX_NODES)
2942 if (nodenum >= O2NM_MAX_NODES)
2943 break; 2868 break;
2944 if (nodenum != dlm->node_num) { 2869 if (noderef == dlm->node_num)
2945 mlog(0, "picking %d\n", nodenum); 2870 continue;
2946 return nodenum; 2871 if (test_bit(noderef, dlm->exit_domain_map))
2947 } 2872 continue;
2873 nodenum = noderef;
2874 goto bail;
2948 } 2875 }
2949 2876
2950 mlog(0, "giving up. no master to migrate to\n"); 2877bail:
2951 return DLM_LOCK_RES_OWNER_UNKNOWN; 2878 return nodenum;
2952} 2879}
2953 2880
2954
2955
2956/* this is called by the new master once all lockres 2881/* this is called by the new master once all lockres
2957 * data has been received */ 2882 * data has been received */
2958static int dlm_do_migrate_request(struct dlm_ctxt *dlm, 2883static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f1beb6fc254d..7efab6d28a21 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
2393 2393
2394 mlog(0, "node %u being removed from domain map!\n", idx); 2394 mlog(0, "node %u being removed from domain map!\n", idx);
2395 clear_bit(idx, dlm->domain_map); 2395 clear_bit(idx, dlm->domain_map);
2396 clear_bit(idx, dlm->exit_domain_map);
2396 /* wake up migration waiters if a node goes down. 2397 /* wake up migration waiters if a node goes down.
2397 * perhaps later we can genericize this for other waiters. */ 2398 * perhaps later we can genericize this for other waiters. */
2398 wake_up(&dlm->migration_wq); 2399 wake_up(&dlm->migration_wq);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 8c5c0eddc365..b42076797049 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker;
88 * signifies a bast fired on the lock. 88 * signifies a bast fired on the lock.
89 */ 89 */
90#define DLMFS_CAPABILITIES "bast stackglue" 90#define DLMFS_CAPABILITIES "bast stackglue"
91extern int param_set_dlmfs_capabilities(const char *val, 91static int param_set_dlmfs_capabilities(const char *val,
92 struct kernel_param *kp) 92 struct kernel_param *kp)
93{ 93{
94 printk(KERN_ERR "%s: readonly parameter\n", kp->name); 94 printk(KERN_ERR "%s: readonly parameter\n", kp->name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 89659d6dc206..b1e35a392ca5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
2670 .flock = ocfs2_flock, 2670 .flock = ocfs2_flock,
2671 .splice_read = ocfs2_file_splice_read, 2671 .splice_read = ocfs2_file_splice_read,
2672 .splice_write = ocfs2_file_splice_write, 2672 .splice_write = ocfs2_file_splice_write,
2673 .fallocate = ocfs2_fallocate,
2673}; 2674};
2674 2675
2675const struct file_operations ocfs2_dops_no_plocks = { 2676const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 8f13c5989eae..bc91072b7219 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -22,6 +22,11 @@
22#include "ioctl.h" 22#include "ioctl.h"
23#include "resize.h" 23#include "resize.h"
24#include "refcounttree.h" 24#include "refcounttree.h"
25#include "sysfile.h"
26#include "dir.h"
27#include "buffer_head_io.h"
28#include "suballoc.h"
29#include "move_extents.h"
25 30
26#include <linux/ext2_fs.h> 31#include <linux/ext2_fs.h>
27 32
@@ -35,31 +40,27 @@
35 * be -EFAULT. The error will be returned from the ioctl(2) call. It's 40 * be -EFAULT. The error will be returned from the ioctl(2) call. It's
36 * just a best-effort to tell userspace that this request caused the error. 41 * just a best-effort to tell userspace that this request caused the error.
37 */ 42 */
38static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, 43static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
39 struct ocfs2_info_request __user *req) 44 struct ocfs2_info_request __user *req)
40{ 45{
41 kreq->ir_flags |= OCFS2_INFO_FL_ERROR; 46 kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
42 (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); 47 (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags));
43} 48}
44 49
45#define o2info_set_request_error(a, b) \ 50static inline void o2info_set_request_filled(struct ocfs2_info_request *req)
46 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
47
48static inline void __o2info_set_request_filled(struct ocfs2_info_request *req)
49{ 51{
50 req->ir_flags |= OCFS2_INFO_FL_FILLED; 52 req->ir_flags |= OCFS2_INFO_FL_FILLED;
51} 53}
52 54
53#define o2info_set_request_filled(a) \ 55static inline void o2info_clear_request_filled(struct ocfs2_info_request *req)
54 __o2info_set_request_filled((struct ocfs2_info_request *)&(a))
55
56static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req)
57{ 56{
58 req->ir_flags &= ~OCFS2_INFO_FL_FILLED; 57 req->ir_flags &= ~OCFS2_INFO_FL_FILLED;
59} 58}
60 59
61#define o2info_clear_request_filled(a) \ 60static inline int o2info_coherent(struct ocfs2_info_request *req)
62 __o2info_clear_request_filled((struct ocfs2_info_request *)&(a)) 61{
62 return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT));
63}
63 64
64static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) 65static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
65{ 66{
@@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
153 154
154 oib.ib_blocksize = inode->i_sb->s_blocksize; 155 oib.ib_blocksize = inode->i_sb->s_blocksize;
155 156
156 o2info_set_request_filled(oib); 157 o2info_set_request_filled(&oib.ib_req);
157 158
158 if (o2info_to_user(oib, req)) 159 if (o2info_to_user(oib, req))
159 goto bail; 160 goto bail;
@@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
161 status = 0; 162 status = 0;
162bail: 163bail:
163 if (status) 164 if (status)
164 o2info_set_request_error(oib, req); 165 o2info_set_request_error(&oib.ib_req, req);
165 166
166 return status; 167 return status;
167} 168}
@@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
178 179
179 oic.ic_clustersize = osb->s_clustersize; 180 oic.ic_clustersize = osb->s_clustersize;
180 181
181 o2info_set_request_filled(oic); 182 o2info_set_request_filled(&oic.ic_req);
182 183
183 if (o2info_to_user(oic, req)) 184 if (o2info_to_user(oic, req))
184 goto bail; 185 goto bail;
@@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
186 status = 0; 187 status = 0;
187bail: 188bail:
188 if (status) 189 if (status)
189 o2info_set_request_error(oic, req); 190 o2info_set_request_error(&oic.ic_req, req);
190 191
191 return status; 192 return status;
192} 193}
@@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
203 204
204 oim.im_max_slots = osb->max_slots; 205 oim.im_max_slots = osb->max_slots;
205 206
206 o2info_set_request_filled(oim); 207 o2info_set_request_filled(&oim.im_req);
207 208
208 if (o2info_to_user(oim, req)) 209 if (o2info_to_user(oim, req))
209 goto bail; 210 goto bail;
@@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
211 status = 0; 212 status = 0;
212bail: 213bail:
213 if (status) 214 if (status)
214 o2info_set_request_error(oim, req); 215 o2info_set_request_error(&oim.im_req, req);
215 216
216 return status; 217 return status;
217} 218}
@@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode,
228 229
229 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); 230 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
230 231
231 o2info_set_request_filled(oil); 232 o2info_set_request_filled(&oil.il_req);
232 233
233 if (o2info_to_user(oil, req)) 234 if (o2info_to_user(oil, req))
234 goto bail; 235 goto bail;
@@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode,
236 status = 0; 237 status = 0;
237bail: 238bail:
238 if (status) 239 if (status)
239 o2info_set_request_error(oil, req); 240 o2info_set_request_error(&oil.il_req, req);
240 241
241 return status; 242 return status;
242} 243}
@@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode,
253 254
254 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); 255 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
255 256
256 o2info_set_request_filled(oiu); 257 o2info_set_request_filled(&oiu.iu_req);
257 258
258 if (o2info_to_user(oiu, req)) 259 if (o2info_to_user(oiu, req))
259 goto bail; 260 goto bail;
@@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode,
261 status = 0; 262 status = 0;
262bail: 263bail:
263 if (status) 264 if (status)
264 o2info_set_request_error(oiu, req); 265 o2info_set_request_error(&oiu.iu_req, req);
265 266
266 return status; 267 return status;
267} 268}
@@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
280 oif.if_incompat_features = osb->s_feature_incompat; 281 oif.if_incompat_features = osb->s_feature_incompat;
281 oif.if_ro_compat_features = osb->s_feature_ro_compat; 282 oif.if_ro_compat_features = osb->s_feature_ro_compat;
282 283
283 o2info_set_request_filled(oif); 284 o2info_set_request_filled(&oif.if_req);
284 285
285 if (o2info_to_user(oif, req)) 286 if (o2info_to_user(oif, req))
286 goto bail; 287 goto bail;
@@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
288 status = 0; 289 status = 0;
289bail: 290bail:
290 if (status) 291 if (status)
291 o2info_set_request_error(oif, req); 292 o2info_set_request_error(&oif.if_req, req);
292 293
293 return status; 294 return status;
294} 295}
@@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
305 306
306 oij.ij_journal_size = osb->journal->j_inode->i_size; 307 oij.ij_journal_size = osb->journal->j_inode->i_size;
307 308
308 o2info_set_request_filled(oij); 309 o2info_set_request_filled(&oij.ij_req);
309 310
310 if (o2info_to_user(oij, req)) 311 if (o2info_to_user(oij, req))
311 goto bail; 312 goto bail;
@@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
313 status = 0; 314 status = 0;
314bail: 315bail:
315 if (status) 316 if (status)
316 o2info_set_request_error(oij, req); 317 o2info_set_request_error(&oij.ij_req, req);
318
319 return status;
320}
321
322int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
323 struct inode *inode_alloc, u64 blkno,
324 struct ocfs2_info_freeinode *fi, u32 slot)
325{
326 int status = 0, unlock = 0;
327
328 struct buffer_head *bh = NULL;
329 struct ocfs2_dinode *dinode_alloc = NULL;
330
331 if (inode_alloc)
332 mutex_lock(&inode_alloc->i_mutex);
333
334 if (o2info_coherent(&fi->ifi_req)) {
335 status = ocfs2_inode_lock(inode_alloc, &bh, 0);
336 if (status < 0) {
337 mlog_errno(status);
338 goto bail;
339 }
340 unlock = 1;
341 } else {
342 status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
343 if (status < 0) {
344 mlog_errno(status);
345 goto bail;
346 }
347 }
348
349 dinode_alloc = (struct ocfs2_dinode *)bh->b_data;
350
351 fi->ifi_stat[slot].lfi_total =
352 le32_to_cpu(dinode_alloc->id1.bitmap1.i_total);
353 fi->ifi_stat[slot].lfi_free =
354 le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) -
355 le32_to_cpu(dinode_alloc->id1.bitmap1.i_used);
356
357bail:
358 if (unlock)
359 ocfs2_inode_unlock(inode_alloc, 0);
360
361 if (inode_alloc)
362 mutex_unlock(&inode_alloc->i_mutex);
363
364 brelse(bh);
365
366 return status;
367}
368
369int ocfs2_info_handle_freeinode(struct inode *inode,
370 struct ocfs2_info_request __user *req)
371{
372 u32 i;
373 u64 blkno = -1;
374 char namebuf[40];
375 int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
376 struct ocfs2_info_freeinode *oifi = NULL;
377 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
378 struct inode *inode_alloc = NULL;
379
380 oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL);
381 if (!oifi) {
382 status = -ENOMEM;
383 mlog_errno(status);
384 goto bail;
385 }
386
387 if (o2info_from_user(*oifi, req))
388 goto bail;
389
390 oifi->ifi_slotnum = osb->max_slots;
391
392 for (i = 0; i < oifi->ifi_slotnum; i++) {
393 if (o2info_coherent(&oifi->ifi_req)) {
394 inode_alloc = ocfs2_get_system_file_inode(osb, type, i);
395 if (!inode_alloc) {
396 mlog(ML_ERROR, "unable to get alloc inode in "
397 "slot %u\n", i);
398 status = -EIO;
399 goto bail;
400 }
401 } else {
402 ocfs2_sprintf_system_inode_name(namebuf,
403 sizeof(namebuf),
404 type, i);
405 status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
406 namebuf,
407 strlen(namebuf),
408 &blkno);
409 if (status < 0) {
410 status = -ENOENT;
411 goto bail;
412 }
413 }
414
415 status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i);
416 if (status < 0)
417 goto bail;
418
419 iput(inode_alloc);
420 inode_alloc = NULL;
421 }
422
423 o2info_set_request_filled(&oifi->ifi_req);
424
425 if (o2info_to_user(*oifi, req))
426 goto bail;
427
428 status = 0;
429bail:
430 if (status)
431 o2info_set_request_error(&oifi->ifi_req, req);
432
433 kfree(oifi);
434
435 return status;
436}
437
438static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist,
439 unsigned int chunksize)
440{
441 int index;
442
443 index = __ilog2_u32(chunksize);
444 if (index >= OCFS2_INFO_MAX_HIST)
445 index = OCFS2_INFO_MAX_HIST - 1;
446
447 hist->fc_chunks[index]++;
448 hist->fc_clusters[index] += chunksize;
449}
450
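The histogram buckets free chunks by the floor of log2 of their size, for example:

	/*
	 * chunksize = 6  -> __ilog2_u32(6)  = 2, bucket 2 (runs of 4..7)
	 * chunksize = 64 -> __ilog2_u32(64) = 6, bucket 6 (runs of 64..127)
	 * Anything larger is clamped into the last bucket,
	 * OCFS2_INFO_MAX_HIST - 1.
	 */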
451static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats,
452 unsigned int chunksize)
453{
454 if (chunksize > stats->ffs_max)
455 stats->ffs_max = chunksize;
456
457 if (chunksize < stats->ffs_min)
458 stats->ffs_min = chunksize;
459
460 stats->ffs_avg += chunksize;
461 stats->ffs_free_chunks_real++;
462}
463
464void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg,
465 unsigned int chunksize)
466{
467 o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize);
468 o2ffg_update_stats(&(ffg->iff_ffs), chunksize);
469}
470
471int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb,
472 struct inode *gb_inode,
473 struct ocfs2_dinode *gb_dinode,
474 struct ocfs2_chain_rec *rec,
475 struct ocfs2_info_freefrag *ffg,
476 u32 chunks_in_group)
477{
478 int status = 0, used;
479 u64 blkno;
480
481 struct buffer_head *bh = NULL;
482 struct ocfs2_group_desc *bg = NULL;
483
484 unsigned int max_bits, num_clusters;
485 unsigned int offset = 0, cluster, chunk;
486 unsigned int chunk_free, last_chunksize = 0;
487
488 if (!le32_to_cpu(rec->c_free))
489 goto bail;
490
491 do {
492 if (!bg)
493 blkno = le64_to_cpu(rec->c_blkno);
494 else
495 blkno = le64_to_cpu(bg->bg_next_group);
496
497 if (bh) {
498 brelse(bh);
499 bh = NULL;
500 }
501
502 if (o2info_coherent(&ffg->iff_req))
503 status = ocfs2_read_group_descriptor(gb_inode,
504 gb_dinode,
505 blkno, &bh);
506 else
507 status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
508
509 if (status < 0) {
510 mlog(ML_ERROR, "Can't read the group descriptor # "
511 "%llu from device.", (unsigned long long)blkno);
512 status = -EIO;
513 goto bail;
514 }
515
516 bg = (struct ocfs2_group_desc *)bh->b_data;
517
518 if (!le16_to_cpu(bg->bg_free_bits_count))
519 continue;
520
521 max_bits = le16_to_cpu(bg->bg_bits);
522 offset = 0;
523
524 for (chunk = 0; chunk < chunks_in_group; chunk++) {
525 /*
526 * The last chunk may not be an entire one.
527 */
528 if ((offset + ffg->iff_chunksize) > max_bits)
529 num_clusters = max_bits - offset;
530 else
531 num_clusters = ffg->iff_chunksize;
532
533 chunk_free = 0;
534 for (cluster = 0; cluster < num_clusters; cluster++) {
535 used = ocfs2_test_bit(offset,
536 (unsigned long *)bg->bg_bitmap);
537 /*
538 * - chunk_free counts the free clusters in the current chunk.
539 * - last_chunksize records the size (in clusters)
540 * of the last real free chunk being counted.
541 */
542 if (!used) {
543 last_chunksize++;
544 chunk_free++;
545 }
546
547 if (used && last_chunksize) {
548 ocfs2_info_update_ffg(ffg,
549 last_chunksize);
550 last_chunksize = 0;
551 }
552
553 offset++;
554 }
555
556 if (chunk_free == ffg->iff_chunksize)
557 ffg->iff_ffs.ffs_free_chunks++;
558 }
559
560 /*
561 * Need to update the info for the last free chunk.
562 */
563 if (last_chunksize)
564 ocfs2_info_update_ffg(ffg, last_chunksize);
565
566 } while (le64_to_cpu(bg->bg_next_group));
567
568bail:
569 brelse(bh);
570
571 return status;
572}
573
574int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
575 struct inode *gb_inode, u64 blkno,
576 struct ocfs2_info_freefrag *ffg)
577{
578 u32 chunks_in_group;
579 int status = 0, unlock = 0, i;
580
581 struct buffer_head *bh = NULL;
582 struct ocfs2_chain_list *cl = NULL;
583 struct ocfs2_chain_rec *rec = NULL;
584 struct ocfs2_dinode *gb_dinode = NULL;
585
586 if (gb_inode)
587 mutex_lock(&gb_inode->i_mutex);
588
589 if (o2info_coherent(&ffg->iff_req)) {
590 status = ocfs2_inode_lock(gb_inode, &bh, 0);
591 if (status < 0) {
592 mlog_errno(status);
593 goto bail;
594 }
595 unlock = 1;
596 } else {
597 status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
598 if (status < 0) {
599 mlog_errno(status);
600 goto bail;
601 }
602 }
603
604 gb_dinode = (struct ocfs2_dinode *)bh->b_data;
605 cl = &(gb_dinode->id2.i_chain);
606
607 /*
608 * The chunksize (in clusters) from userspace must not
609 * exceed the number of clusters in a group.
610 */
611 if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) {
612 status = -EINVAL;
613 goto bail;
614 }
615
616 memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats));
617
618 ffg->iff_ffs.ffs_min = ~0U;
619 ffg->iff_ffs.ffs_clusters =
620 le32_to_cpu(gb_dinode->id1.bitmap1.i_total);
621 ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters -
622 le32_to_cpu(gb_dinode->id1.bitmap1.i_used);
623
624 chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1;
625
626 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
627 rec = &(cl->cl_recs[i]);
628 status = ocfs2_info_freefrag_scan_chain(osb, gb_inode,
629 gb_dinode,
630 rec, ffg,
631 chunks_in_group);
632 if (status)
633 goto bail;
634 }
635
636 if (ffg->iff_ffs.ffs_free_chunks_real)
637 ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg /
638 ffg->iff_ffs.ffs_free_chunks_real);
639bail:
640 if (unlock)
641 ocfs2_inode_unlock(gb_inode, 0);
642
643 if (gb_inode)
644 mutex_unlock(&gb_inode->i_mutex);
645
646 if (gb_inode)
647 iput(gb_inode);
648
649 brelse(bh);
650
651 return status;
652}
653
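The + 1 in chunks_in_group above accounts for a trailing partial chunk; with illustrative numbers:

	/*
	 * cl_cpg = 2048 clusters per group, iff_chunksize = 300:
	 *   chunks_in_group = 2048 / 300 + 1 = 7
	 * i.e. six full 300-cluster chunks plus a 248-cluster tail that
	 * the chain scan shortens via num_clusters = max_bits - offset.
	 */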
654int ocfs2_info_handle_freefrag(struct inode *inode,
655 struct ocfs2_info_request __user *req)
656{
657 u64 blkno = -1;
658 char namebuf[40];
659 int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
660
661 struct ocfs2_info_freefrag *oiff;
662 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
663 struct inode *gb_inode = NULL;
664
665 oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL);
666 if (!oiff) {
667 status = -ENOMEM;
668 mlog_errno(status);
669 goto bail;
670 }
671
672 if (o2info_from_user(*oiff, req))
673 goto bail;
674 /*
675 * The chunksize from userspace must be a power of 2.
676 */
677 if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) ||
678 (!oiff->iff_chunksize)) {
679 status = -EINVAL;
680 goto bail;
681 }
682
683 if (o2info_coherent(&oiff->iff_req)) {
684 gb_inode = ocfs2_get_system_file_inode(osb, type,
685 OCFS2_INVALID_SLOT);
686 if (!gb_inode) {
687 mlog(ML_ERROR, "unable to get global_bitmap inode\n");
688 status = -EIO;
689 goto bail;
690 }
691 } else {
692 ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type,
693 OCFS2_INVALID_SLOT);
694 status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
695 namebuf,
696 strlen(namebuf),
697 &blkno);
698 if (status < 0) {
699 status = -ENOENT;
700 goto bail;
701 }
702 }
703
704 status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff);
705 if (status < 0)
706 goto bail;
707
708 o2info_set_request_filled(&oiff->iff_req);
709
710 if (o2info_to_user(*oiff, req))
711 goto bail;
712
713 status = 0;
714bail:
715 if (status)
716 o2info_set_request_error(&oiff->iff_req, req);
717
718 kfree(oiff);
317 719
318 return status; 720 return status;
319} 721}
@@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
327 if (o2info_from_user(oir, req)) 729 if (o2info_from_user(oir, req))
328 goto bail; 730 goto bail;
329 731
330 o2info_clear_request_filled(oir); 732 o2info_clear_request_filled(&oir);
331 733
332 if (o2info_to_user(oir, req)) 734 if (o2info_to_user(oir, req))
333 goto bail; 735 goto bail;
@@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
335 status = 0; 737 status = 0;
336bail: 738bail:
337 if (status) 739 if (status)
338 o2info_set_request_error(oir, req); 740 o2info_set_request_error(&oir, req);
339 741
340 return status; 742 return status;
341} 743}
@@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode,
389 if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) 791 if (oir.ir_size == sizeof(struct ocfs2_info_journal_size))
390 status = ocfs2_info_handle_journal_size(inode, req); 792 status = ocfs2_info_handle_journal_size(inode, req);
391 break; 793 break;
794 case OCFS2_INFO_FREEINODE:
795 if (oir.ir_size == sizeof(struct ocfs2_info_freeinode))
796 status = ocfs2_info_handle_freeinode(inode, req);
797 break;
798 case OCFS2_INFO_FREEFRAG:
799 if (oir.ir_size == sizeof(struct ocfs2_info_freefrag))
800 status = ocfs2_info_handle_freefrag(inode, req);
801 break;
392 default: 802 default:
393 status = ocfs2_info_handle_unknown(inode, req); 803 status = ocfs2_info_handle_unknown(inode, req);
394 break; 804 break;
@@ -542,6 +952,31 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
542 return -EFAULT; 952 return -EFAULT;
543 953
544 return ocfs2_info_handle(inode, &info, 0); 954 return ocfs2_info_handle(inode, &info, 0);
955 case FITRIM:
956 {
957 struct super_block *sb = inode->i_sb;
958 struct fstrim_range range;
959 int ret = 0;
960
961 if (!capable(CAP_SYS_ADMIN))
962 return -EPERM;
963
964 if (copy_from_user(&range, (struct fstrim_range *)arg,
965 sizeof(range)))
966 return -EFAULT;
967
968 ret = ocfs2_trim_fs(sb, &range);
969 if (ret < 0)
970 return ret;
971
972 if (copy_to_user((struct fstrim_range *)arg, &range,
973 sizeof(range)))
974 return -EFAULT;
975
976 return 0;
977 }
978 case OCFS2_IOC_MOVE_EXT:
979 return ocfs2_ioctl_move_extents(filp, (void __user *)arg);
545 default: 980 default:
546 return -ENOTTY; 981 return -ENOTTY;
547 } 982 }
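
For reference, a minimal userspace sketch of exercising the new FITRIM case above (not part of this patch); it relies only on the standard FITRIM interface and struct fstrim_range from <linux/fs.h>, and the mount-point argument is illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
        struct fstrim_range range;
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
                return 1;
        }

        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        memset(&range, 0, sizeof(range));
        range.len = (__u64)-1;  /* trim every free extent on the fs */

        /* dispatches to ocfs2_trim_fs(); requires CAP_SYS_ADMIN */
        if (ioctl(fd, FITRIM, &range) < 0) {
                perror("ioctl(FITRIM)");
                return 1;
        }

        /* the kernel writes back how many bytes were actually trimmed */
        printf("trimmed %llu bytes\n", (unsigned long long)range.len);
        return 0;
}
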
@@ -569,6 +1004,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
569 case OCFS2_IOC_GROUP_EXTEND: 1004 case OCFS2_IOC_GROUP_EXTEND:
570 case OCFS2_IOC_GROUP_ADD: 1005 case OCFS2_IOC_GROUP_ADD:
571 case OCFS2_IOC_GROUP_ADD64: 1006 case OCFS2_IOC_GROUP_ADD64:
1007 case FITRIM:
572 break; 1008 break;
573 case OCFS2_IOC_REFLINK: 1009 case OCFS2_IOC_REFLINK:
574 if (copy_from_user(&args, (struct reflink_arguments *)arg, 1010 if (copy_from_user(&args, (struct reflink_arguments *)arg,
@@ -584,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
584 return -EFAULT; 1020 return -EFAULT;
585 1021
586 return ocfs2_info_handle(inode, &info, 1); 1022 return ocfs2_info_handle(inode, &info, 1);
1023 case OCFS2_IOC_MOVE_EXT:
1024 break;
587 default: 1025 default:
588 return -ENOIOCTLCMD; 1026 return -ENOIOCTLCMD;
589 } 1027 }
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
new file mode 100644
index 000000000000..cd9427023d2e
--- /dev/null
+++ b/fs/ocfs2/move_extents.c
@@ -0,0 +1,1152 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * move_extents.c
5 *
6 * Copyright (C) 2011 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17#include <linux/fs.h>
18#include <linux/types.h>
19#include <linux/mount.h>
20#include <linux/swap.h>
21
22#include <cluster/masklog.h>
23
24#include "ocfs2.h"
25#include "ocfs2_ioctl.h"
26
27#include "alloc.h"
28#include "aops.h"
29#include "dlmglue.h"
30#include "extent_map.h"
31#include "inode.h"
32#include "journal.h"
33#include "suballoc.h"
34#include "uptodate.h"
35#include "super.h"
36#include "dir.h"
37#include "buffer_head_io.h"
38#include "sysfile.h"
39#include "suballoc.h"
40#include "refcounttree.h"
41#include "move_extents.h"
42
43struct ocfs2_move_extents_context {
44 struct inode *inode;
45 struct file *file;
46 int auto_defrag;
47 int partial;
48 int credits;
49 u32 new_phys_cpos;
50 u32 clusters_moved;
51 u64 refcount_loc;
52 struct ocfs2_move_extents *range;
53 struct ocfs2_extent_tree et;
54 struct ocfs2_alloc_context *meta_ac;
55 struct ocfs2_alloc_context *data_ac;
56 struct ocfs2_cached_dealloc_ctxt dealloc;
57};
58
59static int __ocfs2_move_extent(handle_t *handle,
60 struct ocfs2_move_extents_context *context,
61 u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos,
62 int ext_flags)
63{
64 int ret = 0, index;
65 struct inode *inode = context->inode;
66 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
67 struct ocfs2_extent_rec *rec, replace_rec;
68 struct ocfs2_path *path = NULL;
69 struct ocfs2_extent_list *el;
70 u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
71 u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
72
73 ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos,
74 p_cpos, new_p_cpos, len);
75 if (ret) {
76 mlog_errno(ret);
77 goto out;
78 }
79
80 memset(&replace_rec, 0, sizeof(replace_rec));
81 replace_rec.e_cpos = cpu_to_le32(cpos);
82 replace_rec.e_leaf_clusters = cpu_to_le16(len);
83 replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
84 new_p_cpos));
85
86 path = ocfs2_new_path_from_et(&context->et);
87 if (!path) {
88 ret = -ENOMEM;
89 mlog_errno(ret);
90 goto out;
91 }
92
93 ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos);
94 if (ret) {
95 mlog_errno(ret);
96 goto out;
97 }
98
99 el = path_leaf_el(path);
100
101 index = ocfs2_search_extent_list(el, cpos);
102 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
103 ocfs2_error(inode->i_sb,
104 "Inode %llu has an extent at cpos %u which can no "
105 "longer be found.\n",
106 (unsigned long long)ino, cpos);
107 ret = -EROFS;
108 goto out;
109 }
110
111 rec = &el->l_recs[index];
112
113 BUG_ON(ext_flags != rec->e_flags);
114 /*
 115 * After moving/defragging to the new location, the extent is not
 116 * going to be refcounted anymore.
117 */
118 replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;
119
120 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
121 context->et.et_root_bh,
122 OCFS2_JOURNAL_ACCESS_WRITE);
123 if (ret) {
124 mlog_errno(ret);
125 goto out;
126 }
127
128 ret = ocfs2_split_extent(handle, &context->et, path, index,
129 &replace_rec, context->meta_ac,
130 &context->dealloc);
131 if (ret) {
132 mlog_errno(ret);
133 goto out;
134 }
135
136 ocfs2_journal_dirty(handle, context->et.et_root_bh);
137
138 context->new_phys_cpos = new_p_cpos;
139
140 /*
 141 * Do we need to append the old clusters to the truncate log?
142 */
143 if (old_blkno) {
144 if (ext_flags & OCFS2_EXT_REFCOUNTED)
145 ret = ocfs2_decrease_refcount(inode, handle,
146 ocfs2_blocks_to_clusters(osb->sb,
147 old_blkno),
148 len, context->meta_ac,
149 &context->dealloc, 1);
150 else
151 ret = ocfs2_truncate_log_append(osb, handle,
152 old_blkno, len);
153 }
154
155out:
156 return ret;
157}
158
159/*
 160 * Lock the allocators and reserve an appropriate number of bits for
 161 * metadata blocks and data clusters.
 162 *
 163 * In some cases we don't need to reserve clusters; just pass a NULL
 164 * data_ac.
165 */
166static int ocfs2_lock_allocators_move_extents(struct inode *inode,
167 struct ocfs2_extent_tree *et,
168 u32 clusters_to_move,
169 u32 extents_to_split,
170 struct ocfs2_alloc_context **meta_ac,
171 struct ocfs2_alloc_context **data_ac,
172 int extra_blocks,
173 int *credits)
174{
175 int ret, num_free_extents;
176 unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move;
177 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
178
179 num_free_extents = ocfs2_num_free_extents(osb, et);
180 if (num_free_extents < 0) {
181 ret = num_free_extents;
182 mlog_errno(ret);
183 goto out;
184 }
185
186 if (!num_free_extents ||
187 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
188 extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
189
190 ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac);
191 if (ret) {
192 mlog_errno(ret);
193 goto out;
194 }
195
196 if (data_ac) {
197 ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
198 if (ret) {
199 mlog_errno(ret);
200 goto out;
201 }
202 }
203
204 *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el,
205 clusters_to_move + 2);
206
207 mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n",
208 extra_blocks, clusters_to_move, *credits);
209out:
210 if (ret) {
211 if (*meta_ac) {
212 ocfs2_free_alloc_context(*meta_ac);
213 *meta_ac = NULL;
214 }
215 }
216
217 return ret;
218}
219
220/*
 221 * Use one journal handle to guarantee data consistency in case a
 222 * crash happens anywhere.
 223 *
 224 * XXX: defrag can end up finishing only part of the requested extent,
 225 * when not enough contiguous clusters can be found in the allocator.
226 */
227static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
228 u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
229{
230 int ret, credits = 0, extra_blocks = 0, partial = context->partial;
231 handle_t *handle;
232 struct inode *inode = context->inode;
233 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
234 struct inode *tl_inode = osb->osb_tl_inode;
235 struct ocfs2_refcount_tree *ref_tree = NULL;
236 u32 new_phys_cpos, new_len;
237 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
238
239 if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
240
241 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
242 OCFS2_HAS_REFCOUNT_FL));
243
244 BUG_ON(!context->refcount_loc);
245
246 ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
247 &ref_tree, NULL);
248 if (ret) {
249 mlog_errno(ret);
250 return ret;
251 }
252
253 ret = ocfs2_prepare_refcount_change_for_del(inode,
254 context->refcount_loc,
255 phys_blkno,
256 *len,
257 &credits,
258 &extra_blocks);
259 if (ret) {
260 mlog_errno(ret);
261 goto out;
262 }
263 }
264
265 ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
266 &context->meta_ac,
267 &context->data_ac,
268 extra_blocks, &credits);
269 if (ret) {
270 mlog_errno(ret);
271 goto out;
272 }
273
274 /*
 275 * Should we be using the allocation reservation strategy here?
276 *
277 * if (context->data_ac)
278 * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
279 */
280
281 mutex_lock(&tl_inode->i_mutex);
282
283 if (ocfs2_truncate_log_needs_flush(osb)) {
284 ret = __ocfs2_flush_truncate_log(osb);
285 if (ret < 0) {
286 mlog_errno(ret);
287 goto out_unlock_mutex;
288 }
289 }
290
291 handle = ocfs2_start_trans(osb, credits);
292 if (IS_ERR(handle)) {
293 ret = PTR_ERR(handle);
294 mlog_errno(ret);
295 goto out_unlock_mutex;
296 }
297
298 ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
299 &new_phys_cpos, &new_len);
300 if (ret) {
301 mlog_errno(ret);
302 goto out_commit;
303 }
304
305 /*
 306 * Allowing partial extent moving has pros and cons: it makes the
 307 * whole defragmentation less likely to fail, but on the other hand
 308 * it may leave the fs even more fragmented after moving. Let
 309 * userspace make the decision here.
310 */
311 if (new_len != *len) {
312 mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
313 if (!partial) {
314 context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
315 ret = -ENOSPC;
316 goto out_commit;
317 }
318 }
319
320 mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
321 phys_cpos, new_phys_cpos);
322
323 ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
324 new_phys_cpos, ext_flags);
325 if (ret)
326 mlog_errno(ret);
327
328 if (partial && (new_len != *len))
329 *len = new_len;
330
331 /*
332 * Here we should write the new page out first if we are
333 * in write-back mode.
334 */
335 ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len);
336 if (ret)
337 mlog_errno(ret);
338
339out_commit:
340 ocfs2_commit_trans(osb, handle);
341
342out_unlock_mutex:
343 mutex_unlock(&tl_inode->i_mutex);
344
345 if (context->data_ac) {
346 ocfs2_free_alloc_context(context->data_ac);
347 context->data_ac = NULL;
348 }
349
350 if (context->meta_ac) {
351 ocfs2_free_alloc_context(context->meta_ac);
352 context->meta_ac = NULL;
353 }
354
355out:
356 if (ref_tree)
357 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
358
359 return ret;
360}
361
362/*
 363 * Find the victim alloc group into which 'vict_blkno' falls.
364 */
365static int ocfs2_find_victim_alloc_group(struct inode *inode,
366 u64 vict_blkno,
367 int type, int slot,
368 int *vict_bit,
369 struct buffer_head **ret_bh)
370{
371 int ret, i, bits_per_unit = 0;
372 u64 blkno;
373 char namebuf[40];
374
375 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
376 struct buffer_head *ac_bh = NULL, *gd_bh = NULL;
377 struct ocfs2_chain_list *cl;
378 struct ocfs2_chain_rec *rec;
379 struct ocfs2_dinode *ac_dinode;
380 struct ocfs2_group_desc *bg;
381
382 ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
383 ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
384 strlen(namebuf), &blkno);
385 if (ret) {
386 ret = -ENOENT;
387 goto out;
388 }
389
390 ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh);
391 if (ret) {
392 mlog_errno(ret);
393 goto out;
394 }
395
396 ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data;
397 cl = &(ac_dinode->id2.i_chain);
398 rec = &(cl->cl_recs[0]);
399
400 if (type == GLOBAL_BITMAP_SYSTEM_INODE)
401 bits_per_unit = osb->s_clustersize_bits -
402 inode->i_sb->s_blocksize_bits;
403 /*
 404 * Bail out if 'vict_blkno' is out of the valid range.
405 */
406 if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
407 (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
408 bits_per_unit))) {
409 ret = -EINVAL;
410 goto out;
411 }
412
413 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
414
415 rec = &(cl->cl_recs[i]);
416 if (!rec)
417 continue;
418
419 bg = NULL;
420
421 do {
422 if (!bg)
423 blkno = le64_to_cpu(rec->c_blkno);
424 else
425 blkno = le64_to_cpu(bg->bg_next_group);
426
427 if (gd_bh) {
428 brelse(gd_bh);
429 gd_bh = NULL;
430 }
431
432 ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh);
433 if (ret) {
434 mlog_errno(ret);
435 goto out;
436 }
437
438 bg = (struct ocfs2_group_desc *)gd_bh->b_data;
439
440 if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
441 le16_to_cpu(bg->bg_bits))) {
442
443 *ret_bh = gd_bh;
444 *vict_bit = (vict_blkno - blkno) >>
445 bits_per_unit;
446 mlog(0, "find the victim group: #%llu, "
447 "total_bits: %u, vict_bit: %u\n",
448 blkno, le16_to_cpu(bg->bg_bits),
449 *vict_bit);
450 goto out;
451 }
452
453 } while (le64_to_cpu(bg->bg_next_group));
454 }
455
456 ret = -EINVAL;
457out:
458 brelse(ac_bh);
459
460 /*
461 * caller has to release the gd_bh properly.
462 */
463 return ret;
464}
465
466/*
467 * XXX: helper to validate and adjust moving goal.
468 */
469static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
470 struct ocfs2_move_extents *range)
471{
472 int ret, goal_bit = 0;
473
474 struct buffer_head *gd_bh = NULL;
475 struct ocfs2_group_desc *bg = NULL;
476 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
477 int c_to_b = 1 << (osb->s_clustersize_bits -
478 inode->i_sb->s_blocksize_bits);
479
 480 /*
 481 * Make the goal cluster-aligned.
 482 */
 483 range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb,
 484 range->me_goal);
 485 /*
 486 * Validate that the goal sits within the global_bitmap, and
 487 * return the victim group desc.
 488 */
 489 ret = ocfs2_find_victim_alloc_group(inode, range->me_goal,
 490 GLOBAL_BITMAP_SYSTEM_INODE,
 491 OCFS2_INVALID_SLOT,
 492 &goal_bit, &gd_bh);
 493 if (ret)
 494 goto out;
 495
 496 bg = (struct ocfs2_group_desc *)gd_bh->b_data;
 497
 498 /*
 499 * The moving goal is not allowed to start at a group descriptor
 500 * block (block #0 of the group); compromise to the next cluster.
 501 */
 502 if (range->me_goal == le64_to_cpu(bg->bg_blkno))
 503 range->me_goal += c_to_b;
 504
505 /*
 506 * The movement is not allowed to cross two groups.
507 */
508 if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize <
509 range->me_len) {
510 ret = -EINVAL;
511 goto out;
512 }
513 /*
 514 * More exact validations/adjustments will be performed later,
 515 * during the moving operation for each extent range.
516 */
517 mlog(0, "extents get ready to be moved to #%llu block\n",
518 range->me_goal);
519
520out:
521 brelse(gd_bh);
522
523 return ret;
524}
525
526static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
527 int *goal_bit, u32 move_len, u32 max_hop,
528 u32 *phys_cpos)
529{
530 int i, used, last_free_bits = 0, base_bit = *goal_bit;
531 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
532 u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
533 le64_to_cpu(gd->bg_blkno));
534
535 for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) {
536
537 used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap);
538 if (used) {
539 /*
 540 * We even tried searching for a free chunk by jumping
 541 * up to a 'max_hop' distance, but still failed.
542 */
543 if ((i - base_bit) > max_hop) {
544 *phys_cpos = 0;
545 break;
546 }
547
548 if (last_free_bits)
549 last_free_bits = 0;
550
551 continue;
552 } else
553 last_free_bits++;
554
555 if (last_free_bits == move_len) {
556 *goal_bit = i;
557 *phys_cpos = base_cpos + i;
558 break;
559 }
560 }
561
562 mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
563}
564
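
The probe above is a hop-bounded linear scan: starting from the goal bit it looks for 'move_len' contiguous free bits, resetting the run on every used bit and giving up once it has drifted more than 'max_hop' bits from the start. Note that, like the code above, it records the bit that *ends* the run. A standalone sketch with a made-up bitmap:

#include <stdio.h>

int main(void)
{
        /* 1 = used cluster; goal at bit 1, want a run of 3 free bits */
        int bitmap[16] = { 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1 };
        int base_bit = 1, move_len = 3, max_hop = 8;
        int i, last_free = 0, found = -1;

        for (i = base_bit; i < 16; i++) {
                if (bitmap[i]) {
                        if (i - base_bit > max_hop)
                                break;          /* hopped too far: give up */
                        last_free = 0;
                        continue;
                }
                if (++last_free == move_len) {
                        found = i;              /* end bit of the free run */
                        break;
                }
        }

        printf("free run of %d clusters ends at bit %d\n", move_len, found);
        return 0;
}
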
565static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
566 handle_t *handle,
567 struct buffer_head *di_bh,
568 u32 num_bits,
569 u16 chain)
570{
571 int ret;
572 u32 tmp_used;
573 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
574 struct ocfs2_chain_list *cl =
575 (struct ocfs2_chain_list *) &di->id2.i_chain;
576
577 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
578 OCFS2_JOURNAL_ACCESS_WRITE);
579 if (ret < 0) {
580 mlog_errno(ret);
581 goto out;
582 }
583
584 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
585 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
586 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
587 ocfs2_journal_dirty(handle, di_bh);
588
589out:
590 return ret;
591}
592
593static inline int ocfs2_block_group_set_bits(handle_t *handle,
594 struct inode *alloc_inode,
595 struct ocfs2_group_desc *bg,
596 struct buffer_head *group_bh,
597 unsigned int bit_off,
598 unsigned int num_bits)
599{
600 int status;
601 void *bitmap = bg->bg_bitmap;
602 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
603
604 /* All callers get the descriptor via
605 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
606 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
607 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
608
609 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
610 num_bits);
611
612 if (ocfs2_is_cluster_bitmap(alloc_inode))
613 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
614
615 status = ocfs2_journal_access_gd(handle,
616 INODE_CACHE(alloc_inode),
617 group_bh,
618 journal_type);
619 if (status < 0) {
620 mlog_errno(status);
621 goto bail;
622 }
623
624 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
625 if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
626 ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
627 " count %u but claims %u are freed. num_bits %d",
628 (unsigned long long)le64_to_cpu(bg->bg_blkno),
629 le16_to_cpu(bg->bg_bits),
630 le16_to_cpu(bg->bg_free_bits_count), num_bits);
631 return -EROFS;
632 }
633 while (num_bits--)
634 ocfs2_set_bit(bit_off++, bitmap);
635
636 ocfs2_journal_dirty(handle, group_bh);
637
638bail:
639 return status;
640}
641
642static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
643 u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
644 u32 len, int ext_flags)
645{
646 int ret, credits = 0, extra_blocks = 0, goal_bit = 0;
647 handle_t *handle;
648 struct inode *inode = context->inode;
649 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
650 struct inode *tl_inode = osb->osb_tl_inode;
651 struct inode *gb_inode = NULL;
652 struct buffer_head *gb_bh = NULL;
653 struct buffer_head *gd_bh = NULL;
654 struct ocfs2_group_desc *gd;
655 struct ocfs2_refcount_tree *ref_tree = NULL;
656 u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb,
657 context->range->me_threshold);
658 u64 phys_blkno, new_phys_blkno;
659
660 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
661
662 if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
663
664 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
665 OCFS2_HAS_REFCOUNT_FL));
666
667 BUG_ON(!context->refcount_loc);
668
669 ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
670 &ref_tree, NULL);
671 if (ret) {
672 mlog_errno(ret);
673 return ret;
674 }
675
676 ret = ocfs2_prepare_refcount_change_for_del(inode,
677 context->refcount_loc,
678 phys_blkno,
679 len,
680 &credits,
681 &extra_blocks);
682 if (ret) {
683 mlog_errno(ret);
684 goto out;
685 }
686 }
687
688 ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
689 &context->meta_ac,
690 NULL, extra_blocks, &credits);
691 if (ret) {
692 mlog_errno(ret);
693 goto out;
694 }
695
696 /*
 697 * We need to count 2 extra credits for the global_bitmap inode and
 698 * the group descriptor.
699 */
700 credits += OCFS2_INODE_UPDATE_CREDITS + 1;
701
702 /*
 703 * ocfs2_move_extent() doesn't reserve any clusters in the
 704 * lock_allocators() logic, but we still need to lock the global_bitmap.
705 */
706 gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
707 OCFS2_INVALID_SLOT);
708 if (!gb_inode) {
709 mlog(ML_ERROR, "unable to get global_bitmap inode\n");
710 ret = -EIO;
711 goto out;
712 }
713
714 mutex_lock(&gb_inode->i_mutex);
715
716 ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
717 if (ret) {
718 mlog_errno(ret);
719 goto out_unlock_gb_mutex;
720 }
721
722 mutex_lock(&tl_inode->i_mutex);
723
724 handle = ocfs2_start_trans(osb, credits);
725 if (IS_ERR(handle)) {
726 ret = PTR_ERR(handle);
727 mlog_errno(ret);
728 goto out_unlock_tl_inode;
729 }
730
731 new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
732 ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno,
733 GLOBAL_BITMAP_SYSTEM_INODE,
734 OCFS2_INVALID_SLOT,
735 &goal_bit, &gd_bh);
736 if (ret) {
737 mlog_errno(ret);
738 goto out_commit;
739 }
740
741 /*
 742 * Probe the victim cluster group to find a proper
 743 * region to fit the wanted movement; it will even perform
 744 * a best-effort attempt by compromising to a threshold
 745 * around the goal.
746 */
747 ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop,
748 new_phys_cpos);
749 if (!new_phys_cpos) {
750 ret = -ENOSPC;
751 goto out_commit;
752 }
753
754 ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos,
755 *new_phys_cpos, ext_flags);
756 if (ret) {
757 mlog_errno(ret);
758 goto out_commit;
759 }
760
761 gd = (struct ocfs2_group_desc *)gd_bh->b_data;
762 ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len,
763 le16_to_cpu(gd->bg_chain));
764 if (ret) {
765 mlog_errno(ret);
766 goto out_commit;
767 }
768
769 ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
770 goal_bit, len);
771 if (ret)
772 mlog_errno(ret);
773
774 /*
775 * Here we should write the new page out first if we are
776 * in write-back mode.
777 */
778 ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len);
779 if (ret)
780 mlog_errno(ret);
781
782out_commit:
783 ocfs2_commit_trans(osb, handle);
784 brelse(gd_bh);
785
786out_unlock_tl_inode:
787 mutex_unlock(&tl_inode->i_mutex);
788
789 ocfs2_inode_unlock(gb_inode, 1);
790out_unlock_gb_mutex:
791 mutex_unlock(&gb_inode->i_mutex);
792 brelse(gb_bh);
793 iput(gb_inode);
794
795out:
796 if (context->meta_ac) {
797 ocfs2_free_alloc_context(context->meta_ac);
798 context->meta_ac = NULL;
799 }
800
801 if (ref_tree)
802 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
803
804 return ret;
805}
806
807/*
 808 * Helper to calculate the defragging length in one run per the threshold.
809 */
810static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged,
811 u32 threshold, int *skip)
812{
813 if ((*alloc_size + *len_defraged) < threshold) {
814 /*
 815 * Proceed with defragmentation until we meet the threshold.
816 */
817 *len_defraged += *alloc_size;
818 } else if (*len_defraged == 0) {
819 /*
820 * XXX: skip a large extent.
821 */
822 *skip = 1;
823 } else {
824 /*
 825 * Split this extent so that it coalesces with the former
 826 * pieces to reach the threshold.
 827 *
 828 * We're done here with one cycle of defragmentation of
 829 * size 'thresh'; resetting 'len_defraged' forces a new
 830 * defragmentation cycle.
831 */
832 *alloc_size = threshold - *len_defraged;
833 *len_defraged = 0;
834 }
835}
836
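
To see the helper's three branches in action, here is a small standalone harness (an illustrative sketch, not part of this patch) replaying the same logic with a made-up threshold of 8 clusters: small extents accumulate, an extent crossing the threshold is split, and an extent that alone meets the threshold is skipped:

#include <stdio.h>

typedef unsigned int u32;

/* same branch structure as ocfs2_calc_extent_defrag_len() above */
static void calc_defrag_len(u32 *alloc_size, u32 *len_defraged,
                            u32 threshold, int *skip)
{
        if ((*alloc_size + *len_defraged) < threshold)
                *len_defraged += *alloc_size;
        else if (*len_defraged == 0)
                *skip = 1;              /* lone extent already >= threshold */
        else {
                *alloc_size = threshold - *len_defraged;
                *len_defraged = 0;      /* one defrag cycle finished */
        }
}

int main(void)
{
        u32 extents[] = { 3, 2, 6, 10 }, len_defraged = 0;
        int i, skip;

        for (i = 0; i < 4; i++) {
                u32 sz = extents[i];

                skip = 0;
                calc_defrag_len(&sz, &len_defraged, 8, &skip);
                /* the caller re-enters with the remainder of a split extent */
                printf("extent of %u: move %u, accumulated %u, skip=%d\n",
                       extents[i], skip ? 0 : sz, len_defraged, skip);
        }
        return 0;
}
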
837static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
838 struct ocfs2_move_extents_context *context)
839{
840 int ret = 0, flags, do_defrag, skip = 0;
841 u32 cpos, phys_cpos, move_start, len_to_move, alloc_size;
842 u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0;
843
844 struct inode *inode = context->inode;
845 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
846 struct ocfs2_move_extents *range = context->range;
847 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
848
849 if ((inode->i_size == 0) || (range->me_len == 0))
850 return 0;
851
852 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
853 return 0;
854
855 context->refcount_loc = le64_to_cpu(di->i_refcount_loc);
856
857 ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh);
858 ocfs2_init_dealloc_ctxt(&context->dealloc);
859
860 /*
861 * TO-DO XXX:
862 *
863 * - xattr extents.
864 */
865
866 do_defrag = context->auto_defrag;
867
868 /*
 869 * Extent moving happens in units of clusters; for the sake
 870 * of simplicity, we may ignore the two clusters in which
 871 * 'byte_start' and 'byte_start + len' fall.
872 */
873 move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start);
874 len_to_move = (range->me_start + range->me_len) >>
875 osb->s_clustersize_bits;
876 if (len_to_move >= move_start)
877 len_to_move -= move_start;
878 else
879 len_to_move = 0;
880
881 if (do_defrag) {
882 defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
883 if (defrag_thresh <= 1)
884 goto done;
885 } else
886 new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
887 range->me_goal);
888
889 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, "
890 "thresh: %u\n",
891 (unsigned long long)OCFS2_I(inode)->ip_blkno,
892 (unsigned long long)range->me_start,
893 (unsigned long long)range->me_len,
894 move_start, len_to_move, defrag_thresh);
895
896 cpos = move_start;
897 while (len_to_move) {
898 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size,
899 &flags);
900 if (ret) {
901 mlog_errno(ret);
902 goto out;
903 }
904
905 if (alloc_size > len_to_move)
906 alloc_size = len_to_move;
907
908 /*
909 * XXX: how to deal with a hole:
910 *
911 * - skip the hole of course
912 * - force a new defragmentation
913 */
914 if (!phys_cpos) {
915 if (do_defrag)
916 len_defraged = 0;
917
918 goto next;
919 }
920
921 if (do_defrag) {
922 ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged,
923 defrag_thresh, &skip);
924 /*
925 * skip large extents
926 */
927 if (skip) {
928 skip = 0;
929 goto next;
930 }
931
932 mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, "
933 "alloc_size: %u, len_defraged: %u\n",
934 cpos, phys_cpos, alloc_size, len_defraged);
935
936 ret = ocfs2_defrag_extent(context, cpos, phys_cpos,
937 &alloc_size, flags);
938 } else {
939 ret = ocfs2_move_extent(context, cpos, phys_cpos,
940 &new_phys_cpos, alloc_size,
941 flags);
942
943 new_phys_cpos += alloc_size;
944 }
945
946 if (ret < 0) {
947 mlog_errno(ret);
948 goto out;
949 }
950
951 context->clusters_moved += alloc_size;
952next:
953 cpos += alloc_size;
954 len_to_move -= alloc_size;
955 }
956
957done:
958 range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE;
959
960out:
961 range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb,
962 context->clusters_moved);
963 range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb,
964 context->new_phys_cpos);
965
966 ocfs2_schedule_truncate_log_flush(osb, 1);
967 ocfs2_run_deallocs(osb, &context->dealloc);
968
969 return ret;
970}
971
972static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
973{
974 int status;
975 handle_t *handle;
976 struct inode *inode = context->inode;
977 struct ocfs2_dinode *di;
978 struct buffer_head *di_bh = NULL;
979 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
980
981 if (!inode)
982 return -ENOENT;
983
984 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
985 return -EROFS;
986
987 mutex_lock(&inode->i_mutex);
988
989 /*
990 * This prevents concurrent writes from other nodes
991 */
992 status = ocfs2_rw_lock(inode, 1);
993 if (status) {
994 mlog_errno(status);
995 goto out;
996 }
997
998 status = ocfs2_inode_lock(inode, &di_bh, 1);
999 if (status) {
1000 mlog_errno(status);
1001 goto out_rw_unlock;
1002 }
1003
1004 /*
 1005 * Remember that ip_xattr_sem also needs to be held if necessary.
1006 */
1007 down_write(&OCFS2_I(inode)->ip_alloc_sem);
1008
1009 status = __ocfs2_move_extents_range(di_bh, context);
1010
1011 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1012 if (status) {
1013 mlog_errno(status);
1014 goto out_inode_unlock;
1015 }
1016
1017 /*
1018 * We update ctime for these changes
1019 */
1020 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1021 if (IS_ERR(handle)) {
1022 status = PTR_ERR(handle);
1023 mlog_errno(status);
1024 goto out_inode_unlock;
1025 }
1026
1027 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1028 OCFS2_JOURNAL_ACCESS_WRITE);
1029 if (status) {
1030 mlog_errno(status);
1031 goto out_commit;
1032 }
1033
1034 di = (struct ocfs2_dinode *)di_bh->b_data;
1035 inode->i_ctime = CURRENT_TIME;
1036 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1037 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1038
1039 ocfs2_journal_dirty(handle, di_bh);
1040
1041out_commit:
1042 ocfs2_commit_trans(osb, handle);
1043
1044out_inode_unlock:
1045 brelse(di_bh);
1046 ocfs2_inode_unlock(inode, 1);
1047out_rw_unlock:
1048 ocfs2_rw_unlock(inode, 1);
1049out:
1050 mutex_unlock(&inode->i_mutex);
1051
1052 return status;
1053}
1054
1055int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
1056{
1057 int status;
1058
1059 struct inode *inode = filp->f_path.dentry->d_inode;
1060 struct ocfs2_move_extents range;
1061 struct ocfs2_move_extents_context *context = NULL;
1062
1063 status = mnt_want_write(filp->f_path.mnt);
1064 if (status)
1065 return status;
1066
1067 if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
1068 goto out;
1069
1070 if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
1071 status = -EPERM;
1072 goto out;
1073 }
1074
1075 context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS);
1076 if (!context) {
1077 status = -ENOMEM;
1078 mlog_errno(status);
1079 goto out;
1080 }
1081
1082 context->inode = inode;
1083 context->file = filp;
1084
1085 if (argp) {
1086 if (copy_from_user(&range, (struct ocfs2_move_extents *)argp,
1087 sizeof(range))) {
1088 status = -EFAULT;
1089 goto out;
1090 }
1091 } else {
1092 status = -EINVAL;
1093 goto out;
1094 }
1095
1096 if (range.me_start > i_size_read(inode))
1097 goto out;
1098
1099 if (range.me_start + range.me_len > i_size_read(inode))
1100 range.me_len = i_size_read(inode) - range.me_start;
1101
1102 context->range = &range;
1103
1104 if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
1105 context->auto_defrag = 1;
1106 /*
 1107 * OK, the default threshold for defragmentation
 1108 * is 1M, since our maximum cluster size is also 1M.
 1109 * Any thoughts?
1110 */
1111 if (!range.me_threshold)
1112 range.me_threshold = 1024 * 1024;
1113
1114 if (range.me_threshold > i_size_read(inode))
1115 range.me_threshold = i_size_read(inode);
1116
1117 if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
1118 context->partial = 1;
1119 } else {
1120 /*
 1121 * A first best-effort attempt to validate and adjust the goal
 1122 * (physical address in blocks); it can't guarantee that the later
 1123 * operation will always succeed, since the global_bitmap may
 1124 * change a bit over time.
1125 */
1126
1127 status = ocfs2_validate_and_adjust_move_goal(inode, &range);
1128 if (status)
1129 goto out;
1130 }
1131
1132 status = ocfs2_move_extents(context);
1133 if (status)
1134 mlog_errno(status);
1135out:
1136 /*
 1137 * Movement/defragmentation may end up only partially completed;
 1138 * that's why we need to return the finished length and new_offset
 1139 * to userspace even if a failure happens somewhere.
1140 */
1141 if (argp) {
1142 if (copy_to_user((struct ocfs2_move_extents *)argp, &range,
1143 sizeof(range)))
1144 status = -EFAULT;
1145 }
1146
1147 kfree(context);
1148
1149 mnt_drop_write(filp->f_path.mnt);
1150
1151 return status;
1152}
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h
new file mode 100644
index 000000000000..4e143e811441
--- /dev/null
+++ b/fs/ocfs2/move_extents.h
@@ -0,0 +1,22 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * move_extents.h
5 *
6 * Copyright (C) 2011 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17#ifndef OCFS2_MOVE_EXTENTS_H
18#define OCFS2_MOVE_EXTENTS_H
19
20int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp);
21
22#endif /* OCFS2_MOVE_EXTENTS_H */
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index b46f39bf7438..5b27ff1fa577 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -142,6 +142,38 @@ struct ocfs2_info_journal_size {
142 __u64 ij_journal_size; 142 __u64 ij_journal_size;
143}; 143};
144 144
145struct ocfs2_info_freeinode {
146 struct ocfs2_info_request ifi_req;
147 struct ocfs2_info_local_freeinode {
148 __u64 lfi_total;
149 __u64 lfi_free;
150 } ifi_stat[OCFS2_MAX_SLOTS];
151 __u32 ifi_slotnum; /* out */
152 __u32 ifi_pad;
153};
154
155#define OCFS2_INFO_MAX_HIST (32)
156
157struct ocfs2_info_freefrag {
158 struct ocfs2_info_request iff_req;
159 struct ocfs2_info_freefrag_stats { /* (out) */
160 struct ocfs2_info_free_chunk_list {
161 __u32 fc_chunks[OCFS2_INFO_MAX_HIST];
162 __u32 fc_clusters[OCFS2_INFO_MAX_HIST];
163 } ffs_fc_hist;
164 __u32 ffs_clusters;
165 __u32 ffs_free_clusters;
166 __u32 ffs_free_chunks;
167 __u32 ffs_free_chunks_real;
168 __u32 ffs_min; /* Minimum free chunksize in clusters */
169 __u32 ffs_max;
170 __u32 ffs_avg;
171 __u32 ffs_pad;
172 } iff_ffs;
 173 __u32 iff_chunksize; /* chunksize in clusters (in) */
174 __u32 iff_pad;
175};
176
145/* Codes for ocfs2_info_request */ 177/* Codes for ocfs2_info_request */
146enum ocfs2_info_type { 178enum ocfs2_info_type {
147 OCFS2_INFO_CLUSTERSIZE = 1, 179 OCFS2_INFO_CLUSTERSIZE = 1,
@@ -151,6 +183,8 @@ enum ocfs2_info_type {
151 OCFS2_INFO_UUID, 183 OCFS2_INFO_UUID,
152 OCFS2_INFO_FS_FEATURES, 184 OCFS2_INFO_FS_FEATURES,
153 OCFS2_INFO_JOURNAL_SIZE, 185 OCFS2_INFO_JOURNAL_SIZE,
186 OCFS2_INFO_FREEINODE,
187 OCFS2_INFO_FREEFRAG,
154 OCFS2_INFO_NUM_TYPES 188 OCFS2_INFO_NUM_TYPES
155}; 189};
156 190
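
A hedged userspace sketch of issuing the new OCFS2_INFO_FREEFRAG request (not part of this patch); the OCFS2_INFO_MAGIC constant and the oi_requests/oi_count layout of struct ocfs2_info are assumed from the rest of ocfs2_ioctl.h, and the 8-cluster chunksize is a made-up input:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include "ocfs2_ioctl.h"        /* structs and codes defined above */

int main(int argc, char **argv)
{
        struct ocfs2_info_freefrag ff;
        struct ocfs2_info info;
        __u64 reqs[1];
        int fd;

        if (argc != 2)
                return 1;

        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        memset(&ff, 0, sizeof(ff));
        ff.iff_req.ir_magic = OCFS2_INFO_MAGIC; /* assumed constant */
        ff.iff_req.ir_code = OCFS2_INFO_FREEFRAG;
        ff.iff_req.ir_size = sizeof(ff);
        ff.iff_chunksize = 8;   /* power of 2, at most clusters per group */

        memset(&info, 0, sizeof(info));
        reqs[0] = (__u64)(unsigned long)&ff;
        info.oi_requests = (__u64)(unsigned long)reqs;  /* assumed layout */
        info.oi_count = 1;

        if (ioctl(fd, OCFS2_IOC_INFO, &info) < 0) {
                perror("ioctl(OCFS2_IOC_INFO)");
                return 1;
        }

        printf("%u of %u clusters free, %u whole free chunks of %u clusters\n",
               ff.iff_ffs.ffs_free_clusters, ff.iff_ffs.ffs_clusters,
               ff.iff_ffs.ffs_free_chunks, ff.iff_chunksize);
        return 0;
}
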
@@ -171,4 +205,38 @@ enum ocfs2_info_type {
171 205
172#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) 206#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
173 207
208struct ocfs2_move_extents {
209/* All values are in bytes */
210 /* in */
211 __u64 me_start; /* Virtual start in the file to move */
212 __u64 me_len; /* Length of the extents to be moved */
 213 __u64 me_goal; /* Physical offset of the goal,
 214 in block units */
215 __u64 me_threshold; /* Maximum distance from goal or threshold
216 for auto defragmentation */
217 __u64 me_flags; /* Flags for the operation:
218 * - auto defragmentation.
219 * - refcount,xattr cases.
220 */
221 /* out */
222 __u64 me_moved_len; /* Moved/defraged length */
223 __u64 me_new_offset; /* Resulting physical location */
 224 __u32 me_reserved[2]; /* Reserved for future use */
225};
226
 227#define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel claims new
 228 clusters as the goal
 229 location for the
 230 extents being moved */
 231#define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent
 232 moving; makes movement
 233 less likely to fail,
 234 but may leave the fs
 235 even more fragmented */
 236#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation
 237 completed entirely.
238 */
239
240#define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents)
241
174#endif /* OCFS2_IOCTL_H */ 242#endif /* OCFS2_IOCTL_H */
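
And a companion sketch for driving OCFS2_IOC_MOVE_EXT in auto-defrag mode (an illustration, not part of this patch); the struct fields and flags are the ones defined above, while the target file and the 1M threshold are made-up inputs:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include "ocfs2_ioctl.h"        /* OCFS2_IOC_MOVE_EXT and friends, above */

int main(int argc, char **argv)
{
        struct ocfs2_move_extents me;
        struct stat st;
        int fd, ret;

        if (argc != 2)
                return 1;

        fd = open(argv[1], O_RDWR);     /* the handler insists on FMODE_WRITE */
        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (fstat(fd, &st) < 0) {
                perror("fstat");
                return 1;
        }

        memset(&me, 0, sizeof(me));
        me.me_start = 0;                /* virtual offset, in bytes */
        me.me_len = st.st_size;         /* defrag the whole file */
        me.me_threshold = 1024 * 1024;  /* 1M, the kernel's default */
        me.me_flags = OCFS2_MOVE_EXT_FL_AUTO_DEFRAG |
                      OCFS2_MOVE_EXT_FL_PART_DEFRAG;

        ret = ioctl(fd, OCFS2_IOC_MOVE_EXT, &me);

        /* progress is reported back even when the ioctl fails part-way */
        printf("moved %llu bytes, complete=%d, ret=%d\n",
               (unsigned long long)me.me_moved_len,
               !!(me.me_flags & OCFS2_MOVE_EXT_FL_COMPLETE), ret);
        return ret < 0 ? 1 : 0;
}
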
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index a1dae5bb54ac..3b481f490633 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc,
688 __entry->blkno, __entry->bit) 688 __entry->blkno, __entry->bit)
689); 689);
690 690
691TRACE_EVENT(ocfs2_trim_extent,
692 TP_PROTO(struct super_block *sb, unsigned long long blk,
693 unsigned long long count),
694 TP_ARGS(sb, blk, count),
695 TP_STRUCT__entry(
696 __field(int, dev_major)
697 __field(int, dev_minor)
698 __field(unsigned long long, blk)
699 __field(__u64, count)
700 ),
701 TP_fast_assign(
702 __entry->dev_major = MAJOR(sb->s_dev);
703 __entry->dev_minor = MINOR(sb->s_dev);
704 __entry->blk = blk;
705 __entry->count = count;
706 ),
707 TP_printk("%d %d %llu %llu",
708 __entry->dev_major, __entry->dev_minor,
709 __entry->blk, __entry->count)
710);
711
712DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group);
713
714DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs);
715
691/* End of trace events for fs/ocfs2/alloc.c. */ 716/* End of trace events for fs/ocfs2/alloc.c. */
692 717
693/* Trace events for fs/ocfs2/localalloc.c. */ 718/* Trace events for fs/ocfs2/localalloc.c. */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d32749c896d..ebfd3825f12a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -66,7 +66,7 @@ struct ocfs2_cow_context {
66 u32 *num_clusters, 66 u32 *num_clusters,
67 unsigned int *extent_flags); 67 unsigned int *extent_flags);
68 int (*cow_duplicate_clusters)(handle_t *handle, 68 int (*cow_duplicate_clusters)(handle_t *handle,
69 struct ocfs2_cow_context *context, 69 struct file *file,
70 u32 cpos, u32 old_cluster, 70 u32 cpos, u32 old_cluster,
71 u32 new_cluster, u32 new_len); 71 u32 new_cluster, u32 new_len);
72}; 72};
@@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
2921 return 0; 2921 return 0;
2922} 2922}
2923 2923
2924static int ocfs2_duplicate_clusters_by_page(handle_t *handle, 2924int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2925 struct ocfs2_cow_context *context, 2925 struct file *file,
2926 u32 cpos, u32 old_cluster, 2926 u32 cpos, u32 old_cluster,
2927 u32 new_cluster, u32 new_len) 2927 u32 new_cluster, u32 new_len)
2928{ 2928{
2929 int ret = 0, partial; 2929 int ret = 0, partial;
2930 struct ocfs2_caching_info *ci = context->data_et.et_ci; 2930 struct inode *inode = file->f_path.dentry->d_inode;
2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
2931 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2932 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2933 struct page *page; 2934 struct page *page;
2934 pgoff_t page_index; 2935 pgoff_t page_index;
2935 unsigned int from, to, readahead_pages; 2936 unsigned int from, to, readahead_pages;
2936 loff_t offset, end, map_end; 2937 loff_t offset, end, map_end;
2937 struct address_space *mapping = context->inode->i_mapping; 2938 struct address_space *mapping = inode->i_mapping;
2938 2939
2939 trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, 2940 trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
2940 new_cluster, new_len); 2941 new_cluster, new_len);
@@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2948 * We only duplicate pages until we reach the page contains i_size - 1. 2949 * We only duplicate pages until we reach the page contains i_size - 1.
2949 * So trim 'end' to i_size. 2950 * So trim 'end' to i_size.
2950 */ 2951 */
2951 if (end > i_size_read(context->inode)) 2952 if (end > i_size_read(inode))
2952 end = i_size_read(context->inode); 2953 end = i_size_read(inode);
2953 2954
2954 while (offset < end) { 2955 while (offset < end) {
2955 page_index = offset >> PAGE_CACHE_SHIFT; 2956 page_index = offset >> PAGE_CACHE_SHIFT;
@@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2972 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) 2973 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2973 BUG_ON(PageDirty(page)); 2974 BUG_ON(PageDirty(page));
2974 2975
2975 if (PageReadahead(page) && context->file) { 2976 if (PageReadahead(page)) {
2976 page_cache_async_readahead(mapping, 2977 page_cache_async_readahead(mapping,
2977 &context->file->f_ra, 2978 &file->f_ra, file,
2978 context->file,
2979 page, page_index, 2979 page, page_index,
2980 readahead_pages); 2980 readahead_pages);
2981 } 2981 }
@@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2999 } 2999 }
3000 } 3000 }
3001 3001
3002 ocfs2_map_and_dirty_page(context->inode, 3002 ocfs2_map_and_dirty_page(inode, handle, from, to,
3003 handle, from, to,
3004 page, 0, &new_block); 3003 page, 0, &new_block);
3005 mark_page_accessed(page); 3004 mark_page_accessed(page);
3006unlock: 3005unlock:
@@ -3015,14 +3014,15 @@ unlock:
3015 return ret; 3014 return ret;
3016} 3015}
3017 3016
3018static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 3017int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3019 struct ocfs2_cow_context *context, 3018 struct file *file,
3020 u32 cpos, u32 old_cluster, 3019 u32 cpos, u32 old_cluster,
3021 u32 new_cluster, u32 new_len) 3020 u32 new_cluster, u32 new_len)
3022{ 3021{
3023 int ret = 0; 3022 int ret = 0;
3024 struct super_block *sb = context->inode->i_sb; 3023 struct inode *inode = file->f_path.dentry->d_inode;
3025 struct ocfs2_caching_info *ci = context->data_et.et_ci; 3024 struct super_block *sb = inode->i_sb;
3025 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); 3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
3027 u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); 3027 u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
3028 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 3028 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
@@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle,
3145 3145
3146 /*If the old clusters is unwritten, no need to duplicate. */ 3146 /*If the old clusters is unwritten, no need to duplicate. */
3147 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { 3147 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
3148 ret = context->cow_duplicate_clusters(handle, context, cpos, 3148 ret = context->cow_duplicate_clusters(handle, context->file,
3149 old, new, len); 3149 cpos, old, new, len);
3150 if (ret) { 3150 if (ret) {
3151 mlog_errno(ret); 3151 mlog_errno(ret);
3152 goto out; 3152 goto out;
@@ -3162,22 +3162,22 @@ out:
3162 return ret; 3162 return ret;
3163} 3163}
3164 3164
3165static int ocfs2_cow_sync_writeback(struct super_block *sb, 3165int ocfs2_cow_sync_writeback(struct super_block *sb,
3166 struct ocfs2_cow_context *context, 3166 struct inode *inode,
3167 u32 cpos, u32 num_clusters) 3167 u32 cpos, u32 num_clusters)
3168{ 3168{
3169 int ret = 0; 3169 int ret = 0;
3170 loff_t offset, end, map_end; 3170 loff_t offset, end, map_end;
3171 pgoff_t page_index; 3171 pgoff_t page_index;
3172 struct page *page; 3172 struct page *page;
3173 3173
3174 if (ocfs2_should_order_data(context->inode)) 3174 if (ocfs2_should_order_data(inode))
3175 return 0; 3175 return 0;
3176 3176
3177 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 3177 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
3178 end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); 3178 end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
3179 3179
3180 ret = filemap_fdatawrite_range(context->inode->i_mapping, 3180 ret = filemap_fdatawrite_range(inode->i_mapping,
3181 offset, end - 1); 3181 offset, end - 1);
3182 if (ret < 0) { 3182 if (ret < 0) {
3183 mlog_errno(ret); 3183 mlog_errno(ret);
@@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
3190 if (map_end > end) 3190 if (map_end > end)
3191 map_end = end; 3191 map_end = end;
3192 3192
3193 page = find_or_create_page(context->inode->i_mapping, 3193 page = find_or_create_page(inode->i_mapping,
3194 page_index, GFP_NOFS); 3194 page_index, GFP_NOFS);
3195 BUG_ON(!page); 3195 BUG_ON(!page);
3196 3196
@@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3349 * in write-back mode. 3349 * in write-back mode.
3350 */ 3350 */
3351 if (context->get_clusters == ocfs2_di_get_clusters) { 3351 if (context->get_clusters == ocfs2_di_get_clusters) {
3352 ret = ocfs2_cow_sync_writeback(sb, context, cpos, 3352 ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos,
3353 orig_num_clusters); 3353 orig_num_clusters);
3354 if (ret) 3354 if (ret)
3355 mlog_errno(ret); 3355 mlog_errno(ret);
@@ -3706,7 +3706,7 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
3706 context->cow_start = cow_start; 3706 context->cow_start = cow_start;
3707 context->cow_len = cow_len; 3707 context->cow_len = cow_len;
3708 context->ref_tree = ref_tree; 3708 context->ref_tree = ref_tree;
3709 context->ref_root_bh = ref_root_bh;; 3709 context->ref_root_bh = ref_root_bh;
3710 context->cow_object = xv; 3710 context->cow_object = xv;
3711 3711
3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; 3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index c8ce46f7d8e3..7754608c83a4 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
84 struct buffer_head *ref_root_bh, 84 struct buffer_head *ref_root_bh,
85 u32 cpos, u32 write_len, 85 u32 cpos, u32 write_len,
86 struct ocfs2_post_refcount *post); 86 struct ocfs2_post_refcount *post);
87int ocfs2_duplicate_clusters_by_page(handle_t *handle,
88 struct file *file,
89 u32 cpos, u32 old_cluster,
90 u32 new_cluster, u32 new_len);
91int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
92 struct file *file,
93 u32 cpos, u32 old_cluster,
94 u32 new_cluster, u32 new_len);
95int ocfs2_cow_sync_writeback(struct super_block *sb,
96 struct inode *inode,
97 u32 cpos, u32 num_clusters);
87int ocfs2_add_refcount_flag(struct inode *inode, 98int ocfs2_add_refcount_flag(struct inode *inode,
88 struct ocfs2_extent_tree *data_et, 99 struct ocfs2_extent_tree *data_et,
89 struct ocfs2_caching_info *ref_ci, 100 struct ocfs2_caching_info *ref_ci,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 5a521c748859..cdbaf5e97308 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,6 +41,7 @@
41#include <linux/mount.h> 41#include <linux/mount.h>
42#include <linux/seq_file.h> 42#include <linux/seq_file.h>
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44#include <linux/cleancache.h>
44 45
45#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
46#include "ocfs2_trace.h" 47#include "ocfs2_trace.h"
@@ -1566,7 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1566 if (osb->preferred_slot != OCFS2_INVALID_SLOT) 1567 if (osb->preferred_slot != OCFS2_INVALID_SLOT)
1567 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); 1568 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
1568 1569
1569 if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) 1570 if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME))
1570 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); 1571 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
1571 1572
1572 if (osb->osb_commit_interval) 1573 if (osb->osb_commit_interval)
@@ -2352,6 +2353,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
2352 mlog_errno(status); 2353 mlog_errno(status);
2353 goto bail; 2354 goto bail;
2354 } 2355 }
2356 cleancache_init_shared_fs((char *)&uuid_net_key, sb);
2355 2357
2356bail: 2358bail:
2357 return status; 2359 return status;
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index de4ff29f1e05..c368360c35a1 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -240,8 +240,12 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry)
240 struct inode *inode = dentry->d_inode; 240 struct inode *inode = dentry->d_inode;
241 int ret; 241 int ret;
242 242
243 if (S_ISDIR(inode->i_mode) && !omfs_dir_is_empty(inode)) 243
244 return -ENOTEMPTY; 244 if (S_ISDIR(inode->i_mode)) {
245 dentry_unhash(dentry);
246 if (!omfs_dir_is_empty(inode))
247 return -ENOTEMPTY;
248 }
245 249
246 ret = omfs_delete_entry(dentry); 250 ret = omfs_delete_entry(dentry);
247 if (ret) 251 if (ret)
@@ -378,6 +382,9 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
378 int err; 382 int err;
379 383
380 if (new_inode) { 384 if (new_inode) {
385 if (S_ISDIR(new_inode->i_mode))
386 dentry_unhash(new_dentry);
387
381 /* overwriting existing file/dir */ 388 /* overwriting existing file/dir */
382 err = omfs_remove(new_dir, new_dentry); 389 err = omfs_remove(new_dir, new_dentry);
383 if (err) 390 if (err)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..f82e762eeca2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,13 @@ ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment); 258 struct gendisk *disk = dev_to_disk(dev);
259 unsigned int alignment = 0;
260
261 if (disk->queue)
262 alignment = queue_limit_discard_alignment(&disk->queue->limits,
263 p->start_sect);
264 return sprintf(buf, "%u\n", alignment);
259} 265}
260 266
261ssize_t part_stat_show(struct device *dev, 267ssize_t part_stat_show(struct device *dev,
@@ -449,8 +455,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
449 p->start_sect = start; 455 p->start_sect = start;
450 p->alignment_offset = 456 p->alignment_offset =
451 queue_limit_alignment_offset(&disk->queue->limits, start); 457 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
454 p->nr_sects = len; 458 p->nr_sects = len;
455 p->partno = partno; 459 p->partno = partno;
456 p->policy = get_disk_ro(disk); 460 p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 19d6750d1d6c..6296b403c67a 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -310,6 +310,15 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
310 goto fail; 310 goto fail;
311 } 311 }
312 312
313 /* Check the GUID Partition Table header size */
314 if (le32_to_cpu((*gpt)->header_size) >
315 bdev_logical_block_size(state->bdev)) {
316 pr_debug("GUID Partition Table Header size is wrong: %u > %u\n",
317 le32_to_cpu((*gpt)->header_size),
318 bdev_logical_block_size(state->bdev));
319 goto fail;
320 }
321
313 /* Check the GUID Partition Table CRC */ 322 /* Check the GUID Partition Table CRC */
314 origcrc = le32_to_cpu((*gpt)->header_crc32); 323 origcrc = le32_to_cpu((*gpt)->header_crc32);
315 (*gpt)->header_crc32 = 0; 324 (*gpt)->header_crc32 = 0;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ce4f62440425..af9fdf046769 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -565,7 +565,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
565 565
566 data = read_part_sector(state, 0, &sect); 566 data = read_part_sector(state, 0, &sect);
567 if (!data) { 567 if (!data) {
568 ldm_crit ("Disk read failed."); 568 ldm_info ("Disk read failed.");
569 return false; 569 return false;
570 } 570 }
571 571
@@ -1335,6 +1335,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1335 1335
1336 list_add_tail (&f->list, frags); 1336 list_add_tail (&f->list, frags);
1337found: 1337found:
1338 if (rec >= f->num) {
1339 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
1340 return false;
1341 }
1342
1338 if (f->map & (1 << rec)) { 1343 if (f->map & (1 << rec)) {
1339 ldm_error ("Duplicate VBLK, part %d.", rec); 1344 ldm_error ("Duplicate VBLK, part %d.", rec);
1340 f->map &= 0x7F; /* Mark the group as broken */ 1345 f->map &= 0x7F; /* Mark the group as broken */
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y += stat.o
20proc-y += uptime.o 20proc-y += uptime.o
21proc-y += version.o 21proc-y += version.o
22proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o
23proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
24proc-$(CONFIG_NET) += proc_net.o 25proc-$(CONFIG_NET) += proc_net.o
25proc-$(CONFIG_PROC_KCORE) += kcore.o 26proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5e4f776b0917..9b45ee84fbcc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -131,7 +131,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
131 * you can test for combinations of others with 131 * you can test for combinations of others with
132 * simple bit tests. 132 * simple bit tests.
133 */ 133 */
134static const char *task_state_array[] = { 134static const char * const task_state_array[] = {
135 "R (running)", /* 0 */ 135 "R (running)", /* 0 */
136 "S (sleeping)", /* 1 */ 136 "S (sleeping)", /* 1 */
137 "D (disk sleep)", /* 2 */ 137 "D (disk sleep)", /* 2 */
@@ -147,7 +147,7 @@ static const char *task_state_array[] = {
147static inline const char *get_task_state(struct task_struct *tsk) 147static inline const char *get_task_state(struct task_struct *tsk)
148{ 148{
149 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 149 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
150 const char **p = &task_state_array[0]; 150 const char * const *p = &task_state_array[0];
151 151
152 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); 152 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
153 153
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..4ede550517a6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
600 return allowed; 600 return allowed;
601} 601}
602 602
603static int proc_setattr(struct dentry *dentry, struct iattr *attr) 603int proc_setattr(struct dentry *dentry, struct iattr *attr)
604{ 604{
605 int error; 605 int error;
606 struct inode *inode = dentry->d_inode; 606 struct inode *inode = dentry->d_inode;
@@ -894,20 +894,20 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
894 if (!task) 894 if (!task)
895 goto out_no_task; 895 goto out_no_task;
896 896
897 copied = -ENOMEM;
898 page = (char *)__get_free_page(GFP_TEMPORARY);
899 if (!page)
900 goto out_task;
901
897 mm = check_mem_permission(task); 902 mm = check_mem_permission(task);
898 copied = PTR_ERR(mm); 903 copied = PTR_ERR(mm);
899 if (IS_ERR(mm)) 904 if (IS_ERR(mm))
900 goto out_task; 905 goto out_free;
901 906
902 copied = -EIO; 907 copied = -EIO;
903 if (file->private_data != (void *)((long)current->self_exec_id)) 908 if (file->private_data != (void *)((long)current->self_exec_id))
904 goto out_mm; 909 goto out_mm;
905 910
906 copied = -ENOMEM;
907 page = (char *)__get_free_page(GFP_TEMPORARY);
908 if (!page)
909 goto out_mm;
910
911 copied = 0; 911 copied = 0;
912 while (count > 0) { 912 while (count > 0) {
913 int this_len, retval; 913 int this_len, retval;
@@ -929,9 +929,11 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
929 count -= retval; 929 count -= retval;
930 } 930 }
931 *ppos = dst; 931 *ppos = dst;
932 free_page((unsigned long) page); 932
933out_mm: 933out_mm:
934 mmput(mm); 934 mmput(mm);
935out_free:
936 free_page((unsigned long) page);
935out_task: 937out_task:
936 put_task_struct(task); 938 put_task_struct(task);
937out_no_task: 939out_no_task:
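
The mem_write() reorder above is the classic goto-unwind discipline: the page is now allocated before the permission check, and the exit labels run in reverse order of acquisition so every failure path releases exactly what is held. The skeleton, with acquire_a/acquire_b and friends as placeholder names:

static int op(void)
{
        void *a, *b;
        int err = -ENOMEM;

        a = acquire_a();                /* first resource ...           */
        if (!a)
                goto out;
        b = acquire_b();
        if (!b)
                goto out_a;

        err = do_work(a, b);

        release_b(b);                   /* second resource, undone first */
out_a:
        release_a(a);                   /* ... released last             */
out:
        return err;
}
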
@@ -1059,7 +1061,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1059{ 1061{
1060 struct task_struct *task; 1062 struct task_struct *task;
1061 char buffer[PROC_NUMBUF]; 1063 char buffer[PROC_NUMBUF];
1062 long oom_adjust; 1064 int oom_adjust;
1063 unsigned long flags; 1065 unsigned long flags;
1064 int err; 1066 int err;
1065 1067
@@ -1071,7 +1073,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1071 goto out; 1073 goto out;
1072 } 1074 }
1073 1075
1074 err = strict_strtol(strstrip(buffer), 0, &oom_adjust); 1076 err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
1075 if (err) 1077 if (err)
1076 goto out; 1078 goto out;
1077 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1079 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
@@ -1168,7 +1170,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1168 struct task_struct *task; 1170 struct task_struct *task;
1169 char buffer[PROC_NUMBUF]; 1171 char buffer[PROC_NUMBUF];
1170 unsigned long flags; 1172 unsigned long flags;
1171 long oom_score_adj; 1173 int oom_score_adj;
1172 int err; 1174 int err;
1173 1175
1174 memset(buffer, 0, sizeof(buffer)); 1176 memset(buffer, 0, sizeof(buffer));
@@ -1179,7 +1181,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1179 goto out; 1181 goto out;
1180 } 1182 }
1181 1183
1182 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); 1184 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
1183 if (err) 1185 if (err)
1184 goto out; 1186 goto out;
1185 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1187 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
@@ -1468,7 +1470,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1468 struct inode *inode = file->f_path.dentry->d_inode; 1470 struct inode *inode = file->f_path.dentry->d_inode;
1469 struct task_struct *p; 1471 struct task_struct *p;
1470 char buffer[PROC_NUMBUF]; 1472 char buffer[PROC_NUMBUF];
1471 long nice; 1473 int nice;
1472 int err; 1474 int err;
1473 1475
1474 memset(buffer, 0, sizeof(buffer)); 1476 memset(buffer, 0, sizeof(buffer));
@@ -1477,9 +1479,9 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1477 if (copy_from_user(buffer, buf, count)) 1479 if (copy_from_user(buffer, buf, count))
1478 return -EFAULT; 1480 return -EFAULT;
1479 1481
1480 err = strict_strtol(strstrip(buffer), 0, &nice); 1482 err = kstrtoint(strstrip(buffer), 0, &nice);
1481 if (err) 1483 if (err < 0)
1482 return -EINVAL; 1484 return err;
1483 1485
1484 p = get_proc_task(inode); 1486 p = get_proc_task(inode);
1485 if (!p) 1487 if (!p)
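
Several writers in base.c switch from strict_strtol() into a long to kstrtoint() into an int, which both removes the silent long-to-int truncation window and lets the parser's own error codes propagate instead of being flattened to -EINVAL. Usage in isolation:

int val;
int err;

err = kstrtoint(strstrip(buffer), 0, &val);     /* base 0 = auto-detect */
if (err < 0)
        return err;             /* -EINVAL or -ERANGE from the parser */
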
@@ -1576,57 +1578,6 @@ static const struct file_operations proc_pid_set_comm_operations = {
1576 .release = single_release, 1578 .release = single_release,
1577}; 1579};
1578 1580
1579/*
1580 * We added or removed a vma mapping the executable. The vmas are only mapped
1581 * during exec and are not mapped with the mmap system call.
1582 * Callers must hold down_write() on the mm's mmap_sem for these
1583 */
1584void added_exe_file_vma(struct mm_struct *mm)
1585{
1586 mm->num_exe_file_vmas++;
1587}
1588
1589void removed_exe_file_vma(struct mm_struct *mm)
1590{
1591 mm->num_exe_file_vmas--;
1592 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1593 fput(mm->exe_file);
1594 mm->exe_file = NULL;
1595 }
1596
1597}
1598
1599void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1600{
1601 if (new_exe_file)
1602 get_file(new_exe_file);
1603 if (mm->exe_file)
1604 fput(mm->exe_file);
1605 mm->exe_file = new_exe_file;
1606 mm->num_exe_file_vmas = 0;
1607}
1608
1609struct file *get_mm_exe_file(struct mm_struct *mm)
1610{
1611 struct file *exe_file;
1612
1613 /* We need mmap_sem to protect against races with removal of
1614 * VM_EXECUTABLE vmas */
1615 down_read(&mm->mmap_sem);
1616 exe_file = mm->exe_file;
1617 if (exe_file)
1618 get_file(exe_file);
1619 up_read(&mm->mmap_sem);
1620 return exe_file;
1621}
1622
1623void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1624{
1625 /* It's safe to write the exe_file pointer without exe_file_lock because
1626 * this is called during fork when the task is not yet in /proc */
1627 newmm->exe_file = get_mm_exe_file(oldmm);
1628}
1629
1630static int proc_exe_link(struct inode *inode, struct path *exe_path) 1581static int proc_exe_link(struct inode *inode, struct path *exe_path)
1631{ 1582{
1632 struct task_struct *task; 1583 struct task_struct *task;
@@ -1736,8 +1687,7 @@ static int task_dumpable(struct task_struct *task)
1736 return 0; 1687 return 0;
1737} 1688}
1738 1689
1739 1690struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1740static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1741{ 1691{
1742 struct inode * inode; 1692 struct inode * inode;
1743 struct proc_inode *ei; 1693 struct proc_inode *ei;
@@ -1779,7 +1729,7 @@ out_unlock:
1779 return NULL; 1729 return NULL;
1780} 1730}
1781 1731
1782static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1732int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1783{ 1733{
1784 struct inode *inode = dentry->d_inode; 1734 struct inode *inode = dentry->d_inode;
1785 struct task_struct *task; 1735 struct task_struct *task;
@@ -1820,7 +1770,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1820 * made this apply to all per process world readable and executable 1770 * made this apply to all per process world readable and executable
1821 * directories. 1771 * directories.
1822 */ 1772 */
1823static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1773int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1824{ 1774{
1825 struct inode *inode; 1775 struct inode *inode;
1826 struct task_struct *task; 1776 struct task_struct *task;
@@ -1862,7 +1812,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
1862 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1812 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1863} 1813}
1864 1814
1865static const struct dentry_operations pid_dentry_operations = 1815const struct dentry_operations pid_dentry_operations =
1866{ 1816{
1867 .d_revalidate = pid_revalidate, 1817 .d_revalidate = pid_revalidate,
1868 .d_delete = pid_delete_dentry, 1818 .d_delete = pid_delete_dentry,
@@ -1870,9 +1820,6 @@ static const struct dentry_operations pid_dentry_operations =
1870 1820
1871/* Lookups */ 1821/* Lookups */
1872 1822
1873typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1874 struct task_struct *, const void *);
1875
1876/* 1823/*
1877 * Fill a directory entry. 1824 * Fill a directory entry.
1878 * 1825 *
@@ -1885,8 +1832,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1885 * reported by readdir in sync with the inode numbers reported 1832 * reported by readdir in sync with the inode numbers reported
1886 * by stat. 1833 * by stat.
1887 */ 1834 */
1888static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1835int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1889 char *name, int len, 1836 const char *name, int len,
1890 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1837 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1891{ 1838{
1892 struct dentry *child, *dir = filp->f_path.dentry; 1839 struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2767,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2820 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2767 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2821 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2768 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2822 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2769 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2770 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2823#ifdef CONFIG_NET 2771#ifdef CONFIG_NET
2824 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2772 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2825#endif 2773#endif
@@ -3168,6 +3116,7 @@ out_no_task:
3168static const struct pid_entry tid_base_stuff[] = { 3116static const struct pid_entry tid_base_stuff[] = {
3169 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3117 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3170 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3118 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3119 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3171 REG("environ", S_IRUSR, proc_environ_operations), 3120 REG("environ", S_IRUSR, proc_environ_operations),
3172 INF("auxv", S_IRUSR, proc_pid_auxv), 3121 INF("auxv", S_IRUSR, proc_pid_auxv),
3173 ONE("status", S_IRUGO, proc_pid_status), 3122 ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1281339b6fa..f1637f17c37c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -674,6 +674,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
674 } 674 }
675 return ent; 675 return ent;
676} 676}
677EXPORT_SYMBOL(proc_mkdir_mode);
677 678
678struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, 679struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
679 struct proc_dir_entry *parent) 680 struct proc_dir_entry *parent)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head; 30 struct ctl_table_header *head;
31 const struct proc_ns_operations *ns_ops;
31 32
32 truncate_inode_pages(&inode->i_data, 0); 33 truncate_inode_pages(&inode->i_data, 0);
33 end_writeback(inode); 34 end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 45 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head); 46 sysctl_head_put(head);
46 } 47 }
48 /* Release any associated namespace */
49 ns_ops = PROC_I(inode)->ns_ops;
50 if (ns_ops && ns_ops->put)
51 ns_ops->put(PROC_I(inode)->ns);
47} 52}
48 53
49static struct kmem_cache * proc_inode_cachep; 54static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
62 ei->pde = NULL; 67 ei->pde = NULL;
63 ei->sysctl = NULL; 68 ei->sysctl = NULL;
64 ei->sysctl_entry = NULL; 69 ei->sysctl_entry = NULL;
70 ei->ns = NULL;
71 ei->ns_ops = NULL;
65 inode = &ei->vfs_inode; 72 inode = &ei->vfs_inode;
66 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 73 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
67 return inode; 74 return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..7838e5cfec14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64struct proc_maps_private {
65 struct pid *pid;
66 struct task_struct *task;
67#ifdef CONFIG_MMU
68 struct vm_area_struct *tail_vma;
69#endif
70};
71
64void proc_init_inodecache(void); 72void proc_init_inodecache(void);
65 73
66static inline struct pid *proc_pid(struct inode *inode) 74static inline struct pid *proc_pid(struct inode *inode)
@@ -119,3 +127,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
119 */ 127 */
120int proc_readdir(struct file *, void *, filldir_t); 128int proc_readdir(struct file *, void *, filldir_t);
121struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); 129struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
130
131
132
133/* Lookups */
134typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
135 struct task_struct *, const void *);
136int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
137 const char *name, int len,
138 instantiate_t instantiate, struct task_struct *task, const void *ptr);
139int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
140struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
141extern const struct dentry_operations pid_dentry_operations;
142int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
143int proc_setattr(struct dentry *dentry, struct iattr *attr);
144
145extern const struct inode_operations proc_ns_dir_inode_operations;
146extern const struct file_operations proc_ns_dir_operations;
147
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..781dec5bd682
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,198 @@
1#include <linux/proc_fs.h>
2#include <linux/nsproxy.h>
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <linux/fs_struct.h>
6#include <linux/mount.h>
7#include <linux/path.h>
8#include <linux/namei.h>
9#include <linux/file.h>
10#include <linux/utsname.h>
11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h>
15#include "internal.h"
16
17
18static const struct proc_ns_operations *ns_entries[] = {
19#ifdef CONFIG_NET_NS
20 &netns_operations,
21#endif
22#ifdef CONFIG_UTS_NS
23 &utsns_operations,
24#endif
25#ifdef CONFIG_IPC_NS
26 &ipcns_operations,
27#endif
28};
29
30static const struct file_operations ns_file_operations = {
31 .llseek = no_llseek,
32};
33
34static struct dentry *proc_ns_instantiate(struct inode *dir,
35 struct dentry *dentry, struct task_struct *task, const void *ptr)
36{
37 const struct proc_ns_operations *ns_ops = ptr;
38 struct inode *inode;
39 struct proc_inode *ei;
40 struct dentry *error = ERR_PTR(-ENOENT);
41
42 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode)
44 goto out;
45
46 ei = PROC_I(inode);
47 inode->i_mode = S_IFREG|S_IRUSR;
48 inode->i_fop = &ns_file_operations;
49 ei->ns_ops = ns_ops;
50 ei->ns = ns_ops->get(task);
51 if (!ei->ns)
52 goto out_iput;
53
54 dentry->d_op = &pid_dentry_operations;
55 d_add(dentry, inode);
56 /* Close the race of the process dying before we return the dentry */
57 if (pid_revalidate(dentry, NULL))
58 error = NULL;
59out:
60 return error;
61out_iput:
62 iput(inode);
63 goto out;
64}
65
66static int proc_ns_fill_cache(struct file *filp, void *dirent,
67 filldir_t filldir, struct task_struct *task,
68 const struct proc_ns_operations *ops)
69{
70 return proc_fill_cache(filp, dirent, filldir,
71 ops->name, strlen(ops->name),
72 proc_ns_instantiate, task, ops);
73}
74
75static int proc_ns_dir_readdir(struct file *filp, void *dirent,
76 filldir_t filldir)
77{
78 int i;
79 struct dentry *dentry = filp->f_path.dentry;
80 struct inode *inode = dentry->d_inode;
81 struct task_struct *task = get_proc_task(inode);
82 const struct proc_ns_operations **entry, **last;
83 ino_t ino;
84 int ret;
85
86 ret = -ENOENT;
87 if (!task)
88 goto out_no_task;
89
90 ret = -EPERM;
91 if (!ptrace_may_access(task, PTRACE_MODE_READ))
92 goto out;
93
94 ret = 0;
95 i = filp->f_pos;
96 switch (i) {
97 case 0:
98 ino = inode->i_ino;
99 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
100 goto out;
101 i++;
102 filp->f_pos++;
103 /* fall through */
104 case 1:
105 ino = parent_ino(dentry);
106 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
107 goto out;
108 i++;
109 filp->f_pos++;
110 /* fall through */
111 default:
112 i -= 2;
113 if (i >= ARRAY_SIZE(ns_entries)) {
114 ret = 1;
115 goto out;
116 }
117 entry = ns_entries + i;
118 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
119 while (entry <= last) {
120 if (proc_ns_fill_cache(filp, dirent, filldir,
121 task, *entry) < 0)
122 goto out;
123 filp->f_pos++;
124 entry++;
125 }
126 }
127
128 ret = 1;
129out:
130 put_task_struct(task);
131out_no_task:
132 return ret;
133}
134
135const struct file_operations proc_ns_dir_operations = {
136 .read = generic_read_dir,
137 .readdir = proc_ns_dir_readdir,
138};
139
140static struct dentry *proc_ns_dir_lookup(struct inode *dir,
141 struct dentry *dentry, struct nameidata *nd)
142{
143 struct dentry *error;
144 struct task_struct *task = get_proc_task(dir);
145 const struct proc_ns_operations **entry, **last;
146 unsigned int len = dentry->d_name.len;
147
148 error = ERR_PTR(-ENOENT);
149
150 if (!task)
151 goto out_no_task;
152
153 error = ERR_PTR(-EPERM);
154 if (!ptrace_may_access(task, PTRACE_MODE_READ))
155 goto out;
156
157 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
158 for (entry = ns_entries; entry <= last; entry++) {
159 if (strlen((*entry)->name) != len)
160 continue;
161 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
162 break;
163 }
164 error = ERR_PTR(-ENOENT);
165 if (entry > last)
166 goto out;
167
168 error = proc_ns_instantiate(dir, dentry, task, *entry);
169out:
170 put_task_struct(task);
171out_no_task:
172 return error;
173}
174
175const struct inode_operations proc_ns_dir_inode_operations = {
176 .lookup = proc_ns_dir_lookup,
177 .getattr = pid_getattr,
178 .setattr = proc_setattr,
179};
180
181struct file *proc_ns_fget(int fd)
182{
183 struct file *file;
184
185 file = fget(fd);
186 if (!file)
187 return ERR_PTR(-EBADF);
188
189 if (file->f_op != &ns_file_operations)
190 goto out_invalid;
191
192 return file;
193
194out_invalid:
195 fput(file);
196 return ERR_PTR(-EINVAL);
197}
198
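
namespaces.c gives each task a /proc/<pid>/ns directory of per-namespace files (net, uts, ipc here), instantiated through the generic pid_entry machinery above; the otherwise-empty ns_file_operations exists so proc_ns_fget() can recognize such an open file by its f_op pointer and hand the associated namespace to a consumer. A hedged userspace sketch of the intended use, assuming a setns()-style syscall from the same series rather than anything shown in this diff:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/proc/self/ns/uts", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* a setns(fd, 0) call would re-attach to this namespace here */
        close(fd);
        return 0;
}
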
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 1cffa2b8a2fc..9758b654a1bc 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -138,9 +138,9 @@ static int stat_open(struct inode *inode, struct file *file)
138 struct seq_file *m; 138 struct seq_file *m;
139 int res; 139 int res;
140 140
141 /* don't ask for more than the kmalloc() max size, currently 128 KB */ 141 /* don't ask for more than the kmalloc() max size */
142 if (size > 128 * 1024) 142 if (size > KMALLOC_MAX_SIZE)
143 size = 128 * 1024; 143 size = KMALLOC_MAX_SIZE;
144 buf = kmalloc(size, GFP_KERNEL); 144 buf = kmalloc(size, GFP_KERNEL);
145 if (!buf) 145 if (!buf)
146 return -ENOMEM; 146 return -ENOMEM;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 318d8654989b..25b6a887adb9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -211,7 +211,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
211{ 211{
212 struct mm_struct *mm = vma->vm_mm; 212 struct mm_struct *mm = vma->vm_mm;
213 struct file *file = vma->vm_file; 213 struct file *file = vma->vm_file;
214 int flags = vma->vm_flags; 214 vm_flags_t flags = vma->vm_flags;
215 unsigned long ino = 0; 215 unsigned long ino = 0;
216 unsigned long long pgoff = 0; 216 unsigned long long pgoff = 0;
217 unsigned long start, end; 217 unsigned long start, end;
@@ -536,15 +536,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
536 char buffer[PROC_NUMBUF]; 536 char buffer[PROC_NUMBUF];
537 struct mm_struct *mm; 537 struct mm_struct *mm;
538 struct vm_area_struct *vma; 538 struct vm_area_struct *vma;
539 long type; 539 int type;
540 int rv;
540 541
541 memset(buffer, 0, sizeof(buffer)); 542 memset(buffer, 0, sizeof(buffer));
542 if (count > sizeof(buffer) - 1) 543 if (count > sizeof(buffer) - 1)
543 count = sizeof(buffer) - 1; 544 count = sizeof(buffer) - 1;
544 if (copy_from_user(buffer, buf, count)) 545 if (copy_from_user(buffer, buf, count))
545 return -EFAULT; 546 return -EFAULT;
546 if (strict_strtol(strstrip(buffer), 10, &type)) 547 rv = kstrtoint(strstrip(buffer), 10, &type);
547 return -EINVAL; 548 if (rv < 0)
549 return rv;
548 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 550 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
549 return -EINVAL; 551 return -EINVAL;
550 task = get_proc_task(file->f_path.dentry->d_inode); 552 task = get_proc_task(file->f_path.dentry->d_inode);
@@ -769,18 +771,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
769 if (!task) 771 if (!task)
770 goto out; 772 goto out;
771 773
772 mm = mm_for_maps(task);
773 ret = PTR_ERR(mm);
774 if (!mm || IS_ERR(mm))
775 goto out_task;
776
777 ret = -EINVAL; 774 ret = -EINVAL;
778 /* file position must be aligned */ 775 /* file position must be aligned */
779 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) 776 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
780 goto out_task; 777 goto out_task;
781 778
782 ret = 0; 779 ret = 0;
783
784 if (!count) 780 if (!count)
785 goto out_task; 781 goto out_task;
786 782
@@ -788,7 +784,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
788 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 784 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
789 ret = -ENOMEM; 785 ret = -ENOMEM;
790 if (!pm.buffer) 786 if (!pm.buffer)
791 goto out_mm; 787 goto out_task;
788
789 mm = mm_for_maps(task);
790 ret = PTR_ERR(mm);
791 if (!mm || IS_ERR(mm))
792 goto out_free;
792 793
793 pagemap_walk.pmd_entry = pagemap_pte_range; 794 pagemap_walk.pmd_entry = pagemap_pte_range;
794 pagemap_walk.pte_hole = pagemap_pte_hole; 795 pagemap_walk.pte_hole = pagemap_pte_hole;
@@ -831,7 +832,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
831 len = min(count, PM_ENTRY_BYTES * pm.pos); 832 len = min(count, PM_ENTRY_BYTES * pm.pos);
832 if (copy_to_user(buf, pm.buffer, len)) { 833 if (copy_to_user(buf, pm.buffer, len)) {
833 ret = -EFAULT; 834 ret = -EFAULT;
834 goto out_free; 835 goto out_mm;
835 } 836 }
836 copied += len; 837 copied += len;
837 buf += len; 838 buf += len;
@@ -841,10 +842,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
841 if (!ret || ret == PM_END_OF_BUFFER) 842 if (!ret || ret == PM_END_OF_BUFFER)
842 ret = copied; 843 ret = copied;
843 844
844out_free:
845 kfree(pm.buffer);
846out_mm: 845out_mm:
847 mmput(mm); 846 mmput(mm);
847out_free:
848 kfree(pm.buffer);
848out_task: 849out_task:
849 put_task_struct(task); 850 put_task_struct(task);
850out: 851out:
@@ -858,7 +859,192 @@ const struct file_operations proc_pagemap_operations = {
858#endif /* CONFIG_PROC_PAGE_MONITOR */ 859#endif /* CONFIG_PROC_PAGE_MONITOR */
859 860
860#ifdef CONFIG_NUMA 861#ifdef CONFIG_NUMA
861extern int show_numa_map(struct seq_file *m, void *v); 862
863struct numa_maps {
864 struct vm_area_struct *vma;
865 unsigned long pages;
866 unsigned long anon;
867 unsigned long active;
868 unsigned long writeback;
869 unsigned long mapcount_max;
870 unsigned long dirty;
871 unsigned long swapcache;
872 unsigned long node[MAX_NUMNODES];
873};
874
875struct numa_maps_private {
876 struct proc_maps_private proc_maps;
877 struct numa_maps md;
878};
879
880static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
881{
882 int count = page_mapcount(page);
883
884 md->pages++;
885 if (pte_dirty || PageDirty(page))
886 md->dirty++;
887
888 if (PageSwapCache(page))
889 md->swapcache++;
890
891 if (PageActive(page) || PageUnevictable(page))
892 md->active++;
893
894 if (PageWriteback(page))
895 md->writeback++;
896
897 if (PageAnon(page))
898 md->anon++;
899
900 if (count > md->mapcount_max)
901 md->mapcount_max = count;
902
903 md->node[page_to_nid(page)]++;
904}
905
906static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
907 unsigned long end, struct mm_walk *walk)
908{
909 struct numa_maps *md;
910 spinlock_t *ptl;
911 pte_t *orig_pte;
912 pte_t *pte;
913
914 md = walk->private;
915 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
916 do {
917 struct page *page;
918 int nid;
919
920 if (!pte_present(*pte))
921 continue;
922
923 page = vm_normal_page(md->vma, addr, *pte);
924 if (!page)
925 continue;
926
927 if (PageReserved(page))
928 continue;
929
930 nid = page_to_nid(page);
931 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
932 continue;
933
934 gather_stats(page, md, pte_dirty(*pte));
935
936 } while (pte++, addr += PAGE_SIZE, addr != end);
937 pte_unmap_unlock(orig_pte, ptl);
938 return 0;
939}
940#ifdef CONFIG_HUGETLB_PAGE
941static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
942 unsigned long addr, unsigned long end, struct mm_walk *walk)
943{
944 struct numa_maps *md;
945 struct page *page;
946
947 if (pte_none(*pte))
948 return 0;
949
950 page = pte_page(*pte);
951 if (!page)
952 return 0;
953
954 md = walk->private;
955 gather_stats(page, md, pte_dirty(*pte));
956 return 0;
957}
958
959#else
960static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
961 unsigned long addr, unsigned long end, struct mm_walk *walk)
962{
963 return 0;
964}
965#endif
966
967/*
968 * Display pages allocated per node and memory policy via /proc.
969 */
970static int show_numa_map(struct seq_file *m, void *v)
971{
972 struct numa_maps_private *numa_priv = m->private;
973 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
974 struct vm_area_struct *vma = v;
975 struct numa_maps *md = &numa_priv->md;
976 struct file *file = vma->vm_file;
977 struct mm_struct *mm = vma->vm_mm;
978 struct mm_walk walk = {};
979 struct mempolicy *pol;
980 int n;
981 char buffer[50];
982
983 if (!mm)
984 return 0;
985
986 /* Ensure we start with an empty set of numa_maps statistics. */
987 memset(md, 0, sizeof(*md));
988
989 md->vma = vma;
990
991 walk.hugetlb_entry = gather_hugetbl_stats;
992 walk.pmd_entry = gather_pte_stats;
993 walk.private = md;
994 walk.mm = mm;
995
996 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
997 mpol_to_str(buffer, sizeof(buffer), pol, 0);
998 mpol_cond_put(pol);
999
1000 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1001
1002 if (file) {
1003 seq_printf(m, " file=");
1004 seq_path(m, &file->f_path, "\n\t= ");
1005 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1006 seq_printf(m, " heap");
1007 } else if (vma->vm_start <= mm->start_stack &&
1008 vma->vm_end >= mm->start_stack) {
1009 seq_printf(m, " stack");
1010 }
1011
1012 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1013
1014 if (!md->pages)
1015 goto out;
1016
1017 if (md->anon)
1018 seq_printf(m, " anon=%lu", md->anon);
1019
1020 if (md->dirty)
1021 seq_printf(m, " dirty=%lu", md->dirty);
1022
1023 if (md->pages != md->anon && md->pages != md->dirty)
1024 seq_printf(m, " mapped=%lu", md->pages);
1025
1026 if (md->mapcount_max > 1)
1027 seq_printf(m, " mapmax=%lu", md->mapcount_max);
1028
1029 if (md->swapcache)
1030 seq_printf(m, " swapcache=%lu", md->swapcache);
1031
1032 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1033 seq_printf(m, " active=%lu", md->active);
1034
1035 if (md->writeback)
1036 seq_printf(m, " writeback=%lu", md->writeback);
1037
1038 for_each_node_state(n, N_HIGH_MEMORY)
1039 if (md->node[n])
1040 seq_printf(m, " N%d=%lu", n, md->node[n]);
1041out:
1042 seq_putc(m, '\n');
1043
1044 if (m->count < m->size)
1045 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
1046 return 0;
1047}
862 1048
863static const struct seq_operations proc_pid_numa_maps_op = { 1049static const struct seq_operations proc_pid_numa_maps_op = {
864 .start = m_start, 1050 .start = m_start,
@@ -869,7 +1055,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
869 1055
870static int numa_maps_open(struct inode *inode, struct file *file) 1056static int numa_maps_open(struct inode *inode, struct file *file)
871{ 1057{
872 return do_maps_open(inode, file, &proc_pid_numa_maps_op); 1058 struct numa_maps_private *priv;
1059 int ret = -ENOMEM;
1060 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1061 if (priv) {
1062 priv->proc_maps.pid = proc_pid(inode);
1063 ret = seq_open(file, &proc_pid_numa_maps_op);
1064 if (!ret) {
1065 struct seq_file *m = file->private_data;
1066 m->private = priv;
1067 } else {
1068 kfree(priv);
1069 }
1070 }
1071 return ret;
873} 1072}
874 1073
875const struct file_operations proc_numa_maps_operations = { 1074const struct file_operations proc_numa_maps_operations = {
@@ -878,4 +1077,4 @@ const struct file_operations proc_numa_maps_operations = {
878 .llseek = seq_lseek, 1077 .llseek = seq_lseek,
879 .release = seq_release_private, 1078 .release = seq_release_private,
880}; 1079};
881#endif 1080#endif /* CONFIG_NUMA */
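
numa_maps_open() above is the usual pairing for a seq_file that needs per-open state: allocate the private struct first, then seq_open(), then stash the pointer in m->private so the seq_release_private() already wired into proc_numa_maps_operations frees it on the last close. Reduced to the pattern (foo_* names hypothetical):

static int foo_open(struct inode *inode, struct file *file)
{
        struct foo_private *priv;
        int ret = -ENOMEM;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (priv) {
                ret = seq_open(file, &foo_seq_ops);
                if (!ret) {
                        struct seq_file *m = file->private_data;
                        m->private = priv;      /* freed by seq_release_private */
                } else {
                        kfree(priv);
                }
        }
        return ret;
}
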
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 74802bc5ded9..cd99bf557650 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -35,6 +35,46 @@ static u64 vmcore_size;
35 35
36static struct proc_dir_entry *proc_vmcore = NULL; 36static struct proc_dir_entry *proc_vmcore = NULL;
37 37
38/*
39 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
40 * The called function has to take care of module refcounting.
41 */
42static int (*oldmem_pfn_is_ram)(unsigned long pfn);
43
44int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
45{
46 if (oldmem_pfn_is_ram)
47 return -EBUSY;
48 oldmem_pfn_is_ram = fn;
49 return 0;
50}
51EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
52
53void unregister_oldmem_pfn_is_ram(void)
54{
55 oldmem_pfn_is_ram = NULL;
56 wmb();
57}
58EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
59
60static int pfn_is_ram(unsigned long pfn)
61{
62 int (*fn)(unsigned long pfn);
63 /* pfn is ram unless fn() checks pagetype */
64 int ret = 1;
65
66 /*
67 * Ask hypervisor if the pfn is really ram.
68 * A ballooned page contains no data and reading from such a page
69 * will cause high load in the hypervisor.
70 */
71 fn = oldmem_pfn_is_ram;
72 if (fn)
73 ret = fn(pfn);
74
75 return ret;
76}
77
38/* Reads a page from the oldmem device from given offset. */ 78/* Reads a page from the oldmem device from given offset. */
39static ssize_t read_from_oldmem(char *buf, size_t count, 79static ssize_t read_from_oldmem(char *buf, size_t count,
40 u64 *ppos, int userbuf) 80 u64 *ppos, int userbuf)
@@ -55,9 +95,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
55 else 95 else
56 nr_bytes = count; 96 nr_bytes = count;
57 97
58 tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); 98 /* If pfn is not ram, return zeros for sparse dump files */
59 if (tmp < 0) 99 if (pfn_is_ram(pfn) == 0)
60 return tmp; 100 memset(buf, 0, nr_bytes);
101 else {
102 tmp = copy_oldmem_page(pfn, buf, nr_bytes,
103 offset, userbuf);
104 if (tmp < 0)
105 return tmp;
106 }
61 *ppos += nr_bytes; 107 *ppos += nr_bytes;
62 count -= nr_bytes; 108 count -= nr_bytes;
63 buf += nr_bytes; 109 buf += nr_bytes;
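
The vmcore hooks above let a hypervisor guest declare certain old-kernel pfns as not-RAM, so a sparse dump reads back zeros instead of faulting ballooned pages through the hypervisor; only one callback may be registered at a time. A registration sketch, with the callback body left hypothetical:

static int my_pfn_is_ram(unsigned long pfn)
{
        /* >0: ram, 0: not ram (e.g. ballooned out), <0: error */
        return my_page_is_backed(pfn);
}

static int __init my_init(void)
{
        if (register_oldmem_pfn_is_ram(&my_pfn_is_ram))
                pr_warn("another pfn_is_ram callback is registered\n");
        return 0;
}
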
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
152void pstore_get_records(void) 152void pstore_get_records(void)
153{ 153{
154 struct pstore_info *psi = psinfo; 154 struct pstore_info *psi = psinfo;
155 size_t size; 155 ssize_t size;
156 u64 id; 156 u64 id;
157 enum pstore_type_id type; 157 enum pstore_type_id type;
158 struct timespec time; 158 struct timespec time;
159 int failed = 0; 159 int failed = 0, rc;
160 160
161 if (!psi) 161 if (!psi)
162 return; 162 return;
163 163
164 mutex_lock(&psinfo->buf_mutex); 164 mutex_lock(&psinfo->buf_mutex);
165 rc = psi->open(psi);
166 if (rc)
167 goto out;
168
165 while ((size = psi->read(&id, &type, &time)) > 0) { 169 while ((size = psi->read(&id, &type, &time)) > 0) {
166 if (pstore_mkfile(type, psi->name, id, psi->buf, size, 170 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
167 time, psi->erase)) 171 time, psi->erase))
168 failed++; 172 failed++;
169 } 173 }
174 psi->close(psi);
175out:
170 mutex_unlock(&psinfo->buf_mutex); 176 mutex_unlock(&psinfo->buf_mutex);
171 177
172 if (failed) 178 if (failed)
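
The pstore type change matters because read() reports errors as negative values: with size declared size_t, a -1 wraps to a huge positive value, the "> 0" loop test passes, and pstore_mkfile() is fed garbage. In miniature:

size_t  u = (size_t)-1;         /* error return, wrapped to SIZE_MAX */
ssize_t s = -1;

/* (u > 0) is true:  the loop body would run on a failed read  */
/* (s > 0) is false: the loop terminates as intended           */
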
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f5fa0a..5b572c89e6c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@ static void prune_dqcache(int count)
691 * This is called from kswapd when we think we need some 691 * This is called from kswapd when we think we need some
692 * more memory 692 * more memory
693 */ 693 */
694static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 694static int shrink_dqcache_memory(struct shrinker *shrink,
695 struct shrink_control *sc)
695{ 696{
697 int nr = sc->nr_to_scan;
698
696 if (nr) { 699 if (nr) {
697 spin_lock(&dq_list_lock); 700 spin_lock(&dq_list_lock);
698 prune_dqcache(nr); 701 prune_dqcache(nr);
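
shrink_dqcache_memory() is adapted here to the updated shrinker calling convention, in which nr_to_scan (and the gfp mask) arrive bundled in a struct shrink_control rather than as separate arguments. The shape of a converted shrinker, with the cache helpers hypothetical:

static int my_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
        if (sc->nr_to_scan)
                prune_my_cache(sc->nr_to_scan);

        return my_cache_object_count();  /* remaining objects, for the VM */
}

static struct shrinker my_shrinker = {
        .shrink = my_shrink,
        .seeks  = DEFAULT_SEEKS,
};
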
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 118662690cdf..76c8164d5651 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -831,6 +831,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
831 INITIALIZE_PATH(path); 831 INITIALIZE_PATH(path);
832 struct reiserfs_dir_entry de; 832 struct reiserfs_dir_entry de;
833 833
834 dentry_unhash(dentry);
835
834 /* we will be doing 2 balancings and update 2 stat data, we change quotas 836 /* we will be doing 2 balancings and update 2 stat data, we change quotas
835 * of the owner of the directory and of the owner of the parent directory. 837 * of the owner of the directory and of the owner of the parent directory.
836 * The quota structure is possibly deleted only on last iput => outside 838 * The quota structure is possibly deleted only on last iput => outside
@@ -1225,6 +1227,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1225 unsigned long savelink = 1; 1227 unsigned long savelink = 1;
1226 struct timespec ctime; 1228 struct timespec ctime;
1227 1229
1230 if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode))
1231 dentry_unhash(new_dentry);
1232
1228 /* three balancings: (1) old name removal, (2) new name insertion 1233 /* three balancings: (1) old name removal, (2) new name insertion
1229 and (3) maybe "save" link insertion 1234 and (3) maybe "save" link insertion
1230 stat data updates: (1) old directory, 1235 stat data updates: (1) old directory,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 47d2a4498b03..50f1abccd1cd 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -105,7 +105,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
105 mutex_unlock(&dentry->d_inode->i_mutex); 105 mutex_unlock(&dentry->d_inode->i_mutex);
106 if (!error) 106 if (!error)
107 d_delete(dentry); 107 d_delete(dentry);
108 dput(dentry);
109 108
110 return error; 109 return error;
111} 110}
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da16..aa866d309695 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
162 .get = generic_pipe_buf_get, 162 .get = generic_pipe_buf_get,
163}; 163};
164 164
165static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
166{
167 smp_mb();
168 if (waitqueue_active(&pipe->wait))
169 wake_up_interruptible(&pipe->wait);
170 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
171}
172
165/** 173/**
166 * splice_to_pipe - fill passed data into a pipe 174 * splice_to_pipe - fill passed data into a pipe
167 * @pipe: pipe to fill 175 * @pipe: pipe to fill
@@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
247 255
248 pipe_unlock(pipe); 256 pipe_unlock(pipe);
249 257
250 if (do_wakeup) { 258 if (do_wakeup)
251 smp_mb(); 259 wakeup_pipe_readers(pipe);
252 if (waitqueue_active(&pipe->wait))
253 wake_up_interruptible(&pipe->wait);
254 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
255 }
256 260
257 while (page_nr < spd_pages) 261 while (page_nr < spd_pages)
258 spd->spd_release(spd, page_nr++); 262 spd->spd_release(spd, page_nr++);
@@ -1892,12 +1896,9 @@ retry:
1892 /* 1896 /*
1893 * If we put data in the output pipe, wakeup any potential readers. 1897 * If we put data in the output pipe, wakeup any potential readers.
1894 */ 1898 */
1895 if (ret > 0) { 1899 if (ret > 0)
1896 smp_mb(); 1900 wakeup_pipe_readers(opipe);
1897 if (waitqueue_active(&opipe->wait)) 1901
1898 wake_up_interruptible(&opipe->wait);
1899 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1900 }
1901 if (input_wakeup) 1902 if (input_wakeup)
1902 wakeup_pipe_writers(ipipe); 1903 wakeup_pipe_writers(ipipe);
1903 1904
@@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1976 /* 1977 /*
1977 * If we put data in the output pipe, wakeup any potential readers. 1978 * If we put data in the output pipe, wakeup any potential readers.
1978 */ 1979 */
1979 if (ret > 0) { 1980 if (ret > 0)
1980 smp_mb(); 1981 wakeup_pipe_readers(opipe);
1981 if (waitqueue_active(&opipe->wait))
1982 wake_up_interruptible(&opipe->wait);
1983 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1984 }
1985 1982
1986 return ret; 1983 return ret;
1987} 1984}
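
The splice.c change is a pure factoring: three identical reader-wakeup sequences collapse into wakeup_pipe_readers(). The smp_mb() the helper preserves is load-bearing, since the pipe buffer updates must be visible before waitqueue_active() samples the wait queue, or a concurrently-sleeping reader could be missed. The producer-side pattern, in the abstract (publish_data() hypothetical):

publish_data();
smp_mb();                               /* order publish before the check */
if (waitqueue_active(&wq))              /* unsafe without the barrier */
        wake_up_interruptible(&wq);
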
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index efc309fa3035..7797218d0b30 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -42,7 +42,7 @@ config SQUASHFS_LZO
42 select LZO_DECOMPRESS 42 select LZO_DECOMPRESS
43 help 43 help
44 Saying Y here includes support for reading Squashfs file systems 44 Saying Y here includes support for reading Squashfs file systems
45 compressed with LZO compresssion. LZO compression is mainly 45 compressed with LZO compression. LZO compression is mainly
46 aimed at embedded systems with slower CPUs where the overheads 46 aimed at embedded systems with slower CPUs where the overheads
47 of zlib are too high. 47 of zlib are too high.
48 48
@@ -57,7 +57,7 @@ config SQUASHFS_XZ
57 select XZ_DEC 57 select XZ_DEC
58 help 58 help
59 Saying Y here includes support for reading Squashfs file systems 59 Saying Y here includes support for reading Squashfs file systems
60 compressed with XZ compresssion. XZ gives better compression than 60 compressed with XZ compression. XZ gives better compression than
61 the default zlib compression, at the expense of greater CPU and 61 the default zlib compression, at the expense of greater CPU and
62 memory overhead. 62 memory overhead.
63 63
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 8ab48bc2fa7d..ed0eb2a921f4 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index c37b520132ff..f744be98cd5a 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -29,7 +29,7 @@
29 * plus functions layered ontop of the generic cache implementation to 29 * plus functions layered ontop of the generic cache implementation to
30 * access the metadata and fragment caches. 30 * access the metadata and fragment caches.
31 * 31 *
32 * To avoid out of memory and fragmentation isssues with vmalloc the cache 32 * To avoid out of memory and fragmentation issues with vmalloc the cache
33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. 33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
34 * 34 *
35 * It should be noted that the cache is not used for file datablocks, these 35 * It should be noted that the cache is not used for file datablocks, these
@@ -393,19 +393,36 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
393/* 393/*
394 * Read a filesystem table (uncompressed sequence of bytes) from disk 394 * Read a filesystem table (uncompressed sequence of bytes) from disk
395 */ 395 */
396int squashfs_read_table(struct super_block *sb, void *buffer, u64 block, 396void *squashfs_read_table(struct super_block *sb, u64 block, int length)
397 int length)
398{ 397{
399 int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 398 int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
400 int i, res; 399 int i, res;
401 void **data = kcalloc(pages, sizeof(void *), GFP_KERNEL); 400 void *table, *buffer, **data;
402 if (data == NULL) 401
403 return -ENOMEM; 402 table = buffer = kmalloc(length, GFP_KERNEL);
403 if (table == NULL)
404 return ERR_PTR(-ENOMEM);
405
406 data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
407 if (data == NULL) {
408 res = -ENOMEM;
409 goto failed;
410 }
404 411
405 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) 412 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
406 data[i] = buffer; 413 data[i] = buffer;
414
407 res = squashfs_read_data(sb, data, block, length | 415 res = squashfs_read_data(sb, data, block, length |
408 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); 416 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
417
409 kfree(data); 418 kfree(data);
410 return res; 419
420 if (res < 0)
421 goto failed;
422
423 return table;
424
425failed:
426 kfree(table);
427 return ERR_PTR(res);
411} 428}
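
squashfs_read_table() now allocates the buffer itself and returns it (or an ERR_PTR) instead of filling a caller-supplied one, which is what lets the table readers later in this series shrink to a read plus sanity checks. Caller shape under the new contract:

__le64 *table;

table = squashfs_read_table(sb, table_start, length);
if (IS_ERR(table))
        return table;           /* propagate -ENOMEM or the read error */

/* ... validate contents, kfree(table) and return -EINVAL on rejection ... */
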
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e921bd213738..9f1b0bb96f13 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 099745ad5691..8ba70cff09a6 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -4,7 +4,7 @@
4 * Squashfs - a compressed read only filesystem for Linux 4 * Squashfs - a compressed read only filesystem for Linux
5 * 5 *
6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 6 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
7 * Phillip Lougher <phillip@lougher.demon.co.uk> 7 * Phillip Lougher <phillip@squashfs.org.uk>
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 3f79cd1d0c19..9dfe2ce0fb70 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 7f93d5a9ee05..730c56248c9b 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -121,30 +121,38 @@ static struct dentry *squashfs_get_parent(struct dentry *child)
121 * Read uncompressed inode lookup table indexes off disk into memory 121 * Read uncompressed inode lookup table indexes off disk into memory
122 */ 122 */
123__le64 *squashfs_read_inode_lookup_table(struct super_block *sb, 123__le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
124 u64 lookup_table_start, unsigned int inodes) 124 u64 lookup_table_start, u64 next_table, unsigned int inodes)
125{ 125{
126 unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes); 126 unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
127 __le64 *inode_lookup_table; 127 __le64 *table;
128 int err;
129 128
130 TRACE("In read_inode_lookup_table, length %d\n", length); 129 TRACE("In read_inode_lookup_table, length %d\n", length);
131 130
132 /* Allocate inode lookup table indexes */ 131 /* Sanity check values */
133 inode_lookup_table = kmalloc(length, GFP_KERNEL); 132
134 if (inode_lookup_table == NULL) { 133 /* there should always be at least one inode */
135 ERROR("Failed to allocate inode lookup table\n"); 134 if (inodes == 0)
136 return ERR_PTR(-ENOMEM); 135 return ERR_PTR(-EINVAL);
137 } 136
137 /* length bytes should not extend into the next table - this check
138 * also traps instances where lookup_table_start is incorrectly larger
139 * than the next table start
140 */
141 if (lookup_table_start + length > next_table)
142 return ERR_PTR(-EINVAL);
143
144 table = squashfs_read_table(sb, lookup_table_start, length);
138 145
139 err = squashfs_read_table(sb, inode_lookup_table, lookup_table_start, 146 /*
140 length); 147 * table[0] points to the first inode lookup table metadata block,
141 if (err < 0) { 148 * this should be less than lookup_table_start
142 ERROR("unable to read inode lookup table\n"); 149 */
143 kfree(inode_lookup_table); 150 if (!IS_ERR(table) && table[0] >= lookup_table_start) {
144 return ERR_PTR(err); 151 kfree(table);
152 return ERR_PTR(-EINVAL);
145 } 153 }
146 154
147 return inode_lookup_table; 155 return table;
148} 156}
149 157
150 158
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index a25c5060bdcb..38bb1c640559 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 7eef571443c6..1516a6490bfb 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -71,26 +71,29 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
71 * Read the uncompressed fragment lookup table indexes off disk into memory 71 * Read the uncompressed fragment lookup table indexes off disk into memory
72 */ 72 */
73__le64 *squashfs_read_fragment_index_table(struct super_block *sb, 73__le64 *squashfs_read_fragment_index_table(struct super_block *sb,
74 u64 fragment_table_start, unsigned int fragments) 74 u64 fragment_table_start, u64 next_table, unsigned int fragments)
75{ 75{
76 unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments); 76 unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments);
77 __le64 *fragment_index; 77 __le64 *table;
78 int err;
79 78
80 /* Allocate fragment lookup table indexes */ 79 /*
81 fragment_index = kmalloc(length, GFP_KERNEL); 80 * Sanity check, length bytes should not extend into the next table -
82 if (fragment_index == NULL) { 81 * this check also traps instances where fragment_table_start is
83 ERROR("Failed to allocate fragment index table\n"); 82 * incorrectly larger than the next table start
84 return ERR_PTR(-ENOMEM); 83 */
85 } 84 if (fragment_table_start + length > next_table)
85 return ERR_PTR(-EINVAL);
86
87 table = squashfs_read_table(sb, fragment_table_start, length);
86 88
87 err = squashfs_read_table(sb, fragment_index, fragment_table_start, 89 /*
88 length); 90 * table[0] points to the first fragment table metadata block, this
89 if (err < 0) { 91 * should be less than fragment_table_start
90 ERROR("unable to read fragment index table\n"); 92 */
91 kfree(fragment_index); 93 if (!IS_ERR(table) && table[0] >= fragment_table_start) {
92 return ERR_PTR(err); 94 kfree(table);
95 return ERR_PTR(-EINVAL);
93 } 96 }
94 97
95 return fragment_index; 98 return table;
96} 99}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index d8f32452638e..a70858e0fb44 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -66,27 +66,37 @@ int squashfs_get_id(struct super_block *sb, unsigned int index,
66 * Read uncompressed id lookup table indexes from disk into memory 66 * Read uncompressed id lookup table indexes from disk into memory
67 */ 67 */
68__le64 *squashfs_read_id_index_table(struct super_block *sb, 68__le64 *squashfs_read_id_index_table(struct super_block *sb,
69 u64 id_table_start, unsigned short no_ids) 69 u64 id_table_start, u64 next_table, unsigned short no_ids)
70{ 70{
71 unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids); 71 unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
72 __le64 *id_table; 72 __le64 *table;
73 int err;
74 73
75 TRACE("In read_id_index_table, length %d\n", length); 74 TRACE("In read_id_index_table, length %d\n", length);
76 75
77 /* Allocate id lookup table indexes */ 76 /* Sanity check values */
78 id_table = kmalloc(length, GFP_KERNEL); 77
79 if (id_table == NULL) { 78 /* there should always be at least one id */
80 ERROR("Failed to allocate id index table\n"); 79 if (no_ids == 0)
81 return ERR_PTR(-ENOMEM); 80 return ERR_PTR(-EINVAL);
82 } 81
82 /*
83 * length bytes should not extend into the next table - this check
84 * also traps instances where id_table_start is incorrectly larger
85 * than the next table start
86 */
87 if (id_table_start + length > next_table)
88 return ERR_PTR(-EINVAL);
89
90 table = squashfs_read_table(sb, id_table_start, length);
83 91
84 err = squashfs_read_table(sb, id_table, id_table_start, length); 92 /*
85 if (err < 0) { 93 * table[0] points to the first id lookup table metadata block, this
86 ERROR("unable to read id index table\n"); 94 * should be less than id_table_start
87 kfree(id_table); 95 */
88 return ERR_PTR(err); 96 if (!IS_ERR(table) && table[0] >= id_table_start) {
97 kfree(table);
98 return ERR_PTR(-EINVAL);
89 } 99 }
90 100
91 return id_table; 101 return table;
92} 102}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 62e63ad25075..04bebcaa2373 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 5d922a6701ab..4bc63ac64bc0 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -2,7 +2,7 @@
2 * Squashfs - a compressed read only filesystem for Linux 2 * Squashfs - a compressed read only filesystem for Linux
3 * 3 *
4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 4 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
5 * Phillip Lougher <phillip@lougher.demon.co.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 1f2e608b8785..e3be6a71cfa7 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -44,24 +44,24 @@ extern struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *,
 		u64, int);
 extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *,
 		u64, int);
-extern int squashfs_read_table(struct super_block *, void *, u64, int);
+extern void *squashfs_read_table(struct super_block *, u64, int);
 
 /* decompressor.c */
 extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
 extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
 
 /* export.c */
-extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
+extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
 		unsigned int);
 
 /* fragment.c */
 extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
 extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
-		u64, unsigned int);
+		u64, u64, unsigned int);
 
 /* id.c */
 extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
-extern __le64 *squashfs_read_id_index_table(struct super_block *, u64,
+extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
 		unsigned short);
 
 /* inode.c */
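
The prototype change to squashfs_read_table() is the pivot for the rest of this series: instead of filling a caller-supplied buffer and returning an int, it now allocates the buffer itself and encodes failure in the returned pointer. A hedged sketch of a caller under the new convention — use_table() is hypothetical, the squashfs_read_table() prototype is the one declared above, and IS_ERR()/PTR_ERR()/kfree() are the standard kernel helpers:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include "squashfs.h"

/* use_table() is hypothetical; it only illustrates the convention */
static int use_table(struct super_block *sb, u64 block, int length)
{
	void *table = squashfs_read_table(sb, block, length);

	if (IS_ERR(table))		/* failure is encoded in the pointer */
		return PTR_ERR(table);
	/* ... use the table ... */
	kfree(table);			/* the caller still owns the buffer */
	return 0;
}
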
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 4582c568ef4d..b4a4e539a08c 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -4,7 +4,7 @@
  * Squashfs
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index 359baefc01fc..73588e7700ed 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -4,7 +4,7 @@
  * Squashfs
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index d9037a5215f0..651f0b31d296 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -4,7 +4,7 @@
  * Squashfs
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5c8184c061a4..6f26abee3597 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -83,7 +83,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	long long root_inode;
 	unsigned short flags;
 	unsigned int fragments;
-	u64 lookup_table_start, xattr_id_table_start;
+	u64 lookup_table_start, xattr_id_table_start, next_table;
 	int err;
 
 	TRACE("Entered squashfs_fill_superblock\n");
@@ -95,12 +95,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	msblk = sb->s_fs_info;
 
-	sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
-	if (sblk == NULL) {
-		ERROR("Failed to allocate squashfs_super_block\n");
-		goto failure;
-	}
-
 	msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE);
 	msblk->devblksize_log2 = ffz(~msblk->devblksize);
 
@@ -114,10 +108,12 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 	 * of bytes_used) we need to set it to an initial sensible dummy value
 	 */
 	msblk->bytes_used = sizeof(*sblk);
-	err = squashfs_read_table(sb, sblk, SQUASHFS_START, sizeof(*sblk));
+	sblk = squashfs_read_table(sb, SQUASHFS_START, sizeof(*sblk));
 
-	if (err < 0) {
+	if (IS_ERR(sblk)) {
 		ERROR("unable to read squashfs_super_block\n");
+		err = PTR_ERR(sblk);
+		sblk = NULL;
 		goto failed_mount;
 	}
 
@@ -218,18 +214,61 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
+	/* Handle xattrs */
+	sb->s_xattr = squashfs_xattr_handlers;
+	xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start);
+	if (xattr_id_table_start == SQUASHFS_INVALID_BLK) {
+		next_table = msblk->bytes_used;
+		goto allocate_id_index_table;
+	}
+
+	/* Allocate and read xattr id lookup table */
+	msblk->xattr_id_table = squashfs_read_xattr_id_table(sb,
+		xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids);
+	if (IS_ERR(msblk->xattr_id_table)) {
+		ERROR("unable to read xattr id index table\n");
+		err = PTR_ERR(msblk->xattr_id_table);
+		msblk->xattr_id_table = NULL;
+		if (err != -ENOTSUPP)
+			goto failed_mount;
+	}
+	next_table = msblk->xattr_table;
+
+allocate_id_index_table:
 	/* Allocate and read id index table */
 	msblk->id_table = squashfs_read_id_index_table(sb,
-		le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids));
+		le64_to_cpu(sblk->id_table_start), next_table,
+		le16_to_cpu(sblk->no_ids));
 	if (IS_ERR(msblk->id_table)) {
+		ERROR("unable to read id index table\n");
 		err = PTR_ERR(msblk->id_table);
 		msblk->id_table = NULL;
 		goto failed_mount;
 	}
+	next_table = msblk->id_table[0];
+
+	/* Handle inode lookup table */
+	lookup_table_start = le64_to_cpu(sblk->lookup_table_start);
+	if (lookup_table_start == SQUASHFS_INVALID_BLK)
+		goto handle_fragments;
+
+	/* Allocate and read inode lookup table */
+	msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb,
+		lookup_table_start, next_table, msblk->inodes);
+	if (IS_ERR(msblk->inode_lookup_table)) {
+		ERROR("unable to read inode lookup table\n");
+		err = PTR_ERR(msblk->inode_lookup_table);
+		msblk->inode_lookup_table = NULL;
+		goto failed_mount;
+	}
+	next_table = msblk->inode_lookup_table[0];
 
+	sb->s_export_op = &squashfs_export_ops;
+
+handle_fragments:
 	fragments = le32_to_cpu(sblk->fragments);
 	if (fragments == 0)
-		goto allocate_lookup_table;
+		goto check_directory_table;
 
 	msblk->fragment_cache = squashfs_cache_init("fragment",
 		SQUASHFS_CACHED_FRAGMENTS, msblk->block_size);
@@ -240,45 +279,29 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* Allocate and read fragment index table */
 	msblk->fragment_index = squashfs_read_fragment_index_table(sb,
-		le64_to_cpu(sblk->fragment_table_start), fragments);
+		le64_to_cpu(sblk->fragment_table_start), next_table, fragments);
 	if (IS_ERR(msblk->fragment_index)) {
+		ERROR("unable to read fragment index table\n");
 		err = PTR_ERR(msblk->fragment_index);
 		msblk->fragment_index = NULL;
 		goto failed_mount;
 	}
+	next_table = msblk->fragment_index[0];
 
-allocate_lookup_table:
-	lookup_table_start = le64_to_cpu(sblk->lookup_table_start);
-	if (lookup_table_start == SQUASHFS_INVALID_BLK)
-		goto allocate_xattr_table;
-
-	/* Allocate and read inode lookup table */
-	msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb,
-		lookup_table_start, msblk->inodes);
-	if (IS_ERR(msblk->inode_lookup_table)) {
-		err = PTR_ERR(msblk->inode_lookup_table);
-		msblk->inode_lookup_table = NULL;
+check_directory_table:
+	/* Sanity check directory_table */
+	if (msblk->directory_table >= next_table) {
+		err = -EINVAL;
 		goto failed_mount;
 	}
 
-	sb->s_export_op = &squashfs_export_ops;
-
-allocate_xattr_table:
-	sb->s_xattr = squashfs_xattr_handlers;
-	xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start);
-	if (xattr_id_table_start == SQUASHFS_INVALID_BLK)
-		goto allocate_root;
-
-	/* Allocate and read xattr id lookup table */
-	msblk->xattr_id_table = squashfs_read_xattr_id_table(sb,
-		xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids);
-	if (IS_ERR(msblk->xattr_id_table)) {
-		err = PTR_ERR(msblk->xattr_id_table);
-		msblk->xattr_id_table = NULL;
-		if (err != -ENOTSUPP)
-			goto failed_mount;
+	/* Sanity check inode_table */
+	if (msblk->inode_table >= msblk->directory_table) {
+		err = -EINVAL;
+		goto failed_mount;
 	}
-allocate_root:
+
+	/* allocate root */
 	root = new_inode(sb);
 	if (!root) {
 		err = -ENOMEM;
@@ -318,11 +341,6 @@ failed_mount:
 	sb->s_fs_info = NULL;
 	kfree(sblk);
 	return err;
-
-failure:
-	kfree(sb->s_fs_info);
-	sb->s_fs_info = NULL;
-	return -ENOMEM;
 }
 
 
@@ -475,5 +493,5 @@ static const struct super_operations squashfs_super_ops = {
 module_init(init_squashfs_fs);
 module_exit(exit_squashfs_fs);
 MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem");
-MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>");
+MODULE_AUTHOR("Phillip Lougher <phillip@squashfs.org.uk>");
 MODULE_LICENSE("GPL");
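
The reordered fill_super path reads each table bounded by the start of the previously read one, carried along in next_table, and the final checks pin down the directory and inode tables. A small illustrative check of the same ordering invariant (standalone C, names hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* illustrative only: the ordering the final sanity checks enforce */
static bool tables_ordered(uint64_t inode_table, uint64_t directory_table,
			   uint64_t next_table)
{
	/* the directory table must start below the last table read */
	if (directory_table >= next_table)
		return false;
	/* the inode table must precede the directory table */
	if (inode_table >= directory_table)
		return false;
	return true;
}
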
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index ec86434921e1..1191817264cc 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 3876c36699a1..92fcde7b4d61 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2010
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index b634efce4bde..c83f5d9ec125 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2010
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -31,6 +31,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
 		u64 start, u64 *xattr_table_start, int *xattr_ids)
 {
 	ERROR("Xattrs in filesystem, these will be ignored\n");
+	*xattr_table_start = start;
 	return ERR_PTR(-ENOTSUPP);
 }
 
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 05385dbe1465..c89607d690c4 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2010
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -67,34 +67,29 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
 		u64 *xattr_table_start, int *xattr_ids)
 {
 	unsigned int len;
-	__le64 *xid_table;
-	struct squashfs_xattr_id_table id_table;
-	int err;
+	struct squashfs_xattr_id_table *id_table;
+
+	id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+	if (IS_ERR(id_table))
+		return (__le64 *) id_table;
+
+	*xattr_table_start = le64_to_cpu(id_table->xattr_table_start);
+	*xattr_ids = le32_to_cpu(id_table->xattr_ids);
+	kfree(id_table);
+
+	/* Sanity check values */
+
+	/* there is always at least one xattr id */
+	if (*xattr_ids == 0)
+		return ERR_PTR(-EINVAL);
+
+	/* xattr_table should be less than start */
+	if (*xattr_table_start >= start)
+		return ERR_PTR(-EINVAL);
 
-	err = squashfs_read_table(sb, &id_table, start, sizeof(id_table));
-	if (err < 0) {
-		ERROR("unable to read xattr id table\n");
-		return ERR_PTR(err);
-	}
-	*xattr_table_start = le64_to_cpu(id_table.xattr_table_start);
-	*xattr_ids = le32_to_cpu(id_table.xattr_ids);
 	len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
 
 	TRACE("In read_xattr_index_table, length %d\n", len);
 
-	/* Allocate xattr id lookup table indexes */
-	xid_table = kmalloc(len, GFP_KERNEL);
-	if (xid_table == NULL) {
-		ERROR("Failed to allocate xattr id index table\n");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	err = squashfs_read_table(sb, xid_table, start + sizeof(id_table), len);
-	if (err < 0) {
-		ERROR("unable to read xattr id index table\n");
-		kfree(xid_table);
-		return ERR_PTR(err);
-	}
-
-	return xid_table;
+	return squashfs_read_table(sb, start + sizeof(*id_table), len);
 }
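
The `(__le64 *) id_table` cast above works because ERR_PTR() encodes the error in the pointer value itself, so it survives casts between pointer types. A minimal sketch of the idiom (assumes kernel headers; propagate() is hypothetical):

#include <linux/err.h>
#include <linux/types.h>

/* propagate() is hypothetical and only demonstrates the idiom */
static __le64 *propagate(void *table)
{
	if (IS_ERR(table))
		return (__le64 *) table;	/* still IS_ERR() to the caller */
	return table;
}
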
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index aa47a286d1f8..1760b7d108f6 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 517688b32ffa..55d918fd2d86 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -2,7 +2,7 @@
  * Squashfs - a compressed read only filesystem for Linux
  *
  * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
- * Phillip Lougher <phillip@lougher.demon.co.uk>
+ * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c75593953c52 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/backing-dev.h>
 #include <linux/rculist_bl.h>
+#include <linux/cleancache.h>
 #include "internal.h"
 
 
@@ -112,6 +113,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		s->s_maxbytes = MAX_NON_LFS;
 		s->s_op = &default_op;
 		s->s_time_gran = 1000000000;
+		s->cleancache_poolid = -1;
 	}
 out:
 	return s;
@@ -177,6 +179,7 @@ void deactivate_locked_super(struct super_block *s)
 {
 	struct file_system_type *fs = s->s_type;
 	if (atomic_dec_and_test(&s->s_active)) {
+		cleancache_flush_fs(s);
 		fs->kill_sb(s);
 		/*
 		 * We need to call rcu_barrier so all the delayed rcu free
@@ -948,8 +951,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
 	 * but s_maxbytes was an unsigned long long for many releases. Throw
 	 * this warning for a little while to try and catch filesystems that
-	 * violate this rule. This warning should be either removed or
-	 * converted to a BUG() in 2.6.34.
+	 * violate this rule.
 	 */
 	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
 		"negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
 
 #include "sysfs.h"
 
-/* used in crash dumps to help with debugging */
-static char last_sysfs_file[PATH_MAX];
-void sysfs_printk_last_file(void)
-{
-	printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
-}
-
 /*
  * There's one sysfs_buffer for each open file and one
  * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	struct sysfs_buffer *buffer;
 	const struct sysfs_ops *ops;
 	int error = -EACCES;
-	char *p;
-
-	p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
-	if (!IS_ERR(p))
-		memmove(last_sysfs_file, p, strlen(p) + 1);
 
 	/* need attr_sd for attr and ops, its parent for kobj */
 	if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
 }
 
 /**
- * sysfs_update_group - given a directory kobject, create an attribute group
- * @kobj: The kobject to create the group on
- * @grp: The attribute group to create
+ * sysfs_update_group - given a directory kobject, update an attribute group
+ * @kobj: The kobject to update the group on
+ * @grp: The attribute group to update
  *
  * This function updates an attribute group. Unlike
  * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index e474fbcf8bde..e2cc6756f3b1 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -196,6 +196,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
 	struct inode *inode = dentry->d_inode;
 	int err = -ENOTEMPTY;
 
+	dentry_unhash(dentry);
+
 	if (sysv_empty_dir(inode)) {
 		err = sysv_unlink(dir, dentry);
 		if (!err) {
@@ -222,6 +224,9 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
 	struct sysv_dir_entry * old_de;
 	int err = -ENOENT;
 
+	if (new_inode && S_ISDIR(new_inode->i_mode))
+		dentry_unhash(new_dentry);
+
 	old_de = sysv_find_entry(old_dentry, &old_page);
 	if (!old_de)
 		goto out;
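
Both sysv hunks follow the same pattern seen across this series: with the VFS no longer unhashing victim dentries itself, each filesystem that depends on the old behaviour now calls dentry_unhash() for directories about to be removed, or overwritten by rename. A generic sketch of the rmdir side (example_rmdir() is hypothetical, not the sysv code):

#include <linux/dcache.h>
#include <linux/fs.h>

/* example_rmdir() is hypothetical, not the sysv implementation */
static int example_rmdir(struct inode *dir, struct dentry *dentry)
{
	dentry_unhash(dentry);	/* drop the victim from the dcache hash */
	/* ... verify the directory is empty, then remove it ... */
	return 0;
}
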
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc1425b3e..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,16 +22,24 @@
 #include <linux/anon_inodes.h>
 #include <linux/timerfd.h>
 #include <linux/syscalls.h>
+#include <linux/rcupdate.h>
 
 struct timerfd_ctx {
 	struct hrtimer tmr;
 	ktime_t tintv;
+	ktime_t moffs;
 	wait_queue_head_t wqh;
 	u64 ticks;
 	int expired;
 	int clockid;
+	struct rcu_head rcu;
+	struct list_head clist;
+	bool might_cancel;
 };
 
+static LIST_HEAD(cancel_list);
+static DEFINE_SPINLOCK(cancel_lock);
+
 /*
  * This gets called when the timer event triggers. We set the "expired"
  * flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
 	return HRTIMER_NORESTART;
 }
 
+/*
+ * Called when the clock was set to cancel the timers in the cancel
+ * list.
+ */
+void timerfd_clock_was_set(void)
+{
+	ktime_t moffs = ktime_get_monotonic_offset();
+	struct timerfd_ctx *ctx;
+	unsigned long flags;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, &cancel_list, clist) {
+		if (!ctx->might_cancel)
+			continue;
+		spin_lock_irqsave(&ctx->wqh.lock, flags);
+		if (ctx->moffs.tv64 != moffs.tv64) {
+			ctx->moffs.tv64 = KTIME_MAX;
+			wake_up_locked(&ctx->wqh);
+		}
+		spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+	}
+	rcu_read_unlock();
+}
+
+static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
+{
+	if (ctx->might_cancel) {
+		ctx->might_cancel = false;
+		spin_lock(&cancel_lock);
+		list_del_rcu(&ctx->clist);
+		spin_unlock(&cancel_lock);
+	}
+}
+
+static bool timerfd_canceled(struct timerfd_ctx *ctx)
+{
+	if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
+		return false;
+	ctx->moffs = ktime_get_monotonic_offset();
+	return true;
+}
+
+static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
+{
+	if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
+	    (flags & TFD_TIMER_CANCEL_ON_SET)) {
+		if (!ctx->might_cancel) {
+			ctx->might_cancel = true;
+			spin_lock(&cancel_lock);
+			list_add_rcu(&ctx->clist, &cancel_list);
+			spin_unlock(&cancel_lock);
+		}
+	} else if (ctx->might_cancel) {
+		timerfd_remove_cancel(ctx);
+	}
+}
+
 static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 {
 	ktime_t remaining;
@@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
 
-static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
+static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
 			 const struct itimerspec *ktmr)
 {
 	enum hrtimer_mode htmode;
 	ktime_t texp;
+	int clockid = ctx->clockid;
 
 	htmode = (flags & TFD_TIMER_ABSTIME) ?
 		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
 	ctx->expired = 0;
 	ctx->ticks = 0;
 	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
-	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
+	hrtimer_init(&ctx->tmr, clockid, htmode);
 	hrtimer_set_expires(&ctx->tmr, texp);
 	ctx->tmr.function = timerfd_tmrproc;
-	if (texp.tv64 != 0)
+	if (texp.tv64 != 0) {
 		hrtimer_start(&ctx->tmr, texp, htmode);
+		if (timerfd_canceled(ctx))
+			return -ECANCELED;
+	}
+	return 0;
 }
 
 static int timerfd_release(struct inode *inode, struct file *file)
 {
 	struct timerfd_ctx *ctx = file->private_data;
 
+	timerfd_remove_cancel(ctx);
 	hrtimer_cancel(&ctx->tmr);
-	kfree(ctx);
+	kfree_rcu(ctx, rcu);
 	return 0;
 }
 
@@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 		res = -EAGAIN;
 	else
 		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+
+	/*
+	 * If clock has changed, we do not care about the
+	 * ticks and we do not rearm the timer. Userspace must
+	 * reevaluate anyway.
+	 */
+	if (timerfd_canceled(ctx)) {
+		ctx->ticks = 0;
+		ctx->expired = 0;
+		res = -ECANCELED;
+	}
+
 	if (ctx->ticks) {
 		ticks = ctx->ticks;
+
 		if (ctx->expired && ctx->tintv.tv64) {
 			/*
 			 * If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 	init_waitqueue_head(&ctx->wqh);
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+	ctx->moffs = ktime_get_monotonic_offset();
 
 	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
 			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	struct file *file;
 	struct timerfd_ctx *ctx;
 	struct itimerspec ktmr, kotmr;
+	int ret;
 
 	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
 		return -EFAULT;
@@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 		return PTR_ERR(file);
 	ctx = file->private_data;
 
+	timerfd_setup_cancel(ctx, flags);
+
 	/*
 	 * We need to stop the existing timer before reprogramming
 	 * it to the new values.
@@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	/*
 	 * Re-program the timer to the new value ...
 	 */
-	timerfd_setup(ctx, flags, &ktmr);
+	ret = timerfd_setup(ctx, flags, &ktmr);
 
 	spin_unlock_irq(&ctx->wqh.lock);
 	fput(file);
 	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
 		return -EFAULT;
 
-	return 0;
+	return ret;
 }
 
 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
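
From userspace, the new behaviour is opted into with TFD_TIMER_CANCEL_ON_SET on an absolute CLOCK_REALTIME timer; a read() then fails with ECANCELED when the clock is set. A hedged usage sketch (wait_until() is hypothetical; the flag value matches the definition this series introduces):

#include <sys/timerfd.h>
#include <errno.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

#ifndef TFD_TIMER_CANCEL_ON_SET
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)	/* added by this series */
#endif

static int wait_until(time_t deadline)
{
	struct itimerspec its = { .it_value = { .tv_sec = deadline } };
	uint64_t ticks;
	int fd = timerfd_create(CLOCK_REALTIME, 0);

	if (fd < 0)
		return -1;
	if (timerfd_settime(fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET,
			    &its, NULL) == 0 &&
	    read(fd, &ticks, sizeof(ticks)) < 0 && errno == ECANCELED) {
		/* the clock was set while waiting: recompute the deadline */
	}
	close(fd);
	return 0;
}
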
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 8b3a7da531eb..315de66e52b2 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
 	long long liab;
 
 	spin_lock(&c->space_lock);
-	liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
+	liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
 	spin_unlock(&c->space_lock);
 	return liab;
 }
@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
 	int idx_lebs;
 	long long idx_size;
 
-	idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+	idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
 	/* And make sure we have thrice the index size of space reserved */
 	idx_size += idx_size << 1;
 	/*
@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
  * budgeted index space to the size of the current index, multiplies this by 3,
  * and makes sure this does not exceed the amount of free LEBs.
  *
- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
+ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
  * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
  *   be large, because UBIFS does not do any index consolidation as long as
  *   there is free space. IOW, the index may take a lot of LEBs, but the LEBs
  *   will contain a lot of dirt.
- * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
- *   the index may be consolidated to take up to @c->min_idx_lebs LEBs.
+ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
+ *   the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
  *
  * This function returns zero in case of success, and %-ENOSPC in case of
  * failure.
@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
 		  c->lst.taken_empty_lebs;
 	if (unlikely(rsvd_idx_lebs > lebs)) {
 		dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
-			 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
+			 "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
 			 rsvd_idx_lebs);
 		return -ENOSPC;
 	}
 
 	available = ubifs_calc_available(c, min_idx_lebs);
-	outstanding = c->budg_data_growth + c->budg_dd_growth;
+	outstanding = c->bi.data_growth + c->bi.dd_growth;
 
 	if (unlikely(available < outstanding)) {
 		dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
 	if (available - outstanding <= c->rp_size && !can_use_rp(c))
 		return -ENOSPC;
 
-	c->min_idx_lebs = min_idx_lebs;
+	c->bi.min_idx_lebs = min_idx_lebs;
 	return 0;
 }
 
@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
 {
 	int data_growth;
 
-	data_growth = req->new_ino ? c->inode_budget : 0;
+	data_growth = req->new_ino ? c->bi.inode_budget : 0;
 	if (req->new_page)
-		data_growth += c->page_budget;
+		data_growth += c->bi.page_budget;
 	if (req->new_dent)
-		data_growth += c->dent_budget;
+		data_growth += c->bi.dent_budget;
 	data_growth += req->new_ino_d;
 	return data_growth;
 }
@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
 {
 	int dd_growth;
 
-	dd_growth = req->dirtied_page ? c->page_budget : 0;
+	dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
 
 	if (req->dirtied_ino)
-		dd_growth += c->inode_budget << (req->dirtied_ino - 1);
+		dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
 	if (req->mod_dent)
-		dd_growth += c->dent_budget;
+		dd_growth += c->bi.dent_budget;
 	dd_growth += req->dirtied_ino_d;
 	return dd_growth;
 }
@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
 
 again:
 	spin_lock(&c->space_lock);
-	ubifs_assert(c->budg_idx_growth >= 0);
-	ubifs_assert(c->budg_data_growth >= 0);
-	ubifs_assert(c->budg_dd_growth >= 0);
+	ubifs_assert(c->bi.idx_growth >= 0);
+	ubifs_assert(c->bi.data_growth >= 0);
+	ubifs_assert(c->bi.dd_growth >= 0);
 
-	if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
+	if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
 		dbg_budg("no space");
 		spin_unlock(&c->space_lock);
 		return -ENOSPC;
 	}
 
-	c->budg_idx_growth += idx_growth;
-	c->budg_data_growth += data_growth;
-	c->budg_dd_growth += dd_growth;
+	c->bi.idx_growth += idx_growth;
+	c->bi.data_growth += data_growth;
+	c->bi.dd_growth += dd_growth;
 
 	err = do_budget_space(c);
 	if (likely(!err)) {
@@ -484,9 +484,9 @@ again:
 	}
 
 	/* Restore the old values */
-	c->budg_idx_growth -= idx_growth;
-	c->budg_data_growth -= data_growth;
-	c->budg_dd_growth -= dd_growth;
+	c->bi.idx_growth -= idx_growth;
+	c->bi.data_growth -= data_growth;
+	c->bi.dd_growth -= dd_growth;
 	spin_unlock(&c->space_lock);
 
 	if (req->fast) {
@@ -506,9 +506,9 @@ again:
 			goto again;
 		}
 		dbg_budg("FS is full, -ENOSPC");
-		c->nospace = 1;
+		c->bi.nospace = 1;
 		if (can_use_rp(c) || c->rp_size == 0)
-			c->nospace_rp = 1;
+			c->bi.nospace_rp = 1;
 		smp_wmb();
 	} else
 		ubifs_err("cannot budget space, error %d", err);
@@ -523,8 +523,8 @@ again:
  * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
  * since the index changes (which were budgeted for in @req->idx_growth) will
  * only be written to the media on commit, this function moves the index budget
- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
- * zeroed by the commit operation.
+ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
+ * by the commit operation.
  */
 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
 {
@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
 	if (!req->data_growth && !req->dd_growth)
 		return;
 
-	c->nospace = c->nospace_rp = 0;
+	c->bi.nospace = c->bi.nospace_rp = 0;
 	smp_wmb();
 
 	spin_lock(&c->space_lock);
-	c->budg_idx_growth -= req->idx_growth;
-	c->budg_uncommitted_idx += req->idx_growth;
-	c->budg_data_growth -= req->data_growth;
-	c->budg_dd_growth -= req->dd_growth;
-	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+	c->bi.idx_growth -= req->idx_growth;
+	c->bi.uncommitted_idx += req->idx_growth;
+	c->bi.data_growth -= req->data_growth;
+	c->bi.dd_growth -= req->dd_growth;
+	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
 
-	ubifs_assert(c->budg_idx_growth >= 0);
-	ubifs_assert(c->budg_data_growth >= 0);
-	ubifs_assert(c->budg_dd_growth >= 0);
-	ubifs_assert(c->min_idx_lebs < c->main_lebs);
-	ubifs_assert(!(c->budg_idx_growth & 7));
-	ubifs_assert(!(c->budg_data_growth & 7));
-	ubifs_assert(!(c->budg_dd_growth & 7));
+	ubifs_assert(c->bi.idx_growth >= 0);
+	ubifs_assert(c->bi.data_growth >= 0);
+	ubifs_assert(c->bi.dd_growth >= 0);
+	ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
+	ubifs_assert(!(c->bi.idx_growth & 7));
+	ubifs_assert(!(c->bi.data_growth & 7));
+	ubifs_assert(!(c->bi.dd_growth & 7));
 	spin_unlock(&c->space_lock);
 }
 
@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
 {
 	spin_lock(&c->space_lock);
 	/* Release the index growth reservation */
-	c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+	c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
 	/* Release the data growth reservation */
-	c->budg_data_growth -= c->page_budget;
+	c->bi.data_growth -= c->bi.page_budget;
 	/* Increase the dirty data growth reservation instead */
-	c->budg_dd_growth += c->page_budget;
+	c->bi.dd_growth += c->bi.page_budget;
 	/* And re-calculate the indexing space reservation */
-	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
 	spin_unlock(&c->space_lock);
 }
 
@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
 
 	memset(&req, 0, sizeof(struct ubifs_budget_req));
 	/* The "no space" flags will be cleared because dd_growth is > 0 */
-	req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
+	req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
 	ubifs_release_budget(c, &req);
 }
 
@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
 	int rsvd_idx_lebs, lebs;
 	long long available, outstanding, free;
 
-	ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
-	outstanding = c->budg_data_growth + c->budg_dd_growth;
-	available = ubifs_calc_available(c, c->min_idx_lebs);
+	ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+	outstanding = c->bi.data_growth + c->bi.dd_growth;
+	available = ubifs_calc_available(c, c->bi.min_idx_lebs);
 
 	/*
 	 * When reporting free space to user-space, UBIFS guarantees that it is
@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
 	 * Note, the calculations below are similar to what we have in
 	 * 'do_budget_space()', so refer there for comments.
 	 */
-	if (c->min_idx_lebs > c->lst.idx_lebs)
-		rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+	if (c->bi.min_idx_lebs > c->lst.idx_lebs)
+		rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
 	else
 		rsvd_idx_lebs = 0;
 	lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
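
The mechanical budg_* to bi.* renames throughout budget.c indicate the budgeting state now lives in one embedded structure so it can be snapshotted wholesale (the debug.c hunks below memcpy c->bi into saved state). The apparent shape, reconstructed from the field accesses in this diff rather than copied from ubifs.h, so treat it as a sketch:

/* reconstructed sketch of the new container, not authoritative */
struct ubifs_budg_info {
	long long idx_growth;
	long long data_growth;
	long long dd_growth;
	long long uncommitted_idx;
	unsigned long long old_idx_sz;
	int min_idx_lebs;
	unsigned int nospace:1;
	unsigned int nospace_rp:1;
	int page_budget;
	int inode_budget;
	int dent_budget;
};
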
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 1bd01ded7123..87cd0ead8633 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
 	c->mst_node->root_len = cpu_to_le32(zroot.len);
 	c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
 	c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
-	c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
+	c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
 	c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
 	c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
 	c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 004d3745dc45..0bb2bcef0de9 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,7 +34,6 @@
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/math64.h>
-#include <linux/slab.h>
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
 static char dbg_key_buf0[128];
 static char dbg_key_buf1[128];
 
-unsigned int ubifs_msg_flags;
 unsigned int ubifs_chk_flags;
 unsigned int ubifs_tst_flags;
 
-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
 module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
 module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
 
-MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
 MODULE_PARM_DESC(debug_chks, "Debug check flags");
 MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
 
@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
 		printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
 		printk(KERN_DEBUG "\t big_lpt %u\n",
 		       !!(sup_flags & UBIFS_FLG_BIGLPT));
+		printk(KERN_DEBUG "\t space_fixup %u\n",
+		       !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
 		printk(KERN_DEBUG "\tmin_io_size %u\n",
 		       le32_to_cpu(sup->min_io_size));
 		printk(KERN_DEBUG "\tleb_size %u\n",
@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
 	spin_unlock(&dbg_lock);
 }
 
-void dbg_dump_budg(struct ubifs_info *c)
+void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 {
 	int i;
 	struct rb_node *rb;
@@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c)
 	struct ubifs_gced_idx_leb *idx_gc;
 	long long available, outstanding, free;
 
-	ubifs_assert(spin_is_locked(&c->space_lock));
+	spin_lock(&c->space_lock);
 	spin_lock(&dbg_lock);
-	printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
-	       "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
-	       c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
-	printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
-	       "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
-	       c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
-	       c->freeable_cnt);
-	printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
-	       "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
-	       c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
+	printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
+	       "total budget sum %lld\n", current->pid,
+	       bi->data_growth + bi->dd_growth,
+	       bi->data_growth + bi->dd_growth + bi->idx_growth);
+	printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
+	       "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
+	       bi->idx_growth);
+	printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
+	       "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
+	       bi->uncommitted_idx);
+	printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+	       bi->page_budget, bi->inode_budget, bi->dent_budget);
+	printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
+	       bi->nospace, bi->nospace_rp);
+	printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+	       c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+
+	if (bi != &c->bi)
+		/*
+		 * If we are dumping saved budgeting data, do not print
+		 * additional information which is about the current state, not
+		 * the old one which corresponded to the saved budgeting data.
+		 */
+		goto out_unlock;
+
+	printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+	       c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
 	printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
 	       "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
 	       atomic_long_read(&c->dirty_zn_cnt),
 	       atomic_long_read(&c->clean_zn_cnt));
-	printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
-	       c->dark_wm, c->dead_wm, c->max_idx_node_sz);
 	printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
 	       c->gc_lnum, c->ihead_lnum);
+
 	/* If we are in R/O mode, journal heads do not exist */
 	if (c->jheads)
 		for (i = 0; i < c->jhead_cnt; i++)
@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
 	printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
 
 	/* Print budgeting predictions */
-	available = ubifs_calc_available(c, c->min_idx_lebs);
-	outstanding = c->budg_data_growth + c->budg_dd_growth;
+	available = ubifs_calc_available(c, c->bi.min_idx_lebs);
+	outstanding = c->bi.data_growth + c->bi.dd_growth;
 	free = ubifs_get_free_space_nolock(c);
 	printk(KERN_DEBUG "Budgeting predictions:\n");
 	printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
 	       available, outstanding, free);
+out_unlock:
 	spin_unlock(&dbg_lock);
+	spin_unlock(&c->space_lock);
 }
 
 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
 		if (bud->lnum == lp->lnum) {
 			int head = 0;
 			for (i = 0; i < c->jhead_cnt; i++) {
-				if (lp->lnum == c->jheads[i].wbuf.lnum) {
+				/*
+				 * Note, if we are in R/O mode or in the middle
+				 * of mounting/re-mounting, the write-buffers do
+				 * not exist.
+				 */
+				if (c->jheads &&
+				    lp->lnum == c->jheads[i].wbuf.lnum) {
 					printk(KERN_CONT ", jhead %s",
 					       dbg_jhead(i));
 					head = 1;
@@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c)
 
 	spin_lock(&c->space_lock);
 	memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
+	memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
+	d->saved_idx_gc_cnt = c->idx_gc_cnt;
 
 	/*
 	 * We use a dirty hack here and zero out @c->freeable_cnt, because it
@@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c)
 out:
 	ubifs_msg("saved lprops statistics dump");
 	dbg_dump_lstats(&d->saved_lst);
-	ubifs_get_lp_stats(c, &lst);
-
+	ubifs_msg("saved budgeting info dump");
+	dbg_dump_budg(c, &d->saved_bi);
+	ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
 	ubifs_msg("current lprops statistics dump");
+	ubifs_get_lp_stats(c, &lst);
 	dbg_dump_lstats(&lst);
-
-	spin_lock(&c->space_lock);
-	dbg_dump_budg(c);
-	spin_unlock(&c->space_lock);
+	ubifs_msg("current budgeting info dump");
+	dbg_dump_budg(c, &c->bi);
 	dump_stack();
 	return -EINVAL;
 }
@@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
 	struct rb_node **p, *parent = NULL;
 	struct fsck_inode *fscki;
 	ino_t inum = key_inum_flash(c, &ino->key);
+	struct inode *inode;
+	struct ubifs_inode *ui;
 
 	p = &fsckd->inodes.rb_node;
 	while (*p) {
@@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1816 if (!fscki) 1842 if (!fscki)
1817 return ERR_PTR(-ENOMEM); 1843 return ERR_PTR(-ENOMEM);
1818 1844
1845 inode = ilookup(c->vfs_sb, inum);
1846
1819 fscki->inum = inum; 1847 fscki->inum = inum;
1820 fscki->nlink = le32_to_cpu(ino->nlink); 1848 /*
1821 fscki->size = le64_to_cpu(ino->size); 1849 * If the inode is present in the VFS inode cache, use it instead of
1822 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); 1850 * the on-flash inode which might be out-of-date. E.g., the size might
1823 fscki->xattr_sz = le32_to_cpu(ino->xattr_size); 1851 * be out-of-date. If we do not do this, the following may happen, for
1824 fscki->xattr_nms = le32_to_cpu(ino->xattr_names); 1852 * example:
1825 fscki->mode = le32_to_cpu(ino->mode); 1853 * 1. A power cut happens
 1854 * 2. We mount the file-system R/O; the replay process fixes up the
 1855 * inode size in the VFS cache, but not on-flash.
1856 * 3. 'check_leaf()' fails because it hits a data node beyond inode
1857 * size.
1858 */
1859 if (!inode) {
1860 fscki->nlink = le32_to_cpu(ino->nlink);
1861 fscki->size = le64_to_cpu(ino->size);
1862 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
1863 fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
1864 fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
1865 fscki->mode = le32_to_cpu(ino->mode);
1866 } else {
1867 ui = ubifs_inode(inode);
1868 fscki->nlink = inode->i_nlink;
1869 fscki->size = inode->i_size;
1870 fscki->xattr_cnt = ui->xattr_cnt;
1871 fscki->xattr_sz = ui->xattr_size;
1872 fscki->xattr_nms = ui->xattr_names;
1873 fscki->mode = inode->i_mode;
1874 iput(inode);
1875 }
1876
1826 if (S_ISDIR(fscki->mode)) { 1877 if (S_ISDIR(fscki->mode)) {
1827 fscki->calc_sz = UBIFS_INO_NODE_SZ; 1878 fscki->calc_sz = UBIFS_INO_NODE_SZ;
1828 fscki->calc_cnt = 2; 1879 fscki->calc_cnt = 2;
1829 } 1880 }
1881
1830 rb_link_node(&fscki->rb, parent, p); 1882 rb_link_node(&fscki->rb, parent, p);
1831 rb_insert_color(&fscki->rb, &fsckd->inodes); 1883 rb_insert_color(&fscki->rb, &fsckd->inodes);
1884
1832 return fscki; 1885 return fscki;
1833} 1886}
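The add_inode() change above encodes a general rule: when a live in-memory copy of an object exists, trust it over the possibly stale on-media record, and drop the reference (iput()) when done. A userspace model of that preference, with invented names and a one-entry stand-in for the VFS inode cache:

    #include <stdio.h>
    #include <stddef.h>

    struct inode_rec {
            long ino;
            long long size;
    };

    /* Stand-in for the VFS inode cache lookup (cf. ilookup()). */
    static struct inode_rec *cache_lookup(long ino)
    {
            static struct inode_rec cached = { 7, 4096 }; /* replay fixed this up */

            return ino == cached.ino ? &cached : NULL;
    }

    /* Prefer the live copy; fall back to the on-media record. */
    static long long effective_size(long ino, const struct inode_rec *on_media)
    {
            const struct inode_rec *live = cache_lookup(ino);

            return live ? live->size : on_media->size;
    }

    int main(void)
    {
            struct inode_rec flash = { 7, 1024 }; /* stale size from before replay */

            printf("size used by the checker: %lld\n", effective_size(7, &flash));
            return 0;
    }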
1834 1887
@@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2421 hashb = key_block(c, &sb->key); 2474 hashb = key_block(c, &sb->key);
2422 2475
2423 if (hasha > hashb) { 2476 if (hasha > hashb) {
2424 ubifs_err("larger hash %u goes before %u", hasha, hashb); 2477 ubifs_err("larger hash %u goes before %u",
2478 hasha, hashb);
2425 goto error_dump; 2479 goto error_dump;
2426 } 2480 }
2427 } 2481 }
@@ -2437,14 +2491,12 @@ error_dump:
2437 return 0; 2491 return 0;
2438} 2492}
2439 2493
2440static int invocation_cnt;
2441
2442int dbg_force_in_the_gaps(void) 2494int dbg_force_in_the_gaps(void)
2443{ 2495{
2444 if (!dbg_force_in_the_gaps_enabled) 2496 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2445 return 0; 2497 return 0;
2446 /* Force in-the-gaps every 8th commit */ 2498
2447 return !((invocation_cnt++) & 0x7); 2499 return !(random32() & 7);
2448} 2500}
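The dbg_force_in_the_gaps() change above swaps a deterministic "every 8th call" counter for a stateless 1-in-8 random trigger, which removes the static variable and avoids exercising the same fixed pattern on every run. Both variants side by side, as a plain C sketch with rand() standing in for the kernel's random32():

    #include <stdlib.h>

    /* Old style: fires on exactly every 8th invocation, needs static state. */
    static int force_every_8th(void)
    {
            static int invocation_cnt;

            return !(invocation_cnt++ & 0x7);
    }

    /* New style: fires with probability 1/8, stateless and pattern-free. */
    static int force_randomly(void)
    {
            return !(rand() & 7);
    }

    int main(void)
    {
            int det = 0, rnd = 0, i;

            for (i = 0; i < 8000; i++) {
                    det += force_every_8th();
                    rnd += force_randomly();
            }
            /* det is exactly 1000; rnd is roughly 1000 on average */
            return !(det == 1000);
    }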
2449 2501
2450/* Failure mode for recovery testing */ 2502/* Failure mode for recovery testing */
@@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2632 int len, int check) 2684 int len, int check)
2633{ 2685{
2634 if (in_failure_mode(desc)) 2686 if (in_failure_mode(desc))
2635 return -EIO; 2687 return -EROFS;
2636 return ubi_leb_read(desc, lnum, buf, offset, len, check); 2688 return ubi_leb_read(desc, lnum, buf, offset, len, check);
2637} 2689}
2638 2690
@@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2642 int err, failing; 2694 int err, failing;
2643 2695
2644 if (in_failure_mode(desc)) 2696 if (in_failure_mode(desc))
2645 return -EIO; 2697 return -EROFS;
2646 failing = do_fail(desc, lnum, 1); 2698 failing = do_fail(desc, lnum, 1);
2647 if (failing) 2699 if (failing)
2648 cut_data(buf, len); 2700 cut_data(buf, len);
@@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2650 if (err) 2702 if (err)
2651 return err; 2703 return err;
2652 if (failing) 2704 if (failing)
2653 return -EIO; 2705 return -EROFS;
2654 return 0; 2706 return 0;
2655} 2707}
2656 2708
@@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
2660 int err; 2712 int err;
2661 2713
2662 if (do_fail(desc, lnum, 1)) 2714 if (do_fail(desc, lnum, 1))
2663 return -EIO; 2715 return -EROFS;
2664 err = ubi_leb_change(desc, lnum, buf, len, dtype); 2716 err = ubi_leb_change(desc, lnum, buf, len, dtype);
2665 if (err) 2717 if (err)
2666 return err; 2718 return err;
2667 if (do_fail(desc, lnum, 1)) 2719 if (do_fail(desc, lnum, 1))
2668 return -EIO; 2720 return -EROFS;
2669 return 0; 2721 return 0;
2670} 2722}
2671 2723
@@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
2674 int err; 2726 int err;
2675 2727
2676 if (do_fail(desc, lnum, 0)) 2728 if (do_fail(desc, lnum, 0))
2677 return -EIO; 2729 return -EROFS;
2678 err = ubi_leb_erase(desc, lnum); 2730 err = ubi_leb_erase(desc, lnum);
2679 if (err) 2731 if (err)
2680 return err; 2732 return err;
2681 if (do_fail(desc, lnum, 0)) 2733 if (do_fail(desc, lnum, 0))
2682 return -EIO; 2734 return -EROFS;
2683 return 0; 2735 return 0;
2684} 2736}
2685 2737
@@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
2688 int err; 2740 int err;
2689 2741
2690 if (do_fail(desc, lnum, 0)) 2742 if (do_fail(desc, lnum, 0))
2691 return -EIO; 2743 return -EROFS;
2692 err = ubi_leb_unmap(desc, lnum); 2744 err = ubi_leb_unmap(desc, lnum);
2693 if (err) 2745 if (err)
2694 return err; 2746 return err;
2695 if (do_fail(desc, lnum, 0)) 2747 if (do_fail(desc, lnum, 0))
2696 return -EIO; 2748 return -EROFS;
2697 return 0; 2749 return 0;
2698} 2750}
2699 2751
2700int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) 2752int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
2701{ 2753{
2702 if (in_failure_mode(desc)) 2754 if (in_failure_mode(desc))
2703 return -EIO; 2755 return -EROFS;
2704 return ubi_is_mapped(desc, lnum); 2756 return ubi_is_mapped(desc, lnum);
2705} 2757}
2706 2758
@@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2709 int err; 2761 int err;
2710 2762
2711 if (do_fail(desc, lnum, 0)) 2763 if (do_fail(desc, lnum, 0))
2712 return -EIO; 2764 return -EROFS;
2713 err = ubi_leb_map(desc, lnum, dtype); 2765 err = ubi_leb_map(desc, lnum, dtype);
2714 if (err) 2766 if (err)
2715 return err; 2767 return err;
2716 if (do_fail(desc, lnum, 0)) 2768 if (do_fail(desc, lnum, 0))
2717 return -EIO; 2769 return -EROFS;
2718 return 0; 2770 return 0;
2719} 2771}
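All the dbg_leb_* wrappers above share one shape: probe the emulated-power-cut state before and after delegating to the real UBI call, and report -EROFS (the medium went read-only) rather than a generic -EIO, matching how the rest of UBIFS treats a dead medium. A generic userspace sketch of the wrapper shape; EROFS comes from <errno.h>, everything else is invented:

    #include <errno.h>
    #include <stdio.h>

    static int failure_mode;        /* set by the test harness */

    static int real_erase(int lnum)
    {
            printf("erase LEB %d\n", lnum);
            return 0;
    }

    /* Wrapper: inject -EROFS before and after the real operation. */
    static int dbg_erase(int lnum)
    {
            int err;

            if (failure_mode)
                    return -EROFS;
            err = real_erase(lnum);
            if (err)
                    return err;
            if (failure_mode)
                    return -EROFS;
            return 0;
    }

    int main(void)
    {
            failure_mode = 1;
            return dbg_erase(3) == -EROFS ? 0 : 1;
    }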
2720 2772
@@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void)
2784static int open_debugfs_file(struct inode *inode, struct file *file) 2836static int open_debugfs_file(struct inode *inode, struct file *file)
2785{ 2837{
2786 file->private_data = inode->i_private; 2838 file->private_data = inode->i_private;
2787 return 0; 2839 return nonseekable_open(inode, file);
2788} 2840}
2789 2841
2790static ssize_t write_debugfs_file(struct file *file, const char __user *buf, 2842static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
@@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2795 2847
2796 if (file->f_path.dentry == d->dfs_dump_lprops) 2848 if (file->f_path.dentry == d->dfs_dump_lprops)
2797 dbg_dump_lprops(c); 2849 dbg_dump_lprops(c);
2798 else if (file->f_path.dentry == d->dfs_dump_budg) { 2850 else if (file->f_path.dentry == d->dfs_dump_budg)
2799 spin_lock(&c->space_lock); 2851 dbg_dump_budg(c, &c->bi);
2800 dbg_dump_budg(c); 2852 else if (file->f_path.dentry == d->dfs_dump_tnc) {
2801 spin_unlock(&c->space_lock);
2802 } else if (file->f_path.dentry == d->dfs_dump_tnc) {
2803 mutex_lock(&c->tnc_mutex); 2853 mutex_lock(&c->tnc_mutex);
2804 dbg_dump_tnc(c); 2854 dbg_dump_tnc(c);
2805 mutex_unlock(&c->tnc_mutex); 2855 mutex_unlock(&c->tnc_mutex);
2806 } else 2856 } else
2807 return -EINVAL; 2857 return -EINVAL;
2808 2858
2809 *ppos += count;
2810 return count; 2859 return count;
2811} 2860}
2812 2861
@@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = {
2814 .open = open_debugfs_file, 2863 .open = open_debugfs_file,
2815 .write = write_debugfs_file, 2864 .write = write_debugfs_file,
2816 .owner = THIS_MODULE, 2865 .owner = THIS_MODULE,
2817 .llseek = default_llseek, 2866 .llseek = no_llseek,
2818}; 2867};
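The two hunks above convert the debugfs knobs to the standard non-seekable idiom: nonseekable_open() in .open, no_llseek in .llseek, and no *ppos bookkeeping in .write, since a trigger file has no meaningful position. A sketch of a write-only debugfs knob in this style, as a standalone module of the same kernel era; the "example"/"trigger" names are made up and error handling is elided:

    #include <linux/module.h>
    #include <linux/debugfs.h>
    #include <linux/fs.h>

    static struct dentry *dir;

    static int knob_open(struct inode *inode, struct file *file)
    {
            file->private_data = inode->i_private;
            /* Mark the file non-seekable so f_pos is never consulted. */
            return nonseekable_open(inode, file);
    }

    static ssize_t knob_write(struct file *file, const char __user *buf,
                              size_t count, loff_t *ppos)
    {
            /* Act on the write; no *ppos bookkeeping for a pure trigger file. */
            pr_info("knob poked\n");
            return count;
    }

    static const struct file_operations knob_fops = {
            .owner  = THIS_MODULE,
            .open   = knob_open,
            .write  = knob_write,
            .llseek = no_llseek,
    };

    static int __init knob_init(void)
    {
            dir = debugfs_create_dir("example", NULL);
            debugfs_create_file("trigger", 0200, dir, NULL, &knob_fops);
            return 0;
    }

    static void __exit knob_exit(void)
    {
            debugfs_remove_recursive(dir);
    }

    module_init(knob_init);
    module_exit(knob_exit);
    MODULE_LICENSE("GPL");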
2819 2868
2820/** 2869/**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index e6493cac193d..a811ac4a26bb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
33 33
34#include <linux/random.h>
35
34/** 36/**
35 * ubifs_debug_info - per-FS debugging information. 37 * ubifs_debug_info - per-FS debugging information.
36 * @old_zroot: old index root - used by 'dbg_check_old_index()' 38 * @old_zroot: old index root - used by 'dbg_check_old_index()'
@@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
50 * @new_ihead_offs: used by debugging to check @c->ihead_offs 52 * @new_ihead_offs: used by debugging to check @c->ihead_offs
51 * 53 *
52 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') 54 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
53 * @saved_free: saved free space (used by 'dbg_save_space_info()') 55 * @saved_bi: saved budgeting information
56 * @saved_free: saved amount of free space
57 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
54 * 58 *
55 * dfs_dir_name: name of debugfs directory containing this file-system's files 59 * @dfs_dir_name: name of debugfs directory containing this file-system's files
56 * dfs_dir: direntry object of the file-system debugfs directory 60 * @dfs_dir: direntry object of the file-system debugfs directory
57 * dfs_dump_lprops: "dump lprops" debugfs knob 61 * @dfs_dump_lprops: "dump lprops" debugfs knob
58 * dfs_dump_budg: "dump budgeting information" debugfs knob 62 * @dfs_dump_budg: "dump budgeting information" debugfs knob
59 * dfs_dump_tnc: "dump TNC" debugfs knob 63 * @dfs_dump_tnc: "dump TNC" debugfs knob
60 */ 64 */
61struct ubifs_debug_info { 65struct ubifs_debug_info {
62 struct ubifs_zbranch old_zroot; 66 struct ubifs_zbranch old_zroot;
@@ -76,7 +80,9 @@ struct ubifs_debug_info {
76 int new_ihead_offs; 80 int new_ihead_offs;
77 81
78 struct ubifs_lp_stats saved_lst; 82 struct ubifs_lp_stats saved_lst;
83 struct ubifs_budg_info saved_bi;
79 long long saved_free; 84 long long saved_free;
85 int saved_idx_gc_cnt;
80 86
81 char dfs_dir_name[100]; 87 char dfs_dir_name[100];
82 struct dentry *dfs_dir; 88 struct dentry *dfs_dir;
@@ -101,23 +107,7 @@ struct ubifs_debug_info {
101 } \ 107 } \
102} while (0) 108} while (0)
103 109
104#define dbg_dump_stack() do { \ 110#define dbg_dump_stack() dump_stack()
105 if (!dbg_failure_mode) \
106 dump_stack(); \
107} while (0)
108
109/* Generic debugging messages */
110#define dbg_msg(fmt, ...) do { \
111 spin_lock(&dbg_lock); \
112 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
113 __func__, ##__VA_ARGS__); \
114 spin_unlock(&dbg_lock); \
115} while (0)
116
117#define dbg_do_msg(typ, fmt, ...) do { \
118 if (ubifs_msg_flags & typ) \
119 dbg_msg(fmt, ##__VA_ARGS__); \
120} while (0)
121 111
122#define dbg_err(fmt, ...) do { \ 112#define dbg_err(fmt, ...) do { \
123 spin_lock(&dbg_lock); \ 113 spin_lock(&dbg_lock); \
@@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c,
137#define DBGKEY(key) dbg_key_str0(c, (key)) 127#define DBGKEY(key) dbg_key_str0(c, (key))
138#define DBGKEY1(key) dbg_key_str1(c, (key)) 128#define DBGKEY1(key) dbg_key_str1(c, (key))
139 129
140/* General messages */ 130#define ubifs_dbg_msg(type, fmt, ...) do { \
141#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) 131 spin_lock(&dbg_lock); \
132 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
133 spin_unlock(&dbg_lock); \
134} while (0)
142 135
 136/* Just debugging messages not related to any specific UBIFS subsystem */
137#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
138/* General messages */
139#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
143/* Additional journal messages */ 140/* Additional journal messages */
144#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) 141#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
145
146/* Additional TNC messages */ 142/* Additional TNC messages */
147#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) 143#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
148
149/* Additional lprops messages */ 144/* Additional lprops messages */
150#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) 145#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
151
152/* Additional LEB find messages */ 146/* Additional LEB find messages */
153#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) 147#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
154
155/* Additional mount messages */ 148/* Additional mount messages */
156#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) 149#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
157
158/* Additional I/O messages */ 150/* Additional I/O messages */
159#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) 151#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
160
161/* Additional commit messages */ 152/* Additional commit messages */
162#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) 153#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
163
164/* Additional budgeting messages */ 154/* Additional budgeting messages */
165#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) 155#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
166
167/* Additional log messages */ 156/* Additional log messages */
168#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) 157#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
169
170/* Additional gc messages */ 158/* Additional gc messages */
171#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) 159#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
172
173/* Additional scan messages */ 160/* Additional scan messages */
174#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) 161#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
175
176/* Additional recovery messages */ 162/* Additional recovery messages */
177#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 163#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
178
179/*
180 * Debugging message type flags.
181 *
182 * UBIFS_MSG_GEN: general messages
183 * UBIFS_MSG_JNL: journal messages
184 * UBIFS_MSG_MNT: mount messages
185 * UBIFS_MSG_CMT: commit messages
186 * UBIFS_MSG_FIND: LEB find messages
187 * UBIFS_MSG_BUDG: budgeting messages
188 * UBIFS_MSG_GC: garbage collection messages
189 * UBIFS_MSG_TNC: TNC messages
190 * UBIFS_MSG_LP: lprops messages
191 * UBIFS_MSG_IO: I/O messages
192 * UBIFS_MSG_LOG: log messages
193 * UBIFS_MSG_SCAN: scan messages
194 * UBIFS_MSG_RCVRY: recovery messages
195 */
196enum {
197 UBIFS_MSG_GEN = 0x1,
198 UBIFS_MSG_JNL = 0x2,
199 UBIFS_MSG_MNT = 0x4,
200 UBIFS_MSG_CMT = 0x8,
201 UBIFS_MSG_FIND = 0x10,
202 UBIFS_MSG_BUDG = 0x20,
203 UBIFS_MSG_GC = 0x40,
204 UBIFS_MSG_TNC = 0x80,
205 UBIFS_MSG_LP = 0x100,
206 UBIFS_MSG_IO = 0x200,
207 UBIFS_MSG_LOG = 0x400,
208 UBIFS_MSG_SCAN = 0x800,
209 UBIFS_MSG_RCVRY = 0x1000,
210};
211 164
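The debug.h rework above collapses the dbg_do_msg()/UBIFS_MSG_* bit-mask machinery into a single ubifs_dbg_msg() macro built on pr_debug(): enabling or disabling individual call sites becomes the job of the kernel's dynamic debug facility instead of module-private flags, and the subsystem name survives only as a string tag. A userspace model of the macro family, with printf() standing in for pr_debug() and the locking dropped:

    #include <stdio.h>

    /* One common macro; the subsystem tag replaces the old bit flags. */
    #define dbg_msg_typed(type, fmt, ...) \
            printf("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)

    #define dbg_gen(fmt, ...) dbg_msg_typed("gen", fmt, ##__VA_ARGS__)
    #define dbg_jnl(fmt, ...) dbg_msg_typed("jnl", fmt, ##__VA_ARGS__)

    int main(void)
    {
            dbg_gen("mounted volume %d", 3);
            dbg_jnl("reserved %d bytes", 128);
            return 0;
    }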
212/* 165/*
213 * Debugging check flags. 166 * Debugging check flags.
@@ -233,11 +186,9 @@ enum {
233/* 186/*
234 * Special testing flags. 187 * Special testing flags.
235 * 188 *
236 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
237 * UBIFS_TST_RCVRY: failure mode for recovery testing 189 * UBIFS_TST_RCVRY: failure mode for recovery testing
238 */ 190 */
239enum { 191enum {
240 UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
241 UBIFS_TST_RCVRY = 0x4, 192 UBIFS_TST_RCVRY = 0x4,
242}; 193};
243 194
@@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
262 int offs); 213 int offs);
263void dbg_dump_budget_req(const struct ubifs_budget_req *req); 214void dbg_dump_budget_req(const struct ubifs_budget_req *req);
264void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 215void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
265void dbg_dump_budg(struct ubifs_info *c); 216void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
266void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 217void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
267void dbg_dump_lprops(struct ubifs_info *c); 218void dbg_dump_lprops(struct ubifs_info *c);
268void dbg_dump_lpt_info(struct ubifs_info *c); 219void dbg_dump_lpt_info(struct ubifs_info *c);
@@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
304int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); 255int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
305 256
306/* Force the use of in-the-gaps method for testing */ 257/* Force the use of in-the-gaps method for testing */
307 258static inline int dbg_force_in_the_gaps_enabled(void)
308#define dbg_force_in_the_gaps_enabled \ 259{
309 (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) 260 return ubifs_chk_flags & UBIFS_CHK_GEN;
310 261}
311int dbg_force_in_the_gaps(void); 262int dbg_force_in_the_gaps(void);
312 263
313/* Failure mode for recovery testing */ 264/* Failure mode for recovery testing */
314
315#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 265#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
316 266
317#ifndef UBIFS_DBG_PRESERVE_UBI 267#ifndef UBIFS_DBG_PRESERVE_UBI
318
319#define ubi_leb_read dbg_leb_read 268#define ubi_leb_read dbg_leb_read
320#define ubi_leb_write dbg_leb_write 269#define ubi_leb_write dbg_leb_write
321#define ubi_leb_change dbg_leb_change 270#define ubi_leb_change dbg_leb_change
@@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void);
323#define ubi_leb_unmap dbg_leb_unmap 272#define ubi_leb_unmap dbg_leb_unmap
324#define ubi_is_mapped dbg_is_mapped 273#define ubi_is_mapped dbg_is_mapped
325#define ubi_leb_map dbg_leb_map 274#define ubi_leb_map dbg_leb_map
326
327#endif 275#endif
328 276
329int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, 277int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
@@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
370 __func__, __LINE__, current->pid); \ 318 __func__, __LINE__, current->pid); \
371} while (0) 319} while (0)
372 320
373#define dbg_err(fmt, ...) do { \ 321#define dbg_err(fmt, ...) do { \
374 if (0) \ 322 if (0) \
375 ubifs_err(fmt, ##__VA_ARGS__); \ 323 ubifs_err(fmt, ##__VA_ARGS__); \
376} while (0) 324} while (0)
377 325
378#define dbg_msg(fmt, ...) do { \ 326#define ubifs_dbg_msg(fmt, ...) do { \
379 if (0) \ 327 if (0) \
380 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ 328 pr_debug(fmt "\n", ##__VA_ARGS__); \
381 current->pid, __func__, ##__VA_ARGS__); \
382} while (0) 329} while (0)
383 330
384#define dbg_dump_stack() 331#define dbg_dump_stack()
385#define ubifs_assert_cmt_locked(c) 332#define ubifs_assert_cmt_locked(c)
386 333
387#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 334#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
388#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 335#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
389#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 336#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
390#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 337#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
391#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 338#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
392#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 339#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
393#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 340#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
394#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 341#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
395#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 342#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
396#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 343#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
397#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 344#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
398#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 345#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
399#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 346#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
400 348
401#define DBGKEY(key) ((char *)(key)) 349#define DBGKEY(key) ((char *)(key))
402#define DBGKEY1(key) ((char *)(key)) 350#define DBGKEY1(key) ((char *)(key))
@@ -420,7 +368,9 @@ static inline void
420dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } 368dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
421static inline void 369static inline void
422dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } 370dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
423static inline void dbg_dump_budg(struct ubifs_info *c) { return; } 371static inline void
372dbg_dump_budg(struct ubifs_info *c,
373 const struct ubifs_budg_info *bi) { return; }
424static inline void dbg_dump_lprop(const struct ubifs_info *c, 374static inline void dbg_dump_lprop(const struct ubifs_info *c,
425 const struct ubifs_lprops *lp) { return; } 375 const struct ubifs_lprops *lp) { return; }
426static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } 376static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
@@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c,
482 struct list_head *head) { return 0; } 432 struct list_head *head) { return 0; }
483 433
484static inline int dbg_force_in_the_gaps(void) { return 0; } 434static inline int dbg_force_in_the_gaps(void) { return 0; }
485#define dbg_force_in_the_gaps_enabled 0 435#define dbg_force_in_the_gaps_enabled() 0
486#define dbg_failure_mode 0 436#define dbg_failure_mode 0
487 437
488static inline int dbg_debugfs_init(void) { return 0; } 438static inline int dbg_debugfs_init(void) { return 0; }
489static inline void dbg_debugfs_exit(void) { return; } 439static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 7217d67a80a6..c2b80943560d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
603 ubifs_release_budget(c, &req); 603 ubifs_release_budget(c, &req);
604 else { 604 else {
605 /* We've deleted something - clean the "no space" flags */ 605 /* We've deleted something - clean the "no space" flags */
606 c->nospace = c->nospace_rp = 0; 606 c->bi.nospace = c->bi.nospace_rp = 0;
607 smp_wmb(); 607 smp_wmb();
608 } 608 }
609 return 0; 609 return 0;
@@ -656,6 +656,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
656 struct ubifs_inode *dir_ui = ubifs_inode(dir); 656 struct ubifs_inode *dir_ui = ubifs_inode(dir);
657 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; 657 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
658 658
659 dentry_unhash(dentry);
660
659 /* 661 /*
660 * Budget request settings: deletion direntry, deletion inode and 662 * Budget request settings: deletion direntry, deletion inode and
661 * changing the parent inode. If budgeting fails, go ahead anyway 663 * changing the parent inode. If budgeting fails, go ahead anyway
@@ -693,7 +695,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
693 ubifs_release_budget(c, &req); 695 ubifs_release_budget(c, &req);
694 else { 696 else {
695 /* We've deleted something - clean the "no space" flags */ 697 /* We've deleted something - clean the "no space" flags */
696 c->nospace = c->nospace_rp = 0; 698 c->bi.nospace = c->bi.nospace_rp = 0;
697 smp_wmb(); 699 smp_wmb();
698 } 700 }
699 return 0; 701 return 0;
@@ -976,6 +978,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
976 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; 978 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
977 struct timespec time; 979 struct timespec time;
978 980
981 if (new_inode && S_ISDIR(new_inode->i_mode))
982 dentry_unhash(new_dentry);
983
979 /* 984 /*
980 * Budget request settings: deletion direntry, new direntry, removing 985 * Budget request settings: deletion direntry, new direntry, removing
981 * the old inode, and changing old and new parent directory inodes. 986 * the old inode, and changing old and new parent directory inodes.
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b286db79c686..5e7fccfc4b29 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
212 */ 212 */
213static void release_existing_page_budget(struct ubifs_info *c) 213static void release_existing_page_budget(struct ubifs_info *c)
214{ 214{
215 struct ubifs_budget_req req = { .dd_growth = c->page_budget}; 215 struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
216 216
217 ubifs_release_budget(c, &req); 217 ubifs_release_budget(c, &req);
218} 218}
@@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len)
971 * the page locked, and it locks @ui_mutex. However, write-back does take inode 971 * the page locked, and it locks @ui_mutex. However, write-back does take inode
972 * @i_mutex, which means other VFS operations may be run on this inode at the 972 * @i_mutex, which means other VFS operations may be run on this inode at the
973 * same time. And the problematic one is truncation to smaller size, from where 973 * same time. And the problematic one is truncation to smaller size, from where
974 * we have to call 'truncate_setsize()', which first changes @inode->i_size, then 974 * we have to call 'truncate_setsize()', which first changes @inode->i_size,
975 * drops the truncated pages. And while dropping the pages, it takes the page 975 * then drops the truncated pages. And while dropping the pages, it takes the
976 * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with 976 * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
977 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 977 * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
978 * means that @inode->i_size is changed while @ui_mutex is unlocked. 978 * This means that @inode->i_size is changed while @ui_mutex is unlocked.
979 * 979 *
980 * XXX(truncate): with the new truncate sequence this is not true anymore, 980 * XXX(truncate): with the new truncate sequence this is not true anymore,
981 * and the calls to truncate_setsize can be move around freely. They should 981 * and the calls to truncate_setsize can be move around freely. They should
@@ -1189,7 +1189,7 @@ out_budg:
1189 if (budgeted) 1189 if (budgeted)
1190 ubifs_release_budget(c, &req); 1190 ubifs_release_budget(c, &req);
1191 else { 1191 else {
1192 c->nospace = c->nospace_rp = 0; 1192 c->bi.nospace = c->bi.nospace_rp = 0;
1193 smp_wmb(); 1193 smp_wmb();
1194 } 1194 }
1195 return err; 1195 return err;
@@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync)
1312 1312
1313 dbg_gen("syncing inode %lu", inode->i_ino); 1313 dbg_gen("syncing inode %lu", inode->i_ino);
1314 1314
1315 if (inode->i_sb->s_flags & MS_RDONLY) 1315 if (c->ro_mount)
1316 /*
 1317 * For some really strange reason the VFS does not filter out
 1318 * 'fsync()' for R/O mounted file-systems as of 2.6.39.
1319 */
1316 return 0; 1320 return 0;
1317 1321
1318 /* 1322 /*
@@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1432} 1436}
1433 1437
1434/* 1438/*
1435 * mmap()d file has taken write protection fault and is being made 1439 * mmap()d file has taken write protection fault and is being made writable.
1436 * writable. UBIFS must ensure page is budgeted for. 1440 * UBIFS must ensure page is budgeted for.
1437 */ 1441 */
1438static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1442static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1443 struct vm_fault *vmf)
1439{ 1444{
1440 struct page *page = vmf->page; 1445 struct page *page = vmf->page;
1441 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1446 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1536{ 1541{
1537 int err; 1542 int err;
1538 1543
1539 /* 'generic_file_mmap()' takes care of NOMMU case */
1540 err = generic_file_mmap(file, vma); 1544 err = generic_file_mmap(file, vma);
1541 if (err) 1545 if (err)
1542 return err; 1546 return err;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 1d54383d1269..2559d174e004 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
 252 * But if the index takes fewer LEBs than are reserved for it, 252 * But if the index takes fewer LEBs than are reserved for it,
253 * this function must avoid picking those reserved LEBs. 253 * this function must avoid picking those reserved LEBs.
254 */ 254 */
255 if (c->min_idx_lebs >= c->lst.idx_lebs) { 255 if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
256 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 256 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
257 exclude_index = 1; 257 exclude_index = 1;
258 } 258 }
259 spin_unlock(&c->space_lock); 259 spin_unlock(&c->space_lock);
@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
276 pick_free = 0; 276 pick_free = 0;
277 } else { 277 } else {
278 spin_lock(&c->space_lock); 278 spin_lock(&c->space_lock);
279 exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); 279 exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
280 spin_unlock(&c->space_lock); 280 spin_unlock(&c->space_lock);
281 } 281 }
282 282
@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
501 501
502 /* Check if there are enough empty LEBs for commit */ 502 /* Check if there are enough empty LEBs for commit */
503 spin_lock(&c->space_lock); 503 spin_lock(&c->space_lock);
504 if (c->min_idx_lebs > c->lst.idx_lebs) 504 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
505 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 505 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
506 else 506 else
507 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 151f10882820..ded29f6224c2 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
100 if (err) 100 if (err)
101 return err; 101 return err;
102 102
103 err = ubifs_wbuf_sync_nolock(wbuf);
104 if (err)
105 return err;
106
103 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); 107 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
104 if (err) 108 if (err)
105 return err; 109 return err;
@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c)
118 * This function compares data nodes @a and @b. Returns %1 if @a has greater 122 * This function compares data nodes @a and @b. Returns %1 if @a has greater
119 * inode or block number, and %-1 otherwise. 123 * inode or block number, and %-1 otherwise.
120 */ 124 */
121int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 125static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
122{ 126{
123 ino_t inuma, inumb; 127 ino_t inuma, inumb;
124 struct ubifs_info *c = priv; 128 struct ubifs_info *c = priv;
@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
161 * first and sorted by length in descending order. Directory entry nodes go 165 * first and sorted by length in descending order. Directory entry nodes go
 162 * after inode nodes and are sorted in ascending hash value order. 166 * after inode nodes and are sorted in ascending hash value order.
163 */ 167 */
164int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 168static int nondata_nodes_cmp(void *priv, struct list_head *a,
169 struct list_head *b)
165{ 170{
166 ino_t inuma, inumb; 171 ino_t inuma, inumb;
167 struct ubifs_info *c = priv; 172 struct ubifs_info *c = priv;
@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
473 ubifs_assert(c->gc_lnum != lnum); 478 ubifs_assert(c->gc_lnum != lnum);
474 ubifs_assert(wbuf->lnum != lnum); 479 ubifs_assert(wbuf->lnum != lnum);
475 480
481 if (lp->free + lp->dirty == c->leb_size) {
482 /* Special case - a free LEB */
483 dbg_gc("LEB %d is free, return it", lp->lnum);
484 ubifs_assert(!(lp->flags & LPROPS_INDEX));
485
486 if (lp->free != c->leb_size) {
487 /*
488 * Write buffers must be sync'd before unmapping
489 * freeable LEBs, because one of them may contain data
490 * which obsoletes something in 'lp->pnum'.
491 */
492 err = gc_sync_wbufs(c);
493 if (err)
494 return err;
495 err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
496 0, 0, 0, 0);
497 if (err)
498 return err;
499 }
500 err = ubifs_leb_unmap(c, lp->lnum);
501 if (err)
502 return err;
503
504 if (c->gc_lnum == -1) {
505 c->gc_lnum = lnum;
506 return LEB_RETAINED;
507 }
508
509 return LEB_FREED;
510 }
511
476 /* 512 /*
477 * We scan the entire LEB even though we only really need to scan up to 513 * We scan the entire LEB even though we only really need to scan up to
478 * (c->leb_size - lp->free). 514 * (c->leb_size - lp->free).
@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
682 "(min. space %d)", lp.lnum, lp.free, lp.dirty, 718 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
683 lp.free + lp.dirty, min_space); 719 lp.free + lp.dirty, min_space);
684 720
685 if (lp.free + lp.dirty == c->leb_size) {
686 /* An empty LEB was returned */
687 dbg_gc("LEB %d is free, return it", lp.lnum);
688 /*
689 * ubifs_find_dirty_leb() doesn't return freeable index
690 * LEBs.
691 */
692 ubifs_assert(!(lp.flags & LPROPS_INDEX));
693 if (lp.free != c->leb_size) {
694 /*
695 * Write buffers must be sync'd before
696 * unmapping freeable LEBs, because one of them
697 * may contain data which obsoletes something
698 * in 'lp.pnum'.
699 */
700 ret = gc_sync_wbufs(c);
701 if (ret)
702 goto out;
703 ret = ubifs_change_one_lp(c, lp.lnum,
704 c->leb_size, 0, 0, 0,
705 0);
706 if (ret)
707 goto out;
708 }
709 ret = ubifs_leb_unmap(c, lp.lnum);
710 if (ret)
711 goto out;
712 ret = lp.lnum;
713 break;
714 }
715
716 space_before = c->leb_size - wbuf->offs - wbuf->used; 721 space_before = c->leb_size - wbuf->offs - wbuf->used;
717 if (wbuf->lnum == -1) 722 if (wbuf->lnum == -1)
718 space_before = 0; 723 space_before = 0;
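The gc.c hunks above hoist the free-LEB fast path from ubifs_garbage_collect() into ubifs_garbage_collect_leb(): a LEB with free + dirty == leb_size needs no scanning, only (when it is freeable rather than truly empty) a write-buffer sync, then an unmap, then a decision whether to retain it as the GC head LEB. A userspace model of that decision, with invented constants:

    #include <stdio.h>

    #define LEB_SIZE        1024
    #define LEB_RETAINED    1
    #define LEB_FREED       2

    static int gc_lnum = -1;        /* LEB reserved for the GC head, -1 if none */

    /* Minimal model of the hoisted free-LEB fast path. */
    static int gc_leb(int lnum, int free_bytes, int dirty_bytes)
    {
            if (free_bytes + dirty_bytes == LEB_SIZE) {
                    if (free_bytes != LEB_SIZE)
                            printf("sync wbufs, account LEB %d as fully free\n",
                                   lnum);
                    printf("unmap LEB %d\n", lnum);
                    if (gc_lnum == -1) {
                            gc_lnum = lnum;         /* keep it as the GC head */
                            return LEB_RETAINED;
                    }
                    return LEB_FREED;
            }
            printf("LEB %d needs real garbage collection\n", lnum);
            return 0;
    }

    int main(void)
    {
            gc_leb(5, LEB_SIZE, 0);                 /* empty: retained */
            gc_leb(6, 200, LEB_SIZE - 200);         /* freeable: sync and unmap */
            return 0;
    }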
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index dfd168b7807e..166951e0dcd3 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
393 ubifs_assert(wbuf->size % c->min_io_size == 0); 393 ubifs_assert(wbuf->size % c->min_io_size == 0);
394 ubifs_assert(!c->ro_media && !c->ro_mount); 394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size) 395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
397 397
398 if (c->ro_error) 398 if (c->ro_error)
399 return -EROFS; 399 return -EROFS;
@@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
452 * @dtype: data type 452 * @dtype: data type
453 * 453 *
454 * This function targets the write-buffer to logical eraseblock @lnum:@offs. 454 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
455 * The write-buffer is synchronized if it is not empty. Returns zero in case of 455 * The write-buffer has to be empty. Returns zero in case of success and a
456 * success and a negative error code in case of failure. 456 * negative error code in case of failure.
457 */ 457 */
458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
459 int dtype) 459 int dtype)
@@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
465 ubifs_assert(offs >= 0 && offs <= c->leb_size); 465 ubifs_assert(offs >= 0 && offs <= c->leb_size);
466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
467 ubifs_assert(lnum != wbuf->lnum); 467 ubifs_assert(lnum != wbuf->lnum);
468 468 ubifs_assert(wbuf->used == 0);
469 if (wbuf->used > 0) {
470 int err = ubifs_wbuf_sync_nolock(wbuf);
471
472 if (err)
473 return err;
474 }
475 469
476 spin_lock(&wbuf->lock); 470 spin_lock(&wbuf->lock);
477 wbuf->lnum = lnum; 471 wbuf->lnum = lnum;
@@ -573,7 +567,7 @@ out_timers:
573int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 567int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
574{ 568{
575 struct ubifs_info *c = wbuf->c; 569 struct ubifs_info *c = wbuf->c;
576 int err, written, n, aligned_len = ALIGN(len, 8), offs; 570 int err, written, n, aligned_len = ALIGN(len, 8);
577 571
578 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 572 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
579 dbg_ntype(((struct ubifs_ch *)buf)->node_type), 573 dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
589 ubifs_assert(!c->ro_media && !c->ro_mount); 583 ubifs_assert(!c->ro_media && !c->ro_mount);
590 if (c->leb_size - wbuf->offs >= c->max_write_size) 584 if (c->leb_size - wbuf->offs >= c->max_write_size)
591 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 585 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
592 586
593 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 587 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
594 err = -ENOSPC; 588 err = -ENOSPC;
@@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
636 goto exit; 630 goto exit;
637 } 631 }
638 632
639 offs = wbuf->offs;
640 written = 0; 633 written = 0;
641 634
642 if (wbuf->used) { 635 if (wbuf->used) {
@@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
653 if (err) 646 if (err)
654 goto out; 647 goto out;
655 648
656 offs += wbuf->size; 649 wbuf->offs += wbuf->size;
657 len -= wbuf->avail; 650 len -= wbuf->avail;
658 aligned_len -= wbuf->avail; 651 aligned_len -= wbuf->avail;
659 written += wbuf->avail; 652 written += wbuf->avail;
@@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
672 if (err) 665 if (err)
673 goto out; 666 goto out;
674 667
675 offs += wbuf->size; 668 wbuf->offs += wbuf->size;
676 len -= wbuf->size; 669 len -= wbuf->size;
677 aligned_len -= wbuf->size; 670 aligned_len -= wbuf->size;
678 written += wbuf->size; 671 written += wbuf->size;
@@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
687 n = aligned_len >> c->max_write_shift; 680 n = aligned_len >> c->max_write_shift;
688 if (n) { 681 if (n) {
689 n <<= c->max_write_shift; 682 n <<= c->max_write_shift;
690 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 683 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
691 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 684 wbuf->offs);
692 wbuf->dtype); 685 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
686 wbuf->offs, n, wbuf->dtype);
693 if (err) 687 if (err)
694 goto out; 688 goto out;
695 offs += n; 689 wbuf->offs += n;
696 aligned_len -= n; 690 aligned_len -= n;
697 len -= n; 691 len -= n;
698 written += n; 692 written += n;
@@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
707 */ 701 */
708 memcpy(wbuf->buf, buf + written, len); 702 memcpy(wbuf->buf, buf + written, len);
709 703
710 wbuf->offs = offs;
711 if (c->leb_size - wbuf->offs >= c->max_write_size) 704 if (c->leb_size - wbuf->offs >= c->max_write_size)
712 wbuf->size = c->max_write_size; 705 wbuf->size = c->max_write_size;
713 else 706 else
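The io.c hunks above stop shadowing the write-buffer position in a local offs variable: ubifs_wbuf_write_nolock() now advances wbuf->offs in place after each media write, so the structure always reflects what has actually been written, and the final write-back assignment disappears. A self-contained userspace model of a write buffer that advances its own offset:

    #include <stdio.h>
    #include <string.h>

    struct wbuf {
            int offs;       /* media offset of the buffered region */
            int used;       /* bytes buffered so far */
            int avail;      /* bytes until the next flush */
            int size;       /* flush granularity */
            char buf[64];
    };

    /* Flush the buffered bytes and advance the position in place. */
    static void wbuf_flush(struct wbuf *w)
    {
            printf("media write: %d bytes at offset %d\n", w->size, w->offs);
            w->offs += w->size;     /* no local copy to forget to write back */
            w->used = 0;
            w->avail = w->size;
    }

    static void wbuf_write(struct wbuf *w, const char *data, int len)
    {
            while (len > 0) {
                    int n = len < w->avail ? len : w->avail;

                    memcpy(w->buf + w->used, data, n);
                    w->used += n;
                    w->avail -= n;
                    data += n;
                    len -= n;
                    if (!w->avail)
                            wbuf_flush(w);
            }
    }

    int main(void)
    {
            struct wbuf w = { .offs = 0, .used = 0, .avail = 16, .size = 16 };
            const char payload[] = "0123456789012345678901234567890123456789";

            wbuf_write(&w, payload, (int)sizeof(payload) - 1);
            printf("final: offs %d, used %d\n", w.offs, w.used);
            return 0;
    }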
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index aed25e864227..34b1679e6e3a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -141,14 +141,8 @@ again:
141 * LEB with some empty space. 141 * LEB with some empty space.
142 */ 142 */
143 lnum = ubifs_find_free_space(c, len, &offs, squeeze); 143 lnum = ubifs_find_free_space(c, len, &offs, squeeze);
144 if (lnum >= 0) { 144 if (lnum >= 0)
145 /* Found an LEB, add it to the journal head */
146 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
147 if (err)
148 goto out_return;
149 /* A new bud was successfully allocated and added to the log */
150 goto out; 145 goto out;
151 }
152 146
153 err = lnum; 147 err = lnum;
154 if (err != -ENOSPC) 148 if (err != -ENOSPC)
@@ -203,12 +197,23 @@ again:
203 return 0; 197 return 0;
204 } 198 }
205 199
206 err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
207 if (err)
208 goto out_return;
209 offs = 0; 200 offs = 0;
210 201
211out: 202out:
203 /*
204 * Make sure we synchronize the write-buffer before we add the new bud
205 * to the log. Otherwise we may have a power cut after the log
206 * reference node for the last bud (@lnum) is written but before the
207 * write-buffer data are written to the next-to-last bud
208 * (@wbuf->lnum). And the effect would be that the recovery would see
209 * that there is corruption in the next-to-last bud.
210 */
211 err = ubifs_wbuf_sync_nolock(wbuf);
212 if (err)
213 goto out_return;
214 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
215 if (err)
216 goto out_return;
212 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); 217 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
213 if (err) 218 if (err)
214 goto out_unlock; 219 goto out_unlock;
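The journal.c rework above is purely about ordering: the write-buffer holding the tail of the previous bud must be synchronized before the log reference node for the new bud is written, otherwise a power cut between the two steps would make recovery see corruption in the next-to-last bud. A trivial sketch that exists only to pin down the sequence; the function bodies are placeholders:

    #include <stdio.h>

    /* Stand-ins for the real operations; only the order matters here. */
    static int sync_wbuf(void)
    {
            puts("1. flush the previous bud's buffered data");
            return 0;
    }

    static int add_bud_ref(void)
    {
            puts("2. write the log reference node for the new bud");
            return 0;
    }

    static int seek_wbuf(void)
    {
            puts("3. retarget the write-buffer to the new bud");
            return 0;
    }

    /*
     * If a power cut hits between steps 1 and 2, the old bud is complete and
     * the new bud is simply absent from the log - both states recover cleanly.
     * The pre-patch order (2 before 1) could leave a log entry pointing past
     * data that never reached the previous bud.
     */
    int main(void)
    {
            return sync_wbuf() || add_bud_ref() || seek_wbuf();
    }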
@@ -380,10 +385,8 @@ out:
380 if (err == -ENOSPC) { 385 if (err == -ENOSPC) {
 381 /* These are budgeting problems, so print useful information */ 386 /* These are budgeting problems, so print useful information */
382 down_write(&c->commit_sem); 387 down_write(&c->commit_sem);
383 spin_lock(&c->space_lock);
384 dbg_dump_stack(); 388 dbg_dump_stack();
385 dbg_dump_budg(c); 389 dbg_dump_budg(c, &c->bi);
386 spin_unlock(&c->space_lock);
387 dbg_dump_lprops(c); 390 dbg_dump_lprops(c);
388 cmt_retries = dbg_check_lprops(c); 391 cmt_retries = dbg_check_lprops(c);
389 up_write(&c->commit_sem); 392 up_write(&c->commit_sem);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 40fa780ebea7..affea9494ae2 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
100} 100}
101 101
102/** 102/**
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
106 */
107static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
108{
109 lnum += 1;
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
112
113 return lnum;
114}
115
116/**
117 * empty_log_bytes - calculate amount of empty space in the log. 103 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object 104 * @c: UBIFS file-system description object
119 */ 105 */
@@ -257,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
257 ref->jhead = cpu_to_le32(jhead); 243 ref->jhead = cpu_to_le32(jhead);
258 244
259 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { 245 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
260 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 246 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
261 c->lhead_offs = 0; 247 c->lhead_offs = 0;
262 } 248 }
263 249
@@ -425,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
425 411
426 /* Switch to the next log LEB */ 412 /* Switch to the next log LEB */
427 if (c->lhead_offs) { 413 if (c->lhead_offs) {
428 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 414 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
429 c->lhead_offs = 0; 415 c->lhead_offs = 0;
430 } 416 }
431 417
@@ -446,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
446 432
447 c->lhead_offs += len; 433 c->lhead_offs += len;
448 if (c->lhead_offs == c->leb_size) { 434 if (c->lhead_offs == c->leb_size) {
449 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 435 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
450 c->lhead_offs = 0; 436 c->lhead_offs = 0;
451 } 437 }
452 438
@@ -533,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
533 } 519 }
534 mutex_lock(&c->log_mutex); 520 mutex_lock(&c->log_mutex);
535 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; 521 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
536 lnum = next_log_lnum(c, lnum)) { 522 lnum = ubifs_next_log_lnum(c, lnum)) {
537 dbg_log("unmap log LEB %d", lnum); 523 dbg_log("unmap log LEB %d", lnum);
538 err = ubifs_leb_unmap(c, lnum); 524 err = ubifs_leb_unmap(c, lnum);
539 if (err) 525 if (err)
@@ -642,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
642 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); 628 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
643 if (err) 629 if (err)
644 return err; 630 return err;
645 *lnum = next_log_lnum(c, *lnum); 631 *lnum = ubifs_next_log_lnum(c, *lnum);
646 *offs = 0; 632 *offs = 0;
647 } 633 }
648 memcpy(buf + *offs, node, len); 634 memcpy(buf + *offs, node, len);
@@ -712,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
712 ubifs_scan_destroy(sleb); 698 ubifs_scan_destroy(sleb);
713 if (lnum == c->lhead_lnum) 699 if (lnum == c->lhead_lnum)
714 break; 700 break;
715 lnum = next_log_lnum(c, lnum); 701 lnum = ubifs_next_log_lnum(c, lnum);
716 } 702 }
717 if (offs) { 703 if (offs) {
718 int sz = ALIGN(offs, c->min_io_size); 704 int sz = ALIGN(offs, c->min_io_size);
@@ -732,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
732 /* Unmap remaining LEBs */ 718 /* Unmap remaining LEBs */
733 lnum = write_lnum; 719 lnum = write_lnum;
734 do { 720 do {
735 lnum = next_log_lnum(c, lnum); 721 lnum = ubifs_next_log_lnum(c, lnum);
736 err = ubifs_leb_unmap(c, lnum); 722 err = ubifs_leb_unmap(c, lnum);
737 if (err) 723 if (err)
738 return err; 724 return err;
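All the log.c call sites above now use ubifs_next_log_lnum(), the old static next_log_lnum() apparently promoted to a shared helper so that code outside log.c can walk the log area too. Its logic is just a circular increment, as in this sketch with an illustrative first-log-LEB constant:

    #include <stdio.h>

    #define UBIFS_LOG_LNUM 3        /* illustrative first log LEB */

    /* Advance to the next log LEB, wrapping (cf. ubifs_next_log_lnum()). */
    static int next_log_lnum(int lnum, int log_last)
    {
            lnum += 1;
            if (lnum > log_last)
                    lnum = UBIFS_LOG_LNUM;
            return lnum;
    }

    int main(void)
    {
            int lnum = 6, i;

            for (i = 0; i < 5; i++) {
                    printf("%d ", lnum);
                    lnum = next_log_lnum(lnum, 7);
            }
            printf("\n");   /* prints: 6 7 3 4 5 */
            return 0;
    }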
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 0ee0847f2421..667884f4a615 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1007,21 +1007,11 @@ out:
1007} 1007}
1008 1008
1009/** 1009/**
1010 * struct scan_check_data - data provided to scan callback function.
1011 * @lst: LEB properties statistics
1012 * @err: error code
1013 */
1014struct scan_check_data {
1015 struct ubifs_lp_stats lst;
1016 int err;
1017};
1018
1019/**
1020 * scan_check_cb - scan callback. 1010 * scan_check_cb - scan callback.
1021 * @c: the UBIFS file-system description object 1011 * @c: the UBIFS file-system description object
1022 * @lp: LEB properties to scan 1012 * @lp: LEB properties to scan
1023 * @in_tree: whether the LEB properties are in main memory 1013 * @in_tree: whether the LEB properties are in main memory
1024 * @data: information passed to and from the caller of the scan 1014 * @lst: lprops statistics to update
1025 * 1015 *
1026 * This function returns a code that indicates whether the scan should continue 1016 * This function returns a code that indicates whether the scan should continue
1027 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree 1017 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1030,11 +1020,10 @@ struct scan_check_data {
1030 */ 1020 */
1031static int scan_check_cb(struct ubifs_info *c, 1021static int scan_check_cb(struct ubifs_info *c,
1032 const struct ubifs_lprops *lp, int in_tree, 1022 const struct ubifs_lprops *lp, int in_tree,
1033 struct scan_check_data *data) 1023 struct ubifs_lp_stats *lst)
1034{ 1024{
1035 struct ubifs_scan_leb *sleb; 1025 struct ubifs_scan_leb *sleb;
1036 struct ubifs_scan_node *snod; 1026 struct ubifs_scan_node *snod;
1037 struct ubifs_lp_stats *lst = &data->lst;
1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; 1027 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
1039 void *buf = NULL; 1028 void *buf = NULL;
1040 1029
@@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
1044 if (cat != (lp->flags & LPROPS_CAT_MASK)) { 1033 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1045 ubifs_err("bad LEB category %d expected %d", 1034 ubifs_err("bad LEB category %d expected %d",
1046 (lp->flags & LPROPS_CAT_MASK), cat); 1035 (lp->flags & LPROPS_CAT_MASK), cat);
1047 goto out; 1036 return -EINVAL;
1048 } 1037 }
1049 } 1038 }
1050 1039
@@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
1078 } 1067 }
1079 if (!found) { 1068 if (!found) {
1080 ubifs_err("bad LPT list (category %d)", cat); 1069 ubifs_err("bad LPT list (category %d)", cat);
1081 goto out; 1070 return -EINVAL;
1082 } 1071 }
1083 } 1072 }
1084 } 1073 }
@@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
1090 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || 1079 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1091 lp != heap->arr[lp->hpos]) { 1080 lp != heap->arr[lp->hpos]) {
1092 ubifs_err("bad LPT heap (category %d)", cat); 1081 ubifs_err("bad LPT heap (category %d)", cat);
1093 goto out; 1082 return -EINVAL;
1094 } 1083 }
1095 } 1084 }
1096 1085
1097 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1086 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) { 1087 if (!buf)
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum); 1088 return -ENOMEM;
1100 goto out; 1089
1090 /*
1091 * After an unclean unmount, empty and freeable LEBs
1092 * may contain garbage - do not scan them.
1093 */
1094 if (lp->free == c->leb_size) {
1095 lst->empty_lebs += 1;
1096 lst->total_free += c->leb_size;
1097 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1098 return LPT_SCAN_CONTINUE;
1099 }
1100 if (lp->free + lp->dirty == c->leb_size &&
1101 !(lp->flags & LPROPS_INDEX)) {
1102 lst->total_free += lp->free;
1103 lst->total_dirty += lp->dirty;
1104 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1105 return LPT_SCAN_CONTINUE;
1101 } 1106 }
1102 1107
1103 sleb = ubifs_scan(c, lnum, 0, buf, 0); 1108 sleb = ubifs_scan(c, lnum, 0, buf, 0);
1104 if (IS_ERR(sleb)) { 1109 if (IS_ERR(sleb)) {
1105 /* 1110 ret = PTR_ERR(sleb);
1106 * After an unclean unmount, empty and freeable LEBs 1111 if (ret == -EUCLEAN) {
1107 * may contain garbage. 1112 dbg_dump_lprops(c);
1108 */ 1113 dbg_dump_budg(c, &c->bi);
1109 if (lp->free == c->leb_size) {
1110 ubifs_err("scan errors were in empty LEB "
1111 "- continuing checking");
1112 lst->empty_lebs += 1;
1113 lst->total_free += c->leb_size;
1114 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1115 ret = LPT_SCAN_CONTINUE;
1116 goto exit;
1117 }
1118
1119 if (lp->free + lp->dirty == c->leb_size &&
1120 !(lp->flags & LPROPS_INDEX)) {
1121 ubifs_err("scan errors were in freeable LEB "
1122 "- continuing checking");
1123 lst->total_free += lp->free;
1124 lst->total_dirty += lp->dirty;
1125 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1126 ret = LPT_SCAN_CONTINUE;
1127 goto exit;
1128 } 1114 }
1129 data->err = PTR_ERR(sleb); 1115 goto out;
1130 ret = LPT_SCAN_STOP;
1131 goto exit;
1132 } 1116 }
1133 1117
1134 is_idx = -1; 1118 is_idx = -1;
@@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c,
1246 } 1230 }
1247 1231
1248 ubifs_scan_destroy(sleb); 1232 ubifs_scan_destroy(sleb);
1249 ret = LPT_SCAN_CONTINUE;
1250exit:
1251 vfree(buf); 1233 vfree(buf);
1252 return ret; 1234 return LPT_SCAN_CONTINUE;
1253 1235
1254out_print: 1236out_print:
1255 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " 1237 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1258,10 +1240,10 @@ out_print:
1258 dbg_dump_leb(c, lnum); 1240 dbg_dump_leb(c, lnum);
1259out_destroy: 1241out_destroy:
1260 ubifs_scan_destroy(sleb); 1242 ubifs_scan_destroy(sleb);
1243 ret = -EINVAL;
1261out: 1244out:
1262 vfree(buf); 1245 vfree(buf);
1263 data->err = -EINVAL; 1246 return ret;
1264 return LPT_SCAN_STOP;
1265} 1247}
1266 1248
1267/** 1249/**
@@ -1278,8 +1260,7 @@ out:
1278int dbg_check_lprops(struct ubifs_info *c) 1260int dbg_check_lprops(struct ubifs_info *c)
1279{ 1261{
1280 int i, err; 1262 int i, err;
1281 struct scan_check_data data; 1263 struct ubifs_lp_stats lst;
1282 struct ubifs_lp_stats *lst = &data.lst;
1283 1264
1284 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1265 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1285 return 0; 1266 return 0;
@@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
1294 return err; 1275 return err;
1295 } 1276 }
1296 1277
1297 memset(lst, 0, sizeof(struct ubifs_lp_stats)); 1278 memset(&lst, 0, sizeof(struct ubifs_lp_stats));
1298
1299 data.err = 0;
1300 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, 1279 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
1301 (ubifs_lpt_scan_callback)scan_check_cb, 1280 (ubifs_lpt_scan_callback)scan_check_cb,
1302 &data); 1281 &lst);
1303 if (err && err != -ENOSPC) 1282 if (err && err != -ENOSPC)
1304 goto out; 1283 goto out;
1305 if (data.err) {
1306 err = data.err;
1307 goto out;
1308 }
1309 1284
1310 if (lst->empty_lebs != c->lst.empty_lebs || 1285 if (lst.empty_lebs != c->lst.empty_lebs ||
1311 lst->idx_lebs != c->lst.idx_lebs || 1286 lst.idx_lebs != c->lst.idx_lebs ||
1312 lst->total_free != c->lst.total_free || 1287 lst.total_free != c->lst.total_free ||
1313 lst->total_dirty != c->lst.total_dirty || 1288 lst.total_dirty != c->lst.total_dirty ||
1314 lst->total_used != c->lst.total_used) { 1289 lst.total_used != c->lst.total_used) {
1315 ubifs_err("bad overall accounting"); 1290 ubifs_err("bad overall accounting");
1316 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " 1291 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
1317 "total_free %lld, total_dirty %lld, total_used %lld", 1292 "total_free %lld, total_dirty %lld, total_used %lld",
1318 lst->empty_lebs, lst->idx_lebs, lst->total_free, 1293 lst.empty_lebs, lst.idx_lebs, lst.total_free,
1319 lst->total_dirty, lst->total_used); 1294 lst.total_dirty, lst.total_used);
1320 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " 1295 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
1321 "total_free %lld, total_dirty %lld, total_used %lld", 1296 "total_free %lld, total_dirty %lld, total_used %lld",
1322 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, 1297 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
@@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
1325 goto out; 1300 goto out;
1326 } 1301 }
1327 1302
1328 if (lst->total_dead != c->lst.total_dead || 1303 if (lst.total_dead != c->lst.total_dead ||
1329 lst->total_dark != c->lst.total_dark) { 1304 lst.total_dark != c->lst.total_dark) {
1330 ubifs_err("bad dead/dark space accounting"); 1305 ubifs_err("bad dead/dark space accounting");
1331 ubifs_err("calculated: total_dead %lld, total_dark %lld", 1306 ubifs_err("calculated: total_dead %lld, total_dark %lld",
1332 lst->total_dead, lst->total_dark); 1307 lst.total_dead, lst.total_dark);
1333 ubifs_err("read from lprops: total_dead %lld, total_dark %lld", 1308 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
1334 c->lst.total_dead, c->lst.total_dark); 1309 c->lst.total_dead, c->lst.total_dark);
1335 err = -EINVAL; 1310 err = -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0c9c69bd983a..dfcb5748a7dc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -29,6 +29,12 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include "ubifs.h" 30#include "ubifs.h"
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG
33static int dbg_populate_lsave(struct ubifs_info *c);
34#else
35#define dbg_populate_lsave(c) 0
36#endif
37
32/** 38/**
33 * first_dirty_cnode - find first dirty cnode. 39 * first_dirty_cnode - find first dirty cnode.
34 * @c: UBIFS file-system description object 40 * @c: UBIFS file-system description object
@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
586 if (nnode->nbranch[iip].lnum) 592 if (nnode->nbranch[iip].lnum)
587 break; 593 break;
588 } 594 }
589 } while (iip >= UBIFS_LPT_FANOUT); 595 } while (iip >= UBIFS_LPT_FANOUT);
590 596
591 /* Go right */ 597 /* Go right */
592 nnode = ubifs_get_nnode(c, nnode, iip); 598 nnode = ubifs_get_nnode(c, nnode, iip);
@@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
815 c->lpt_drty_flgs |= LSAVE_DIRTY; 821 c->lpt_drty_flgs |= LSAVE_DIRTY;
816 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); 822 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
817 } 823 }
824
825 if (dbg_populate_lsave(c))
826 return;
827
818 list_for_each_entry(lprops, &c->empty_list, list) { 828 list_for_each_entry(lprops, &c->empty_list, list) {
819 c->lsave[cnt++] = lprops->lnum; 829 c->lsave[cnt++] = lprops->lnum;
820 if (cnt >= c->lsave_cnt) 830 if (cnt >= c->lsave_cnt)
@@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
1994 current->pid); 2004 current->pid);
1995} 2005}
1996 2006
2007/**
2008 * dbg_populate_lsave - debugging version of 'populate_lsave()'
2009 * @c: UBIFS file-system description object
2010 *
2011 * This is a debugging version of 'populate_lsave()' which populates lsave
2012 * with random LEBs instead of useful LEBs, which is good for test coverage.
2013 * Returns zero if lsave has not been populated (this debugging feature is
2014 * disabled) and non-zero if lsave has been populated.
2015 */
2016static int dbg_populate_lsave(struct ubifs_info *c)
2017{
2018 struct ubifs_lprops *lprops;
2019 struct ubifs_lpt_heap *heap;
2020 int i;
2021
2022 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2023 return 0;
2024 if (random32() & 3)
2025 return 0;
2026
2027 for (i = 0; i < c->lsave_cnt; i++)
2028 c->lsave[i] = c->main_first;
2029
2030 list_for_each_entry(lprops, &c->empty_list, list)
2031 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2032 list_for_each_entry(lprops, &c->freeable_list, list)
2033 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2034 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2035 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2036
2037 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2038 for (i = 0; i < heap->cnt; i++)
2039 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2040 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2041 for (i = 0; i < heap->cnt; i++)
2042 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2043 heap = &c->lpt_heap[LPROPS_FREE - 1];
2044 for (i = 0; i < heap->cnt; i++)
2045 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2046
2047 return 1;
2048}
2049
1997#endif /* CONFIG_UBIFS_FS_DEBUG */ 2050#endif /* CONFIG_UBIFS_FS_DEBUG */
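The randomized population above follows a simple pattern: bail out roughly three times out of four, seed every lsave slot with a safe default, then scatter candidate LEB numbers into random slots. A minimal user-space sketch of that pattern, using plain rand() in place of random32(); LSAVE_CNT, MAIN_FIRST and the candidate numbers are made-up stand-ins, not UBIFS values:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define LSAVE_CNT  8
#define MAIN_FIRST 10

int main(void)
{
        int lsave[LSAVE_CNT];
        int candidates[] = { 12, 17, 23, 31, 42 };
        int i;

        srand((unsigned)time(NULL));

        if (rand() & 3)         /* take the debug path only ~1 time in 4 */
                return 0;

        for (i = 0; i < LSAVE_CNT; i++)         /* safe default everywhere */
                lsave[i] = MAIN_FIRST;

        for (i = 0; i < 5; i++)                 /* scatter candidates randomly */
                lsave[rand() % LSAVE_CNT] = candidates[i];

        for (i = 0; i < LSAVE_CNT; i++)
                printf("lsave[%d] = %d\n", i, lsave[i]);

        return 0;
}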
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 21f47afdacff..278c2382e8c2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
148 } 148 }
149 149
150 main_sz = (long long)c->main_lebs * c->leb_size; 150 main_sz = (long long)c->main_lebs * c->leb_size;
151 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { 151 if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
152 err = 9; 152 err = 9;
153 goto out; 153 goto out;
154 } 154 }
@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
218 } 218 }
219 219
220 if (c->lst.total_dead + c->lst.total_dark + 220 if (c->lst.total_dead + c->lst.total_dark +
221 c->lst.total_used + c->old_idx_sz > main_sz) { 221 c->lst.total_used + c->bi.old_idx_sz > main_sz) {
222 err = 21; 222 err = 21;
223 goto out; 223 goto out;
224 } 224 }
@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); 286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); 287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
289 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); 289 c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); 290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); 291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); 292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); 305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); 306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
307 307
308 c->calc_idx_sz = c->old_idx_sz; 308 c->calc_idx_sz = c->bi.old_idx_sz;
309 309
310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) 310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
311 c->no_orphs = 1; 311 c->no_orphs = 1;
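The validate_master() checks above rely on 'sz & 7' being non-zero exactly when sz is not a multiple of 8. A small stand-alone illustration of the same two sanity checks; the sizes used here are arbitrary:

#include <assert.h>

/* Passes the same two sanity checks as above: the index size must be a
 * multiple of 8 and strictly smaller than the main area size. */
static int idx_size_valid(long long idx_sz, long long main_sz)
{
        if (idx_sz & 7)
                return 0;       /* not 8-byte aligned */
        if (idx_sz >= main_sz)
                return 0;       /* index cannot fill the whole main area */
        return 1;
}

int main(void)
{
        assert(idx_size_valid(4096, 1 << 20));
        assert(!idx_size_valid(4097, 1 << 20));         /* misaligned */
        assert(!idx_size_valid(1 << 20, 1 << 20));      /* too large */
        return 0;
}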
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index c3de04dc952a..0b5296a9a4c5 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
340 mutex_unlock(&c->lp_mutex); 340 mutex_unlock(&c->lp_mutex);
341} 341}
342 342
343/**
344 * ubifs_next_log_lnum - switch to the next log LEB.
345 * @c: UBIFS file-system description object
346 * @lnum: current log LEB
347 *
348 * This helper function returns the log LEB number which goes next after LEB
349 * 'lnum'.
350 */
351static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
352{
353 lnum += 1;
354 if (lnum > c->log_last)
355 lnum = UBIFS_LOG_LNUM;
356
357 return lnum;
358}
359
343#endif /* __UBIFS_MISC_H__ */ 360#endif /* __UBIFS_MISC_H__ */
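A quick user-space demonstration of the wrap-around behaviour of 'ubifs_next_log_lnum()'; LOG_FIRST and LOG_LAST below are hypothetical stand-ins for UBIFS_LOG_LNUM and c->log_last:

#include <stdio.h>

#define LOG_FIRST 3
#define LOG_LAST  6

static int next_log_lnum(int lnum)
{
        lnum += 1;
        if (lnum > LOG_LAST)
                lnum = LOG_FIRST;       /* wrap back to the first log LEB */
        return lnum;
}

int main(void)
{
        int lnum = LOG_FIRST;
        int i;

        /* Walking the log twice prints: 3 4 5 6 3 4 5 6 */
        for (i = 0; i < 8; i++) {
                printf("%d ", lnum);
                lnum = next_log_lnum(lnum);
        }
        printf("\n");
        return 0;
}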
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 09df318e368f..bd644bf587a8 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); 673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, 0);
677 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
678 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
679 break; 680 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3dbad6fbd1eb..731d9e2e7b50 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_incomplete_group - drop nodes from an incomplete group. 567 * drop_last_node - drop the last node or group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
570 * 571 *
571 * This function returns %1 if nodes are dropped and %0 otherwise. 572 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero.
574 * This function returns %1 if a node was dropped and %0 otherwise.
572 */ 575 */
573static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) 576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
574{ 577{
575 int dropped = 0; 578 int dropped = 0;
576 579
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
589 kfree(snod); 592 kfree(snod);
590 sleb->nodes_cnt -= 1; 593 sleb->nodes_cnt -= 1;
591 dropped = 1; 594 dropped = 1;
595 if (!grouped)
596 break;
592 } 597 }
593 return dropped; 598 return dropped;
594} 599}
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
609struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
610 int offs, void *sbuf, int grouped) 615 int offs, void *sbuf, int grouped)
611{ 616{
612 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; 617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
613 int empty_chkd = 0, start = offs;
614 struct ubifs_scan_leb *sleb; 618 struct ubifs_scan_leb *sleb;
615 void *buf = sbuf + offs; 619 void *buf = sbuf + offs;
616 620
@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
620 if (IS_ERR(sleb)) 624 if (IS_ERR(sleb))
621 return sleb; 625 return sleb;
622 626
623 if (sleb->ecc) 627 ubifs_assert(len >= 8);
624 need_clean = 1;
625
626 while (len >= 8) { 628 while (len >= 8) {
627 int ret;
628
629 dbg_scan("look at LEB %d:%d (%d bytes left)", 629 dbg_scan("look at LEB %d:%d (%d bytes left)",
630 lnum, offs, len); 630 lnum, offs, len);
631 631
@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 635 * Scan quietly until there is an error from which we cannot
636 * recover 636 * recover
637 */ 637 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
639
640 if (ret == SCANNED_A_NODE) { 639 if (ret == SCANNED_A_NODE) {
641 /* A valid node, and not a padding node */ 640 /* A valid node, and not a padding node */
642 struct ubifs_ch *ch = buf; 641 struct ubifs_ch *ch = buf;
@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
649 offs += node_len; 648 offs += node_len;
650 buf += node_len; 649 buf += node_len;
651 len -= node_len; 650 len -= node_len;
652 continue; 651 } else if (ret > 0) {
653 }
654
655 if (ret > 0) {
656 /* Padding bytes or a valid padding node */ 652 /* Padding bytes or a valid padding node */
657 offs += ret; 653 offs += ret;
658 buf += ret; 654 buf += ret;
659 len -= ret; 655 len -= ret;
660 continue; 656 } else if (ret == SCANNED_EMPTY_SPACE ||
661 } 657 ret == SCANNED_GARBAGE ||
662 658 ret == SCANNED_A_BAD_PAD_NODE ||
663 if (ret == SCANNED_EMPTY_SPACE) { 659 ret == SCANNED_A_CORRUPT_NODE) {
664 if (!is_empty(buf, len)) { 660 dbg_rcvry("found corruption - %d", ret);
665 if (!is_last_write(c, buf, offs))
666 break;
667 clean_buf(c, &buf, lnum, &offs, &len);
668 need_clean = 1;
669 }
670 empty_chkd = 1;
671 break; 661 break;
672 } 662 } else {
673 663 dbg_err("unexpected return value %d", ret);
674 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
675 if (is_last_write(c, buf, offs)) {
676 clean_buf(c, &buf, lnum, &offs, &len);
677 need_clean = 1;
678 empty_chkd = 1;
679 break;
680 }
681
682 if (ret == SCANNED_A_CORRUPT_NODE)
683 if (no_more_nodes(c, buf, len, lnum, offs)) {
684 clean_buf(c, &buf, lnum, &offs, &len);
685 need_clean = 1;
686 empty_chkd = 1;
687 break;
688 }
689
690 if (quiet) {
691 /* Redo the last scan but noisily */
692 quiet = 0;
693 continue;
694 }
695
696 switch (ret) {
697 case SCANNED_GARBAGE:
698 dbg_err("garbage");
699 goto corrupted;
700 case SCANNED_A_CORRUPT_NODE:
701 case SCANNED_A_BAD_PAD_NODE:
702 dbg_err("bad node");
703 goto corrupted;
704 default:
705 dbg_err("unknown");
706 err = -EINVAL; 664 err = -EINVAL;
707 goto error; 665 goto error;
708 } 666 }
709 } 667 }
710 668
711 if (!empty_chkd && !is_empty(buf, len)) { 669 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
712 if (is_last_write(c, buf, offs)) { 670 if (!is_last_write(c, buf, offs))
713 clean_buf(c, &buf, lnum, &offs, &len); 671 goto corrupted_rescan;
714 need_clean = 1; 672 } else if (ret == SCANNED_A_CORRUPT_NODE) {
715 } else { 673 if (!no_more_nodes(c, buf, len, lnum, offs))
674 goto corrupted_rescan;
675 } else if (!is_empty(buf, len)) {
676 if (!is_last_write(c, buf, offs)) {
716 int corruption = first_non_ff(buf, len); 677 int corruption = first_non_ff(buf, len);
717 678
718 /* 679 /*
@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
728 } 689 }
729 } 690 }
730 691
731 /* Drop nodes from incomplete group */ 692 min_io_unit = round_down(offs, c->min_io_size);
732 if (grouped && drop_incomplete_group(sleb, &offs)) { 693 if (grouped)
733 buf = sbuf + offs; 694 /*
734 len = c->leb_size - offs; 695 * If nodes are grouped, always drop the incomplete group at
735 clean_buf(c, &buf, lnum, &offs, &len); 696 * the end.
736 need_clean = 1; 697 */
737 } 698 drop_last_node(sleb, &offs, 1);
738 699
739 if (offs % c->min_io_size) { 700 /*
740 clean_buf(c, &buf, lnum, &offs, &len); 701 * While we are in the middle of the same min. I/O unit, keep dropping
741 need_clean = 1; 702 * nodes. So basically, what we want is to make sure that the last min.
742 } 703 * I/O unit where we saw the corruption is dropped completely with all
704 * the uncorrupted nodes which may possibly sit there.
705 *
706 * In other words, let's name the min. I/O unit where the corruption
707 * starts B, and the previous min. I/O unit A. The below code tries to
708 * deal with a situation when half of B contains valid nodes or the end
709 * of a valid node, and the second half of B contains corrupted data or
710 * garbage. This means that UBIFS had been writing to B just before the
711 * power cut happened. I do not know how realistic this scenario is,
712 * where half of the min. I/O unit has been written successfully and the
713 * other half has not, but it is possible in our 'failure mode emulation'
714 * infrastructure at least.
715 *
716 * So what is the problem, why do we need to drop those nodes? Why can't
717 * we just clean up the second half of B by putting a padding node
718 * there? We can, and this works fine with one exception which was
719 * reproduced with power cut emulation testing and happens extremely
720 * rarely. The description follows, but it is worth noting that this is
721 * only about the GC head, so we could do this trick only if the bud
722 * belongs to the GC head, but it does not seem to be worth an
723 * additional "if" statement.
724 *
725 * So, imagine the file-system is full, we run GC which is moving valid
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
728 * and will try to continue. Imagine that LEB X is currently the
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the
730 * same as amount of free space in LEB X.
731 *
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We
733 * are here trying to recover LEB Y which is the GC head LEB. We find
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
736 * fails, because it cannot find a dirty LEB which could be GC'd into
737 * LEB Y! Even LEB X does not match because the amount of valid nodes
738 * there does not fit the free space in LEB Y any more! And this is
739 * because of the padding node which we added to LEB Y. The
740 * user-visible effect of this which I once observed and analysed is
741 * that we cannot mount the file-system and get an -ENOSPC error.
742 *
743 * So obviously, to make sure that situation does not happen we should
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O
745 * unit in LEB Y should be A. This is basically what the below code
746 * tries to do.
747 */
748 while (min_io_unit == round_down(offs, c->min_io_size) &&
749 min_io_unit != offs &&
750 drop_last_node(sleb, &offs, grouped));
751
752 buf = sbuf + offs;
753 len = c->leb_size - offs;
743 754
755 clean_buf(c, &buf, lnum, &offs, &len);
744 ubifs_end_scan(c, sleb, lnum, offs); 756 ubifs_end_scan(c, sleb, lnum, offs);
745 757
746 if (need_clean) { 758 err = fix_unclean_leb(c, sleb, start);
747 err = fix_unclean_leb(c, sleb, start); 759 if (err)
748 if (err) 760 goto error;
749 goto error;
750 }
751 761
752 return sleb; 762 return sleb;
753 763
764corrupted_rescan:
765 /* Re-scan the corrupted data with verbose messages */
766 dbg_err("corruptio %d", ret);
767 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
754corrupted: 768corrupted:
755 ubifs_scanned_corruption(c, lnum, offs, buf); 769 ubifs_scanned_corruption(c, lnum, offs, buf);
756 err = -EUCLEAN; 770 err = -EUCLEAN;
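The dropping logic described in the long comment above boils down to the loop condition 'min_io_unit == round_down(offs, c->min_io_size) && min_io_unit != offs'. A user-space sketch with made-up node offsets shows how it frees the min. I/O unit that contained the corruption:

#include <stdio.h>

#define round_down(x, y) ((x) - ((x) % (y)))

int main(void)
{
        int min_io_size = 512;
        /* Pretend node end offsets in the LEB; corruption was seen at 1800. */
        int node_ends[] = { 1700, 1600, 1400, 900 };
        int offs = 1800;
        int i = 0;
        int min_io_unit = round_down(offs, min_io_size);        /* 1536 */

        /*
         * Keep dropping nodes while we are still inside the min. I/O unit
         * where the corruption starts, so that unit ends up fully unused.
         */
        while (min_io_unit == round_down(offs, min_io_size) &&
               min_io_unit != offs && i < 4)
                offs = node_ends[i++];

        printf("recovery resumes at offset %d\n", offs);       /* 1400 */
        return 0;
}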
@@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1070} 1084}
1071 1085
1072/** 1086/**
1087 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1088 * @c: UBIFS file-system description object
1089 *
1090 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1091 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1092 * zero in case of success and a negative error code in case of failure.
1093 */
1094static int grab_empty_leb(struct ubifs_info *c)
1095{
1096 int lnum, err;
1097
1098 /*
1099 * Note, it is very important to first search for an empty LEB and then
1100 * run the commit, not vice-versa. The reason is that there might be
1101 * only one empty LEB at the moment, the one which has been the
1102 * @c->gc_lnum just before the power cut happened. During the regular
1103 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1104 * one but GC can grab it. But at this moment this single empty LEB is
1105 * not marked as taken, so if we run commit - what happens? Right, the
1106 * commit will grab it and write the index there. Remember that the
1107 * index always expands as long as there is free space, and it only
1108 * starts consolidating when we run out of space.
1109 *
1110 * IOW, if we run commit now, we might not be able to find a free LEB
1111 * after this.
1112 */
1113 lnum = ubifs_find_free_leb_for_idx(c);
1114 if (lnum < 0) {
1115 dbg_err("could not find an empty LEB");
1116 dbg_dump_lprops(c);
1117 dbg_dump_budg(c, &c->bi);
1118 return lnum;
1119 }
1120
1121 /* Reset the index flag */
1122 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1123 LPROPS_INDEX, 0);
1124 if (err)
1125 return err;
1126
1127 c->gc_lnum = lnum;
1128 dbg_rcvry("found empty LEB %d, run commit", lnum);
1129
1130 return ubifs_run_commit(c);
1131}
1132
1133/**
1073 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1134 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1074 * @c: UBIFS file-system description object 1135 * @c: UBIFS file-system description object
1075 * 1136 *
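The ordering constraint explained in the comment of 'grab_empty_leb()' (find the empty LEB first, run the commit second) can be modelled in a few lines; everything below is a toy stand-in, not the UBIFS API:

#include <stdio.h>

static int empty_leb = 42;      /* the single remaining empty LEB */

static int find_free_leb(void)
{
        int lnum = empty_leb;

        if (lnum >= 0)
                empty_leb = -1; /* mark it taken */
        return lnum;
}

static int run_commit(void)
{
        /* The commit would grab an empty LEB for the index if one were
         * still available; here it is already reserved for GC. */
        if (empty_leb >= 0)
                empty_leb = -1;
        return 0;
}

int main(void)
{
        int gc_lnum = find_free_leb();  /* 1. reserve while it exists */
        int err = run_commit();         /* 2. only now run the commit */

        printf("gc_lnum=%d err=%d\n", gc_lnum, err);
        return 0;
}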
@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1091{ 1152{
1092 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1153 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1093 struct ubifs_lprops lp; 1154 struct ubifs_lprops lp;
1094 int lnum, err; 1155 int err;
1156
1157 dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1095 1158
1096 c->gc_lnum = -1; 1159 c->gc_lnum = -1;
1097 if (wbuf->lnum == -1) { 1160 if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1098 dbg_rcvry("no GC head LEB"); 1161 return grab_empty_leb(c);
1099 goto find_free; 1162
1100 }
1101 /*
1102 * See whether the used space in the dirtiest LEB fits in the GC head
1103 * LEB.
1104 */
1105 if (wbuf->offs == c->leb_size) {
1106 dbg_rcvry("no room in GC head LEB");
1107 goto find_free;
1108 }
1109 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1163 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1110 if (err) { 1164 if (err) {
1111 /* 1165 if (err != -ENOSPC)
1112 * There are no dirty or empty LEBs subject to here being
1113 * enough for the index. Try to use
1114 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1115 * LEBs (ignoring index requirements). If the index then
1116 * doesn't have enough LEBs the recovery commit will fail -
1117 * which is the same result anyway i.e. recovery fails. So
1118 * there is no problem ignoring index requirements and just
1119 * grabbing a free LEB since we have already established there
1120 * is not a dirty LEB we could have used instead.
1121 */
1122 if (err == -ENOSPC) {
1123 dbg_rcvry("could not find a dirty LEB");
1124 goto find_free;
1125 }
1126 return err;
1127 }
1128 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1129 lnum = lp.lnum;
1130 if (lp.free + lp.dirty == c->leb_size) {
1131 /* An empty LEB was returned */
1132 if (lp.free != c->leb_size) {
1133 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1134 0, 0, 0, 0);
1135 if (err)
1136 return err;
1137 }
1138 err = ubifs_leb_unmap(c, lnum);
1139 if (err)
1140 return err; 1166 return err;
1141 c->gc_lnum = lnum; 1167
1142 dbg_rcvry("allocated LEB %d for GC", lnum); 1168 dbg_rcvry("could not find a dirty LEB");
1143 /* Run the commit */ 1169 return grab_empty_leb(c);
1144 dbg_rcvry("committing");
1145 return ubifs_run_commit(c);
1146 }
1147 /*
1148 * There was no empty LEB so the used space in the dirtiest LEB must fit
1149 * in the GC head LEB.
1150 */
1151 if (lp.free + lp.dirty < wbuf->offs) {
1152 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1153 lnum, wbuf->lnum, wbuf->offs);
1154 err = ubifs_return_leb(c, lnum);
1155 if (err)
1156 return err;
1157 goto find_free;
1158 } 1170 }
1171
1172 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1173 ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1174
1159 /* 1175 /*
1160 * We run the commit before garbage collection otherwise subsequent 1176 * We run the commit before garbage collection otherwise subsequent
1161 * mounts will see the GC and orphan deletion in a different order. 1177 * mounts will see the GC and orphan deletion in a different order.
@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1164 err = ubifs_run_commit(c); 1180 err = ubifs_run_commit(c);
1165 if (err) 1181 if (err)
1166 return err; 1182 return err;
1167 /* 1183
1168 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC 1184 dbg_rcvry("GC'ing LEB %d", lp.lnum);
1169 * - use locking to keep 'ubifs_assert()' happy.
1170 */
1171 dbg_rcvry("GC'ing LEB %d", lnum);
1172 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1185 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1173 err = ubifs_garbage_collect_leb(c, &lp); 1186 err = ubifs_garbage_collect_leb(c, &lp);
1174 if (err >= 0) { 1187 if (err >= 0) {
@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1184 err = -EINVAL; 1197 err = -EINVAL;
1185 return err; 1198 return err;
1186 } 1199 }
1187 if (err != LEB_RETAINED) { 1200
1188 dbg_err("GC returned %d", err); 1201 ubifs_assert(err == LEB_RETAINED);
1202 if (err != LEB_RETAINED)
1189 return -EINVAL; 1203 return -EINVAL;
1190 } 1204
1191 err = ubifs_leb_unmap(c, c->gc_lnum); 1205 err = ubifs_leb_unmap(c, c->gc_lnum);
1192 if (err) 1206 if (err)
1193 return err; 1207 return err;
1194 dbg_rcvry("allocated LEB %d for GC", lnum);
1195 return 0;
1196 1208
1197find_free: 1209 dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1198 /* 1210 return 0;
1199 * There is no GC head LEB or the free space in the GC head LEB is too
1200 * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1201 * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1202 */
1203 lnum = ubifs_find_free_leb_for_idx(c);
1204 if (lnum < 0) {
1205 dbg_err("could not find an empty LEB");
1206 return lnum;
1207 }
1208 /* And reset the index flag */
1209 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1210 LPROPS_INDEX, 0);
1211 if (err)
1212 return err;
1213 c->gc_lnum = lnum;
1214 dbg_rcvry("allocated LEB %d for GC", lnum);
1215 /* Run the commit */
1216 dbg_rcvry("committing");
1217 return ubifs_run_commit(c);
1218} 1211}
1219 1212
1220/** 1213/**
@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1456 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1449 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1457 if (err) 1450 if (err)
1458 goto out; 1451 goto out;
1459 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", 1452 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1460 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1453 (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1461 return 0; 1454 return 0;
1462 1455
@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
1505 e->i_size = le64_to_cpu(ino->size); 1498 e->i_size = le64_to_cpu(ino->size);
1506 } 1499 }
1507 } 1500 }
1501
1508 if (e->exists && e->i_size < e->d_size) { 1502 if (e->exists && e->i_size < e->d_size) {
1509 if (!e->inode && c->ro_mount) { 1503 if (c->ro_mount) {
1510 /* Fix the inode size and pin it in memory */ 1504 /* Fix the inode size and pin it in memory */
1511 struct inode *inode; 1505 struct inode *inode;
1506 struct ubifs_inode *ui;
1507
1508 ubifs_assert(!e->inode);
1512 1509
1513 inode = ubifs_iget(c->vfs_sb, e->inum); 1510 inode = ubifs_iget(c->vfs_sb, e->inum);
1514 if (IS_ERR(inode)) 1511 if (IS_ERR(inode))
1515 return PTR_ERR(inode); 1512 return PTR_ERR(inode);
1513
1514 ui = ubifs_inode(inode);
1516 if (inode->i_size < e->d_size) { 1515 if (inode->i_size < e->d_size) {
1517 dbg_rcvry("ino %lu size %lld -> %lld", 1516 dbg_rcvry("ino %lu size %lld -> %lld",
1518 (unsigned long)e->inum, 1517 (unsigned long)e->inum,
1519 e->d_size, inode->i_size); 1518 inode->i_size, e->d_size);
1520 inode->i_size = e->d_size; 1519 inode->i_size = e->d_size;
1521 ubifs_inode(inode)->ui_size = e->d_size; 1520 ui->ui_size = e->d_size;
1521 ui->synced_i_size = e->d_size;
1522 e->inode = inode; 1522 e->inode = inode;
1523 this = rb_next(this); 1523 this = rb_next(this);
1524 continue; 1524 continue;
@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
1533 iput(e->inode); 1533 iput(e->inode);
1534 } 1534 }
1535 } 1535 }
1536
1536 this = rb_next(this); 1537 this = rb_next(this);
1537 rb_erase(&e->rb, &c->size_tree); 1538 rb_erase(&e->rb, &c->size_tree);
1538 kfree(e); 1539 kfree(e);
1539 } 1540 }
1541
1540 return 0; 1542 return 0;
1541} 1543}
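The size-recovery rule implemented above is simply: if data nodes extend past the recorded inode size, raise the inode size to cover them. A user-space model of that rule with a cut-down, hypothetical size_entry:

#include <stdio.h>

struct size_entry {
        long long i_size;       /* size recorded in the inode node */
        long long d_size;       /* highest offset covered by data nodes */
};

static void fix_size(struct size_entry *e)
{
        if (e->i_size < e->d_size)
                e->i_size = e->d_size;  /* pin the larger, recovered size */
}

int main(void)
{
        struct size_entry e = { 4096, 12288 };

        fix_size(&e);
        printf("recovered size: %lld\n", e.i_size);     /* 12288 */
        return 0;
}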
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index d3d6d365bfc1..6617280d1679 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,44 +33,32 @@
33 */ 33 */
34 34
35#include "ubifs.h" 35#include "ubifs.h"
36 36#include <linux/list_sort.h>
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44 REPLAY_DELETION = 1,
45 REPLAY_REF = 2,
46};
47 37
48/** 38/**
49 * struct replay_entry - replay tree entry. 39 * struct replay_entry - replay list entry.
50 * @lnum: logical eraseblock number of the node 40 * @lnum: logical eraseblock number of the node
51 * @offs: node offset 41 * @offs: node offset
52 * @len: node length 42 * @len: node length
43 * @deletion: non-zero if this entry corresponds to a node deletion
53 * @sqnum: node sequence number 44 * @sqnum: node sequence number
54 * @flags: replay flags 45 * @list: links the replay list
55 * @rb: links the replay tree
56 * @key: node key 46 * @key: node key
57 * @nm: directory entry name 47 * @nm: directory entry name
58 * @old_size: truncation old size 48 * @old_size: truncation old size
59 * @new_size: truncation new size 49 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 * @jhead: journal head number of the bud
63 * 50 *
64 * UBIFS journal replay must compare node sequence numbers, which means it must 51 * The replay process first scans all buds and builds the replay list, then
65 * build a tree of node information to insert into the TNC. 52 * sorts the replay list in node sequence number order, and then inserts all
53 * the replay entries into the TNC.
66 */ 54 */
67struct replay_entry { 55struct replay_entry {
68 int lnum; 56 int lnum;
69 int offs; 57 int offs;
70 int len; 58 int len;
59 unsigned int deletion:1;
71 unsigned long long sqnum; 60 unsigned long long sqnum;
72 int flags; 61 struct list_head list;
73 struct rb_node rb;
74 union ubifs_key key; 62 union ubifs_key key;
75 union { 63 union {
76 struct qstr nm; 64 struct qstr nm;
@@ -78,11 +66,6 @@ struct replay_entry {
78 loff_t old_size; 66 loff_t old_size;
79 loff_t new_size; 67 loff_t new_size;
80 }; 68 };
81 struct {
82 int free;
83 int dirty;
84 int jhead;
85 };
86 }; 69 };
87}; 70};
88 71
@@ -90,57 +73,64 @@ struct replay_entry {
90 * struct bud_entry - entry in the list of buds to replay. 73 * struct bud_entry - entry in the list of buds to replay.
91 * @list: next bud in the list 74 * @list: next bud in the list
92 * @bud: bud description object 75 * @bud: bud description object
93 * @free: free bytes in the bud
94 * @sqnum: reference node sequence number 76 * @sqnum: reference node sequence number
77 * @free: free bytes in the bud
78 * @dirty: dirty bytes in the bud
95 */ 79 */
96struct bud_entry { 80struct bud_entry {
97 struct list_head list; 81 struct list_head list;
98 struct ubifs_bud *bud; 82 struct ubifs_bud *bud;
99 int free;
100 unsigned long long sqnum; 83 unsigned long long sqnum;
84 int free;
85 int dirty;
101}; 86};
102 87
103/** 88/**
104 * set_bud_lprops - set free and dirty space used by a bud. 89 * set_bud_lprops - set free and dirty space used by a bud.
105 * @c: UBIFS file-system description object 90 * @c: UBIFS file-system description object
106 * @r: replay entry of bud 91 * @b: bud entry which describes the bud
92 *
93 * This function makes sure the LEB properties of bud @b are set correctly
94 * after the replay. Returns zero in case of success and a negative error code
95 * in case of failure.
107 */ 96 */
108static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 97static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
109{ 98{
110 const struct ubifs_lprops *lp; 99 const struct ubifs_lprops *lp;
111 int err = 0, dirty; 100 int err = 0, dirty;
112 101
113 ubifs_get_lprops(c); 102 ubifs_get_lprops(c);
114 103
115 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 104 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
116 if (IS_ERR(lp)) { 105 if (IS_ERR(lp)) {
117 err = PTR_ERR(lp); 106 err = PTR_ERR(lp);
118 goto out; 107 goto out;
119 } 108 }
120 109
121 dirty = lp->dirty; 110 dirty = lp->dirty;
122 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 111 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
123 /* 112 /*
124 * The LEB was added to the journal with a starting offset of 113 * The LEB was added to the journal with a starting offset of
125 * zero which means the LEB must have been empty. The LEB 114 * zero which means the LEB must have been empty. The LEB
126 * property values should be lp->free == c->leb_size and 115 * property values should be @lp->free == @c->leb_size and
127 * lp->dirty == 0, but that is not the case. The reason is that 116 * @lp->dirty == 0, but that is not the case. The reason is that
128 * the LEB was garbage collected. The garbage collector resets 117 * the LEB had been garbage collected before it became the bud,
129 * the free and dirty space without recording it anywhere except 118 * and there was no commit in between. The garbage collector
130 * lprops, so if there is not a commit then lprops does not have 119 * resets the free and dirty space without recording it
131 * that information next time the file system is mounted. 120 * anywhere except lprops, so if there was no commit then
121 * lprops does not have that information.
132 * 122 *
133 * We do not need to adjust free space because the scan has told 123 * We do not need to adjust free space because the scan has told
134 * us the exact value which is recorded in the replay entry as 124 * us the exact value which is recorded in the replay entry as
135 * r->free. 125 * @b->free.
136 * 126 *
137 * However we do need to subtract from the dirty space the 127 * However we do need to subtract from the dirty space the
138 * amount of space that the garbage collector reclaimed, which 128 * amount of space that the garbage collector reclaimed, which
139 * is the whole LEB minus the amount of space that was free. 129 * is the whole LEB minus the amount of space that was free.
140 */ 130 */
141 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 131 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
142 lp->free, lp->dirty); 132 lp->free, lp->dirty);
143 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 133 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
144 lp->free, lp->dirty); 134 lp->free, lp->dirty);
145 dirty -= c->leb_size - lp->free; 135 dirty -= c->leb_size - lp->free;
146 /* 136 /*
@@ -152,10 +142,10 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
152 */ 142 */
153 if (dirty != 0) 143 if (dirty != 0)
154 dbg_msg("LEB %d lp: %d free %d dirty " 144 dbg_msg("LEB %d lp: %d free %d dirty "
155 "replay: %d free %d dirty", r->lnum, lp->free, 145 "replay: %d free %d dirty", b->bud->lnum,
156 lp->dirty, r->free, r->dirty); 146 lp->free, lp->dirty, b->free, b->dirty);
157 } 147 }
158 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
159 lp->flags | LPROPS_TAKEN, 0); 149 lp->flags | LPROPS_TAKEN, 0);
160 if (IS_ERR(lp)) { 150 if (IS_ERR(lp)) {
161 err = PTR_ERR(lp); 151 err = PTR_ERR(lp);
@@ -163,8 +153,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
163 } 153 }
164 154
165 /* Make sure the journal head points to the latest bud */ 155 /* Make sure the journal head points to the latest bud */
166 err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, 156 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
167 c->leb_size - r->free, UBI_SHORTTERM); 157 b->bud->lnum, c->leb_size - b->free,
158 UBI_SHORTTERM);
168 159
169out: 160out:
170 ubifs_release_lprops(c); 161 ubifs_release_lprops(c);
@@ -172,6 +163,27 @@ out:
172} 163}
173 164
174/** 165/**
166 * set_buds_lprops - set free and dirty space for all replayed buds.
167 * @c: UBIFS file-system description object
168 *
169 * This function sets LEB properties for all replayed buds. Returns zero in
170 * case of success and a negative error code in case of failure.
171 */
172static int set_buds_lprops(struct ubifs_info *c)
173{
174 struct bud_entry *b;
175 int err;
176
177 list_for_each_entry(b, &c->replay_buds, list) {
178 err = set_bud_lprops(c, b);
179 if (err)
180 return err;
181 }
182
183 return 0;
184}
185
186/**
175 * trun_remove_range - apply a replay entry for a truncation to the TNC. 187 * trun_remove_range - apply a replay entry for a truncation to the TNC.
176 * @c: UBIFS file-system description object 188 * @c: UBIFS file-system description object
177 * @r: replay entry of truncation 189 * @r: replay entry of truncation
@@ -207,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
207 */ 219 */
208static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 220static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
209{ 221{
210 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 222 int err;
211 223
212 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 224 dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
213 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 225 r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
214 226
215 /* Set c->replay_sqnum to help deal with dangling branches. */ 227 /* Set c->replay_sqnum to help deal with dangling branches. */
216 c->replay_sqnum = r->sqnum; 228 c->replay_sqnum = r->sqnum;
217 229
218 if (r->flags & REPLAY_REF) 230 if (is_hash_key(c, &r->key)) {
219 err = set_bud_lprops(c, r); 231 if (r->deletion)
220 else if (is_hash_key(c, &r->key)) {
221 if (deletion)
222 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 232 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
223 else 233 else
224 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 234 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
225 r->len, &r->nm); 235 r->len, &r->nm);
226 } else { 236 } else {
227 if (deletion) 237 if (r->deletion)
228 switch (key_type(c, &r->key)) { 238 switch (key_type(c, &r->key)) {
229 case UBIFS_INO_KEY: 239 case UBIFS_INO_KEY:
230 { 240 {
@@ -247,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
247 return err; 257 return err;
248 258
249 if (c->need_recovery) 259 if (c->need_recovery)
250 err = ubifs_recover_size_accum(c, &r->key, deletion, 260 err = ubifs_recover_size_accum(c, &r->key, r->deletion,
251 r->new_size); 261 r->new_size);
252 } 262 }
253 263
@@ -255,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
255} 265}
256 266
257/** 267/**
258 * destroy_replay_tree - destroy the replay. 268 * replay_entries_cmp - compare 2 replay entries.
259 * @c: UBIFS file-system description object 269 * @priv: UBIFS file-system description object
270 * @a: first replay entry
271 * @b: second replay entry
260 * 272 *
261 * Destroy the replay tree. 273 * This is a comparison function for 'list_sort()' which compares 2 replay
274 * entries @a and @b by comparing their sequence numbers. Returns %1 if @a has
275 * a greater sequence number and %-1 otherwise.
262 */ 276 */
263static void destroy_replay_tree(struct ubifs_info *c) 277static int replay_entries_cmp(void *priv, struct list_head *a,
278 struct list_head *b)
264{ 279{
265 struct rb_node *this = c->replay_tree.rb_node; 280 struct replay_entry *ra, *rb;
266 struct replay_entry *r; 281
267 282 cond_resched();
268 while (this) { 283 if (a == b)
269 if (this->rb_left) { 284 return 0;
270 this = this->rb_left; 285
271 continue; 286 ra = list_entry(a, struct replay_entry, list);
272 } else if (this->rb_right) { 287 rb = list_entry(b, struct replay_entry, list);
273 this = this->rb_right; 288 ubifs_assert(ra->sqnum != rb->sqnum);
274 continue; 289 if (ra->sqnum > rb->sqnum)
275 } 290 return 1;
276 r = rb_entry(this, struct replay_entry, rb); 291 return -1;
277 this = rb_parent(this);
278 if (this) {
279 if (this->rb_left == &r->rb)
280 this->rb_left = NULL;
281 else
282 this->rb_right = NULL;
283 }
284 if (is_hash_key(c, &r->key))
285 kfree(r->nm.name);
286 kfree(r);
287 }
288 c->replay_tree = RB_ROOT;
289} 292}
290 293
291/** 294/**
292 * apply_replay_tree - apply the replay tree to the TNC. 295 * apply_replay_list - apply the replay list to the TNC.
293 * @c: UBIFS file-system description object 296 * @c: UBIFS file-system description object
294 * 297 *
295 * Apply the replay tree. 298 * Apply all entries in the replay list to the TNC. Returns zero in case of
296 * Returns zero in case of success and a negative error code in case of 299 * success and a negative error code in case of failure.
297 * failure.
298 */ 300 */
299static int apply_replay_tree(struct ubifs_info *c) 301static int apply_replay_list(struct ubifs_info *c)
300{ 302{
301 struct rb_node *this = rb_first(&c->replay_tree); 303 struct replay_entry *r;
304 int err;
302 305
303 while (this) { 306 list_sort(c, &c->replay_list, &replay_entries_cmp);
304 struct replay_entry *r;
305 int err;
306 307
308 list_for_each_entry(r, &c->replay_list, list) {
307 cond_resched(); 309 cond_resched();
308 310
309 r = rb_entry(this, struct replay_entry, rb);
310 err = apply_replay_entry(c, r); 311 err = apply_replay_entry(c, r);
311 if (err) 312 if (err)
312 return err; 313 return err;
313 this = rb_next(this);
314 } 314 }
315
315 return 0; 316 return 0;
316} 317}
317 318
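Since 'list_sort()' with 'replay_entries_cmp()' only orders entries by sequence number, the same effect can be sketched in user space with qsort(); the struct below is a cut-down, hypothetical replay_entry:

#include <stdio.h>
#include <stdlib.h>

struct entry {
        unsigned long long sqnum;
};

static int cmp(const void *a, const void *b)
{
        const struct entry *ra = a, *rb = b;

        if (ra->sqnum == rb->sqnum)
                return 0;       /* cannot happen for real replay entries */
        return ra->sqnum > rb->sqnum ? 1 : -1;
}

int main(void)
{
        struct entry e[] = { { 7 }, { 3 }, { 9 }, { 1 } };
        int i;

        qsort(e, 4, sizeof(e[0]), cmp);
        for (i = 0; i < 4; i++)
                printf("%llu ", e[i].sqnum);    /* prints: 1 3 7 9 */
        printf("\n");
        return 0;
}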
318/** 319/**
319 * insert_node - insert a node to the replay tree. 320 * destroy_replay_list - destroy the replay.
321 * @c: UBIFS file-system description object
322 *
323 * Destroy the replay list.
324 */
325static void destroy_replay_list(struct ubifs_info *c)
326{
327 struct replay_entry *r, *tmp;
328
329 list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
330 if (is_hash_key(c, &r->key))
331 kfree(r->nm.name);
332 list_del(&r->list);
333 kfree(r);
334 }
335}
336
337/**
338 * insert_node - insert a node to the replay list
320 * @c: UBIFS file-system description object 339 * @c: UBIFS file-system description object
321 * @lnum: node logical eraseblock number 340 * @lnum: node logical eraseblock number
322 * @offs: node offset 341 * @offs: node offset
@@ -328,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
328 * @old_size: truncation old size 347 * @old_size: truncation old size
329 * @new_size: truncation new size 348 * @new_size: truncation new size
330 * 349 *
331 * This function inserts a scanned non-direntry node to the replay tree. The 350 * This function inserts a scanned non-direntry node to the replay list. The
332 * replay tree is an RB-tree containing @struct replay_entry elements which are 351 * replay list contains @struct replay_entry elements, and we sort this list in
333 * indexed by the sequence number. The replay tree is applied at the very end 352 * sequence number order before applying it. The replay list is applied at the
334 * of the replay process. Since the tree is sorted in sequence number order, 353 * very end of the replay process. Since the list is sorted in sequence number
335 * the older modifications are applied first. This function returns zero in 354 * order, the older modifications are applied first. This function returns zero
336 * case of success and a negative error code in case of failure. 355 * in case of success and a negative error code in case of failure.
337 */ 356 */
338static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 357static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
339 union ubifs_key *key, unsigned long long sqnum, 358 union ubifs_key *key, unsigned long long sqnum,
340 int deletion, int *used, loff_t old_size, 359 int deletion, int *used, loff_t old_size,
341 loff_t new_size) 360 loff_t new_size)
342{ 361{
343 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
344 struct replay_entry *r; 362 struct replay_entry *r;
345 363
364 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
365
346 if (key_inum(c, key) >= c->highest_inum) 366 if (key_inum(c, key) >= c->highest_inum)
347 c->highest_inum = key_inum(c, key); 367 c->highest_inum = key_inum(c, key);
348 368
349 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
350 while (*p) {
351 parent = *p;
352 r = rb_entry(parent, struct replay_entry, rb);
353 if (sqnum < r->sqnum) {
354 p = &(*p)->rb_left;
355 continue;
356 } else if (sqnum > r->sqnum) {
357 p = &(*p)->rb_right;
358 continue;
359 }
360 ubifs_err("duplicate sqnum in replay");
361 return -EINVAL;
362 }
363
364 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 369 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
365 if (!r) 370 if (!r)
366 return -ENOMEM; 371 return -ENOMEM;
@@ -370,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
370 r->lnum = lnum; 375 r->lnum = lnum;
371 r->offs = offs; 376 r->offs = offs;
372 r->len = len; 377 r->len = len;
378 r->deletion = !!deletion;
373 r->sqnum = sqnum; 379 r->sqnum = sqnum;
374 r->flags = (deletion ? REPLAY_DELETION : 0); 380 key_copy(c, key, &r->key);
375 r->old_size = old_size; 381 r->old_size = old_size;
376 r->new_size = new_size; 382 r->new_size = new_size;
377 key_copy(c, key, &r->key);
378 383
379 rb_link_node(&r->rb, parent, p); 384 list_add_tail(&r->list, &c->replay_list);
380 rb_insert_color(&r->rb, &c->replay_tree);
381 return 0; 385 return 0;
382} 386}
383 387
384/** 388/**
385 * insert_dent - insert a directory entry node into the replay tree. 389 * insert_dent - insert a directory entry node into the replay list.
386 * @c: UBIFS file-system description object 390 * @c: UBIFS file-system description object
387 * @lnum: node logical eraseblock number 391 * @lnum: node logical eraseblock number
388 * @offs: node offset 392 * @offs: node offset
@@ -394,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
394 * @deletion: non-zero if this is a deletion 398 * @deletion: non-zero if this is a deletion
395 * @used: number of bytes in use in a LEB 399 * @used: number of bytes in use in a LEB
396 * 400 *
397 * This function inserts a scanned directory entry node to the replay tree. 401 * This function inserts a scanned directory entry node or an extended
398 * Returns zero in case of success and a negative error code in case of 402 * attribute entry to the replay list. Returns zero in case of success and a
399 * failure. 403 * negative error code in case of failure.
400 *
401 * This function is also used for extended attribute entries because they are
402 * implemented as directory entry nodes.
403 */ 404 */
404static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 405static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
405 union ubifs_key *key, const char *name, int nlen, 406 union ubifs_key *key, const char *name, int nlen,
406 unsigned long long sqnum, int deletion, int *used) 407 unsigned long long sqnum, int deletion, int *used)
407{ 408{
408 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
409 struct replay_entry *r; 409 struct replay_entry *r;
410 char *nbuf; 410 char *nbuf;
411 411
412 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
412 if (key_inum(c, key) >= c->highest_inum) 413 if (key_inum(c, key) >= c->highest_inum)
413 c->highest_inum = key_inum(c, key); 414 c->highest_inum = key_inum(c, key);
414 415
415 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
416 while (*p) {
417 parent = *p;
418 r = rb_entry(parent, struct replay_entry, rb);
419 if (sqnum < r->sqnum) {
420 p = &(*p)->rb_left;
421 continue;
422 }
423 if (sqnum > r->sqnum) {
424 p = &(*p)->rb_right;
425 continue;
426 }
427 ubifs_err("duplicate sqnum in replay");
428 return -EINVAL;
429 }
430
431 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 416 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
432 if (!r) 417 if (!r)
433 return -ENOMEM; 418 return -ENOMEM;
419
434 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 420 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
435 if (!nbuf) { 421 if (!nbuf) {
436 kfree(r); 422 kfree(r);
@@ -442,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
442 r->lnum = lnum; 428 r->lnum = lnum;
443 r->offs = offs; 429 r->offs = offs;
444 r->len = len; 430 r->len = len;
431 r->deletion = !!deletion;
445 r->sqnum = sqnum; 432 r->sqnum = sqnum;
433 key_copy(c, key, &r->key);
446 r->nm.len = nlen; 434 r->nm.len = nlen;
447 memcpy(nbuf, name, nlen); 435 memcpy(nbuf, name, nlen);
448 nbuf[nlen] = '\0'; 436 nbuf[nlen] = '\0';
449 r->nm.name = nbuf; 437 r->nm.name = nbuf;
450 r->flags = (deletion ? REPLAY_DELETION : 0);
451 key_copy(c, key, &r->key);
452 438
453 ubifs_assert(!*p); 439 list_add_tail(&r->list, &c->replay_list);
454 rb_link_node(&r->rb, parent, p);
455 rb_insert_color(&r->rb, &c->replay_tree);
456 return 0; 440 return 0;
457} 441}
458 442
@@ -489,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c,
489} 473}
490 474
491/** 475/**
476 * is_last_bud - check if the bud is the last in the journal head.
477 * @c: UBIFS file-system description object
478 * @bud: bud description object
479 *
480 * This function checks if bud @bud is the last bud in its journal head. This
481 * information is then used by 'replay_bud()' to decide whether the bud can
482 * have corruptions or not. Indeed, only last buds can be corrupted by power
483 * cuts. Returns %1 if this is the last bud, and %0 if not.
484 */
485static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
486{
487 struct ubifs_jhead *jh = &c->jheads[bud->jhead];
488 struct ubifs_bud *next;
489 uint32_t data;
490 int err;
491
492 if (list_is_last(&bud->list, &jh->buds_list))
493 return 1;
494
495 /*
496 * The following is a quirk to make sure we work correctly with UBIFS
497 * images used with older UBIFS.
498 *
499 * Normally, the last bud will be the last in the journal head's list
500 * of buds. However, there is one exception if the UBIFS image belongs
501 * to older UBIFS. This is fairly unlikely: one would need to use old
502 * UBIFS, then have a power cut exactly at the right point, and then
503 * try to mount this image with new UBIFS.
504 *
505 * The exception is: it is possible to have 2 buds A and B, A goes
506 * before B, and B is the last, bud B is contains no data, and bud A is
507 * corrupted at the end. The reason is that in older versions when the
508 * journal code switched the next bud (from A to B), it first added a
509 * log reference node for the new bud (B), and only after this it
510 * synchronized the write-buffer of current bud (A). But later this was
511 * changed and UBIFS started to always synchronize the write-buffer of
512 * the bud (A) before writing the log reference for the new bud (B).
513 *
514 * But because older UBIFS always synchronized A's write-buffer before
515 * writing to B, we can recognize this exceptional situation but
516 * checking the contents of bud B - if it is empty, then A can be
517 * treated as the last and we can recover it.
518 *
519 * TODO: remove this piece of code in a couple of years (today it is
520 * 16.05.2011).
521 */
522 next = list_entry(bud->list.next, struct ubifs_bud, list);
523 if (!list_is_last(&next->list, &jh->buds_list))
524 return 0;
525
526 err = ubi_read(c->ubi, next->lnum, (char *)&data,
527 next->start, 4);
528 if (err)
529 return 0;
530
531 return data == 0xFFFFFFFF;
532}
533
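The probe at the end of 'is_last_bud()' exploits the fact that erased flash reads back as all 0xFF bytes, so the first 4 bytes being 0xFFFFFFFF means the bud holds no data. A stand-alone sketch of that check; the byte values are arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int bud_is_empty(const unsigned char *leb)
{
        uint32_t data;

        memcpy(&data, leb, 4);  /* read the first 4 bytes of the bud */
        return data == 0xFFFFFFFF;
}

int main(void)
{
        unsigned char erased[4]  = { 0xff, 0xff, 0xff, 0xff };
        unsigned char written[4] = { 0x06, 0x10, 0x18, 0x31 }; /* arbitrary */

        printf("%d %d\n", bud_is_empty(erased), bud_is_empty(written));
        return 0;
}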
534/**
492 * replay_bud - replay a bud logical eraseblock. 535 * replay_bud - replay a bud logical eraseblock.
493 * @c: UBIFS file-system description object 536 * @c: UBIFS file-system description object
494 * @lnum: bud logical eraseblock number to replay 537 * @b: bud entry which describes the bud
495 * @offs: bud start offset
496 * @jhead: journal head to which this bud belongs
497 * @free: amount of free space in the bud is returned here
498 * @dirty: amount of dirty space from padding and deletion nodes is returned
499 * here
500 * 538 *
501 * This function returns zero in case of success and a negative error code in 539 * This function replays bud @bud, recovers it if needed, and adds all nodes
502 * case of failure. 540 * from this bud to the replay list. Returns zero in case of success and a
541 * negative error code in case of failure.
503 */ 542 */
504static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 543static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
505 int *free, int *dirty)
506{ 544{
507 int err = 0, used = 0; 545 int is_last = is_last_bud(c, b->bud);
546 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
508 struct ubifs_scan_leb *sleb; 547 struct ubifs_scan_leb *sleb;
509 struct ubifs_scan_node *snod; 548 struct ubifs_scan_node *snod;
510 struct ubifs_bud *bud;
511 549
512 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 550 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
513 if (c->need_recovery) 551 lnum, b->bud->jhead, offs, is_last);
514 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 552
553 if (c->need_recovery && is_last)
554 /*
555 * Recover only last LEBs in the journal heads, because power
556 * cuts may cause corruptions only in these LEBs, because only
557 * these LEBs could possibly be written to at the power cut
558 * time.
559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
561 b->bud->jhead != GCHD);
515 else 562 else
516 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 563 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
517 if (IS_ERR(sleb)) 564 if (IS_ERR(sleb))
@@ -627,15 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
627 goto out; 674 goto out;
628 } 675 }
629 676
630 bud = ubifs_search_bud(c, lnum); 677 ubifs_assert(ubifs_search_bud(c, lnum));
631 if (!bud)
632 BUG();
633
634 ubifs_assert(sleb->endpt - offs >= used); 678 ubifs_assert(sleb->endpt - offs >= used);
635 ubifs_assert(sleb->endpt % c->min_io_size == 0); 679 ubifs_assert(sleb->endpt % c->min_io_size == 0);
636 680
637 *dirty = sleb->endpt - offs - used; 681 b->dirty = sleb->endpt - offs - used;
638 *free = c->leb_size - sleb->endpt; 682 b->free = c->leb_size - sleb->endpt;
683 dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
639 684
640out: 685out:
641 ubifs_scan_destroy(sleb); 686 ubifs_scan_destroy(sleb);
@@ -649,58 +694,6 @@ out_dump:
649} 694}
650 695
651/** 696/**
652 * insert_ref_node - insert a reference node to the replay tree.
653 * @c: UBIFS file-system description object
654 * @lnum: node logical eraseblock number
655 * @offs: node offset
656 * @sqnum: sequence number
657 * @free: amount of free space in bud
658 * @dirty: amount of dirty space from padding and deletion nodes
659 * @jhead: journal head number for the bud
660 *
661 * This function inserts a reference node to the replay tree and returns zero
662 * in case of success or a negative error code in case of failure.
663 */
664static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
665 unsigned long long sqnum, int free, int dirty,
666 int jhead)
667{
668 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
669 struct replay_entry *r;
670
671 dbg_mnt("add ref LEB %d:%d", lnum, offs);
672 while (*p) {
673 parent = *p;
674 r = rb_entry(parent, struct replay_entry, rb);
675 if (sqnum < r->sqnum) {
676 p = &(*p)->rb_left;
677 continue;
678 } else if (sqnum > r->sqnum) {
679 p = &(*p)->rb_right;
680 continue;
681 }
682 ubifs_err("duplicate sqnum in replay tree");
683 return -EINVAL;
684 }
685
686 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
687 if (!r)
688 return -ENOMEM;
689
690 r->lnum = lnum;
691 r->offs = offs;
692 r->sqnum = sqnum;
693 r->flags = REPLAY_REF;
694 r->free = free;
695 r->dirty = dirty;
696 r->jhead = jhead;
697
698 rb_link_node(&r->rb, parent, p);
699 rb_insert_color(&r->rb, &c->replay_tree);
700 return 0;
701}
702
703/**
704 * replay_buds - replay all buds. 697 * replay_buds - replay all buds.
705 * @c: UBIFS file-system description object 698 * @c: UBIFS file-system description object
706 * 699 *
@@ -710,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
710static int replay_buds(struct ubifs_info *c) 703static int replay_buds(struct ubifs_info *c)
711{ 704{
712 struct bud_entry *b; 705 struct bud_entry *b;
713 int err, uninitialized_var(free), uninitialized_var(dirty); 706 int err;
707 unsigned long long prev_sqnum = 0;
714 708
715 list_for_each_entry(b, &c->replay_buds, list) { 709 list_for_each_entry(b, &c->replay_buds, list) {
716 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 710 err = replay_bud(c, b);
717 &free, &dirty);
718 if (err)
719 return err;
720 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
721 free, dirty, b->bud->jhead);
722 if (err) 711 if (err)
723 return err; 712 return err;
713
714 ubifs_assert(b->sqnum > prev_sqnum);
715 prev_sqnum = b->sqnum;
724 } 716 }
725 717
726 return 0; 718 return 0;
@@ -1060,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c)
1060 if (err) 1052 if (err)
1061 goto out; 1053 goto out;
1062 1054
1063 err = apply_replay_tree(c); 1055 err = apply_replay_list(c);
1056 if (err)
1057 goto out;
1058
1059 err = set_buds_lprops(c);
1064 if (err) 1060 if (err)
1065 goto out; 1061 goto out;
1066 1062
1067 /* 1063 /*
1068 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1064 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1069 * to roughly estimate index growth. Things like @c->min_idx_lebs 1065 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1070 * depend on it. This means we have to initialize it to make sure 1066 * depend on it. This means we have to initialize it to make sure
1071 * budgeting works properly. 1067 * budgeting works properly.
1072 */ 1068 */
1073 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1069 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1074 c->budg_uncommitted_idx *= c->max_idx_node_sz; 1070 c->bi.uncommitted_idx *= c->max_idx_node_sz;
1075 1071
1076 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1072 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1077 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1073 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1078 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1074 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1079 (unsigned long)c->highest_inum); 1075 (unsigned long)c->highest_inum);
1080out: 1076out:
1081 destroy_replay_tree(c); 1077 destroy_replay_list(c);
1082 destroy_bud_list(c); 1078 destroy_bud_list(c);
1083 c->replaying = 0; 1079 c->replaying = 0;
1084 return err; 1080 return err;
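The @c->bi.uncommitted_idx initialization above is deliberately a coarse upper bound: every dirty znode is assumed to eventually cost one maximum-size index node on flash. A worked example with made-up numbers:

#include <stdio.h>

int main(void)
{
	long long dirty_zn_cnt = 1000;	/* dirty in-memory index nodes after replay */
	int max_idx_node_sz = 512;	/* hypothetical worst-case on-flash index node size */

	/* ~500 KiB of index growth is budgeted until the next commit */
	printf("uncommitted index budget: %lld bytes\n",
	       dirty_zn_cnt * max_idx_node_sz);
	return 0;
}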
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index bf31b4729e51..c606f010e8df 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -475,7 +475,8 @@ failed:
475 * @c: UBIFS file-system description object 475 * @c: UBIFS file-system description object
476 * 476 *
477 * This function returns a pointer to the superblock node or a negative error 477 * This function returns a pointer to the superblock node or a negative error
478 * code. 478 * code. Note, the user of this function is responsible for kfree()'ing
479 * the returned superblock buffer.
479 */ 480 */
480struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) 481struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
481{ 482{
@@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
616 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 617 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
617 memcpy(&c->uuid, &sup->uuid, 16); 618 memcpy(&c->uuid, &sup->uuid, 16);
618 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 619 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
620 c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
619 621
620 /* Automatically increase file system size to the maximum size */ 622 /* Automatically increase file system size to the maximum size */
621 c->old_leb_cnt = c->leb_cnt; 623 c->old_leb_cnt = c->leb_cnt;
@@ -650,3 +652,152 @@ out:
650 kfree(sup); 652 kfree(sup);
651 return err; 653 return err;
652} 654}
655
656/**
657 * fixup_leb - fixup/unmap an LEB containing free space.
658 * @c: UBIFS file-system description object
659 * @lnum: the LEB number to fix up
660 * @len: number of used bytes in LEB (starting at offset 0)
661 *
662 * This function reads the contents of the given LEB number @lnum, then fixes
663 * it up, so that empty min. I/O units at the end of the LEB are actually erased on
664 * flash (rather than merely containing all-0xff data). If the LEB is completely
665 * empty, it is simply unmapped.
666 */
667static int fixup_leb(struct ubifs_info *c, int lnum, int len)
668{
669 int err;
670
671 ubifs_assert(len >= 0);
672 ubifs_assert(len % c->min_io_size == 0);
673 ubifs_assert(len < c->leb_size);
674
675 if (len == 0) {
676 dbg_mnt("unmap empty LEB %d", lnum);
677 return ubi_leb_unmap(c->ubi, lnum);
678 }
679
680 dbg_mnt("fixup LEB %d, data len %d", lnum, len);
681 err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
682 if (err)
683 return err;
684
685 return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
686}
687
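The effect of fixup_leb() is easiest to see on a simulated eraseblock: preserve the used prefix, then rewrite the whole LEB so the tail is genuinely erased flash rather than programmed 0xff bytes. A self-contained sketch with an array-backed "flash" and hypothetical sizes:

#include <string.h>

#define LEB_SIZE 4096

static unsigned char leb[LEB_SIZE];
static int leb_mapped = 1;

/* Keep the first 'len' used bytes (len <= LEB_SIZE), erase the rest. */
static void fixup_leb_sim(int len)
{
	unsigned char buf[LEB_SIZE];

	if (len == 0) {			/* completely empty: just unmap it */
		leb_mapped = 0;
		return;
	}
	memcpy(buf, leb, len);		/* ubi_read() of the used prefix */
	memset(leb, 0xff, LEB_SIZE);	/* the erase cycle */
	memcpy(leb, buf, len);		/* ubi_leb_change(): write back only used data */
}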
688/**
689 * fixup_free_space - find & remap all LEBs containing free space.
690 * @c: UBIFS file-system description object
691 *
692 * This function walks through all LEBs in the filesystem and fixes up those
693 * containing free/empty space.
694 */
695static int fixup_free_space(struct ubifs_info *c)
696{
697 int lnum, err = 0;
698 struct ubifs_lprops *lprops;
699
700 ubifs_get_lprops(c);
701
702 /* Fixup LEBs in the master area */
703 for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
704 err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
705 if (err)
706 goto out;
707 }
708
709 /* Unmap unused log LEBs */
710 lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
711 while (lnum != c->ltail_lnum) {
712 err = fixup_leb(c, lnum, 0);
713 if (err)
714 goto out;
715 lnum = ubifs_next_log_lnum(c, lnum);
716 }
717
718 /* Fixup the current log head */
719 err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
720 if (err)
721 goto out;
722
723 /* Fixup LEBs in the LPT area */
724 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
725 int free = c->ltab[lnum - c->lpt_first].free;
726
727 if (free > 0) {
728 err = fixup_leb(c, lnum, c->leb_size - free);
729 if (err)
730 goto out;
731 }
732 }
733
734 /* Unmap LEBs in the orphans area */
735 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
736 err = fixup_leb(c, lnum, 0);
737 if (err)
738 goto out;
739 }
740
741 /* Fixup LEBs in the main area */
742 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
743 lprops = ubifs_lpt_lookup(c, lnum);
744 if (IS_ERR(lprops)) {
745 err = PTR_ERR(lprops);
746 goto out;
747 }
748
749 if (lprops->free > 0) {
750 err = fixup_leb(c, lnum, c->leb_size - lprops->free);
751 if (err)
752 goto out;
753 }
754 }
755
756out:
757 ubifs_release_lprops(c);
758 return err;
759}
760
761/**
762 * ubifs_fixup_free_space - find & fix all LEBs with free space.
763 * @c: UBIFS file-system description object
764 *
765 * This function fixes up LEBs containing free space on first mount, if the
766 * appropriate flag was set when the FS was created. Each LEB with one or more
767 * empty min. I/O units (i.e. free-space-count > 0) is re-written, to make sure
768 * the free space is actually erased. E.g., this is necessary for some NAND
769 * chips, since the free space may have been programmed as real "0xff" data
770 * (generating a non-0xff ECC), causing future writes to the not-really-erased
771 * NAND pages to behave badly. After the space is fixed up, the superblock flag
772 * is cleared, so that this is skipped for all future mounts.
773 */
774int ubifs_fixup_free_space(struct ubifs_info *c)
775{
776 int err;
777 struct ubifs_sb_node *sup;
778
779 ubifs_assert(c->space_fixup);
780 ubifs_assert(!c->ro_mount);
781
782 ubifs_msg("start fixing up free space");
783
784 err = fixup_free_space(c);
785 if (err)
786 return err;
787
788 sup = ubifs_read_sb_node(c);
789 if (IS_ERR(sup))
790 return PTR_ERR(sup);
791
792 /* Free-space fixup is no longer required */
793 c->space_fixup = 0;
794 sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
795
796 err = ubifs_write_sb_node(c, sup);
797 kfree(sup);
798 if (err)
799 return err;
800
801 ubifs_msg("free space fixup complete");
802 return err;
803}
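Note that the flag is cleared in the on-media byte order: the mask itself is converted with cpu_to_le32() so the &= operates directly on the little-endian field. A userspace equivalent, assuming glibc's <endian.h>:

#include <endian.h>
#include <stdint.h>

#define UBIFS_FLG_SPACE_FIXUP 0x04

/* Superblock flags live little-endian on flash; mask them in that form. */
static void clear_space_fixup(uint32_t *le_flags)
{
	*le_flags &= htole32(~(uint32_t)UBIFS_FLG_SPACE_FIXUP);
}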
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 04ad07f4fcc3..6db0bdaa9f74 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -375,7 +375,7 @@ out:
375 ubifs_release_dirty_inode_budget(c, ui); 375 ubifs_release_dirty_inode_budget(c, ui);
376 else { 376 else {
377 /* We've deleted something - clean the "no space" flags */ 377 /* We've deleted something - clean the "no space" flags */
378 c->nospace = c->nospace_rp = 0; 378 c->bi.nospace = c->bi.nospace_rp = 0;
379 smp_wmb(); 379 smp_wmb();
380 } 380 }
381done: 381done:
@@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
694 * be compressed and direntries are of the maximum size. 694 * be compressed and direntries are of the maximum size.
695 * 695 *
696 * Note, data, which may be stored in inodes is budgeted separately, so 696 * Note, data, which may be stored in inodes is budgeted separately, so
697 * it is not included into 'c->inode_budget'. 697 * it is not included into 'c->bi.inode_budget'.
698 */ 698 */
699 c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; 699 c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
700 c->inode_budget = UBIFS_INO_NODE_SZ; 700 c->bi.inode_budget = UBIFS_INO_NODE_SZ;
701 c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; 701 c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
702 702
703 /* 703 /*
704 * When the amount of flash space used by buds becomes 704 * When the amount of flash space used by buds becomes
@@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
742{ 742{
743 long long tmp64; 743 long long tmp64;
744 744
745 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 745 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
746 c->report_rp_size = ubifs_reported_space(c, c->rp_size); 746 c->report_rp_size = ubifs_reported_space(c, c->rp_size);
747 747
748 /* 748 /*
@@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c)
1144{ 1144{
1145 ubifs_assert(c->dark_wm > 0); 1145 ubifs_assert(c->dark_wm > 0);
1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { 1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1147 ubifs_err("insufficient free space to mount in read/write mode"); 1147 ubifs_err("insufficient free space to mount in R/W mode");
1148 dbg_dump_budg(c); 1148 dbg_dump_budg(c, &c->bi);
1149 dbg_dump_lprops(c); 1149 dbg_dump_lprops(c);
1150 return -ENOSPC; 1150 return -ENOSPC;
1151 } 1151 }
@@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c)
1304 if (err) 1304 if (err)
1305 goto out_lpt; 1305 goto out_lpt;
1306 1306
1307 err = dbg_check_idx_size(c, c->old_idx_sz); 1307 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1308 if (err) 1308 if (err)
1309 goto out_lpt; 1309 goto out_lpt;
1310 1310
@@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
1313 goto out_journal; 1313 goto out_journal;
1314 1314
1315 /* Calculate 'min_idx_lebs' after journal replay */ 1315 /* Calculate 'min_idx_lebs' after journal replay */
1316 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 1316 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
1317 1317
1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); 1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
1319 if (err) 1319 if (err)
@@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c)
1396 } else 1396 } else
1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1397 ubifs_assert(c->lst.taken_empty_lebs > 0);
1398 1398
1399 if (!c->ro_mount && c->space_fixup) {
1400 err = ubifs_fixup_free_space(c);
1401 if (err)
1402 goto out_infos;
1403 }
1404
1399 err = dbg_check_filesystem(c); 1405 err = dbg_check_filesystem(c);
1400 if (err) 1406 if (err)
1401 goto out_infos; 1407 goto out_infos;
@@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c)
1442 c->main_lebs, c->main_first, c->leb_cnt - 1); 1448 c->main_lebs, c->main_first, c->leb_cnt - 1);
1443 dbg_msg("index LEBs: %d", c->lst.idx_lebs); 1449 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
1444 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", 1450 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
1445 c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); 1451 c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
1452 c->bi.old_idx_sz >> 20);
1446 dbg_msg("key hash type: %d", c->key_hash_type); 1453 dbg_msg("key hash type: %d", c->key_hash_type);
1447 dbg_msg("tree fanout: %d", c->fanout); 1454 dbg_msg("tree fanout: %d", c->fanout);
1448 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1455 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
@@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c)
1456 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1463 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1457 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1464 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1458 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", 1465 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1459 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1466 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1460 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); 1467 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1461 dbg_msg("dead watermark: %d", c->dead_wm); 1468 dbg_msg("dead watermark: %d", c->dead_wm);
1462 dbg_msg("dark watermark: %d", c->dark_wm); 1469 dbg_msg("dark watermark: %d", c->dark_wm);
@@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1584 } 1591 }
1585 sup->leb_cnt = cpu_to_le32(c->leb_cnt); 1592 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
1586 err = ubifs_write_sb_node(c, sup); 1593 err = ubifs_write_sb_node(c, sup);
1594 kfree(sup);
1587 if (err) 1595 if (err)
1588 goto out; 1596 goto out;
1589 } 1597 }
@@ -1684,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1684 */ 1692 */
1685 err = dbg_check_space_info(c); 1693 err = dbg_check_space_info(c);
1686 } 1694 }
1695
1696 if (c->space_fixup) {
1697 err = ubifs_fixup_free_space(c);
1698 if (err)
1699 goto out;
1700 }
1701
1687 mutex_unlock(&c->umount_mutex); 1702 mutex_unlock(&c->umount_mutex);
1688 return err; 1703 return err;
1689 1704
@@ -1766,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb)
1766 * to write them back because of I/O errors. 1781 * to write them back because of I/O errors.
1767 */ 1782 */
1768 if (!c->ro_error) { 1783 if (!c->ro_error) {
1769 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1784 ubifs_assert(c->bi.idx_growth == 0);
1770 ubifs_assert(c->budg_idx_growth == 0); 1785 ubifs_assert(c->bi.dd_growth == 0);
1771 ubifs_assert(c->budg_dd_growth == 0); 1786 ubifs_assert(c->bi.data_growth == 0);
1772 ubifs_assert(c->budg_data_growth == 0);
1773 } 1787 }
1774 1788
1775 /* 1789 /*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index de485979ca39..8119b1fd8d94 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2557 if (err) { 2557 if (err) {
2558 /* Ensure the znode is dirtied */ 2558 /* Ensure the znode is dirtied */
2559 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2559 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2560 znode = dirty_cow_bottom_up(c, znode); 2560 znode = dirty_cow_bottom_up(c, znode);
2561 if (IS_ERR(znode)) { 2561 if (IS_ERR(znode)) {
2562 err = PTR_ERR(znode); 2562 err = PTR_ERR(znode);
2563 goto out_unlock; 2563 goto out_unlock;
2564 } 2564 }
2565 } 2565 }
2566 err = tnc_delete(c, znode, n); 2566 err = tnc_delete(c, znode, n);
2567 } 2567 }
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 53288e5d604e..41920f357bbf 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
377 c->gap_lebs = NULL; 377 c->gap_lebs = NULL;
378 return err; 378 return err;
379 } 379 }
380 if (!dbg_force_in_the_gaps_enabled) { 380 if (!dbg_force_in_the_gaps_enabled()) {
381 /* 381 /*
382 * Do not print scary warnings if the debugging 382 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled. 383 * option which forces in-the-gaps is enabled.
384 */ 384 */
385 ubifs_err("out of space"); 385 ubifs_warn("out of space");
386 spin_lock(&c->space_lock); 386 dbg_dump_budg(c, &c->bi);
387 dbg_dump_budg(c);
388 spin_unlock(&c->space_lock);
389 dbg_dump_lprops(c); 387 dbg_dump_lprops(c);
390 } 388 }
391 /* Try to commit anyway */ 389 /* Try to commit anyway */
@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
796 spin_lock(&c->space_lock); 794 spin_lock(&c->space_lock);
797 /* 795 /*
798 * Although we have not finished committing yet, update size of the 796 * Although we have not finished committing yet, update size of the
799 * committed index ('c->old_idx_sz') and zero out the index growth 797 * committed index ('c->bi.old_idx_sz') and zero out the index growth
800 * budget. It is OK to do this now, because we've reserved all the 798 * budget. It is OK to do this now, because we've reserved all the
801 * space which is needed to commit the index, and it is safe for the 799 * space which is needed to commit the index, and it is safe for the
802 * budgeting subsystem to assume the index is already committed, 800 * budgeting subsystem to assume the index is already committed,
803 * even though it is not. 801 * even though it is not.
804 */ 802 */
805 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 803 ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
806 c->old_idx_sz = c->calc_idx_sz; 804 c->bi.old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0; 805 c->bi.uncommitted_idx = 0;
808 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 806 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
809 spin_unlock(&c->space_lock); 807 spin_unlock(&c->space_lock);
810 mutex_unlock(&c->tnc_mutex); 808 mutex_unlock(&c->tnc_mutex);
811 809
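This hunk is the "publish" side of the budgeting contract: under @c->space_lock the new index size becomes the committed size and the uncommitted-growth budget is zeroed in a single critical section, so readers never observe a half-updated pair. A pthread sketch of the same discipline (names hypothetical, a mutex standing in for the spinlock):

#include <pthread.h>

struct budg_info {
	pthread_mutex_t lock;
	unsigned long long old_idx_sz;	/* size of the committed index */
	long long uncommitted_idx;	/* growth budgeted but not yet committed */
};

/* Commit start: publish the freshly calculated index size atomically. */
static void publish_idx_size(struct budg_info *bi, unsigned long long calc_idx_sz)
{
	pthread_mutex_lock(&bi->lock);
	bi->old_idx_sz = calc_idx_sz;
	bi->uncommitted_idx = 0;
	pthread_mutex_unlock(&bi->lock);
}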
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 191ca7863fe7..e24380cf46ed 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -408,9 +408,11 @@ enum {
408 * Superblock flags. 408 * Superblock flags.
409 * 409 *
410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set 410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
411 * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
411 */ 412 */
412enum { 413enum {
413 UBIFS_FLG_BIGLPT = 0x02, 414 UBIFS_FLG_BIGLPT = 0x02,
415 UBIFS_FLG_SPACE_FIXUP = 0x04,
414}; 416};
415 417
416/** 418/**
@@ -434,7 +436,7 @@ struct ubifs_ch {
434 __u8 node_type; 436 __u8 node_type;
435 __u8 group_type; 437 __u8 group_type;
436 __u8 padding[2]; 438 __u8 padding[2];
437} __attribute__ ((packed)); 439} __packed;
438 440
439/** 441/**
440 * union ubifs_dev_desc - device node descriptor. 442 * union ubifs_dev_desc - device node descriptor.
@@ -448,7 +450,7 @@ struct ubifs_ch {
448union ubifs_dev_desc { 450union ubifs_dev_desc {
449 __le32 new; 451 __le32 new;
450 __le64 huge; 452 __le64 huge;
451} __attribute__ ((packed)); 453} __packed;
452 454
453/** 455/**
454 * struct ubifs_ino_node - inode node. 456 * struct ubifs_ino_node - inode node.
@@ -509,7 +511,7 @@ struct ubifs_ino_node {
509 __le16 compr_type; 511 __le16 compr_type;
510 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ 512 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
511 __u8 data[]; 513 __u8 data[];
512} __attribute__ ((packed)); 514} __packed;
513 515
514/** 516/**
515 * struct ubifs_dent_node - directory entry node. 517 * struct ubifs_dent_node - directory entry node.
@@ -534,7 +536,7 @@ struct ubifs_dent_node {
534 __le16 nlen; 536 __le16 nlen;
535 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ 537 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
536 __u8 name[]; 538 __u8 name[];
537} __attribute__ ((packed)); 539} __packed;
538 540
539/** 541/**
540 * struct ubifs_data_node - data node. 542 * struct ubifs_data_node - data node.
@@ -555,7 +557,7 @@ struct ubifs_data_node {
555 __le16 compr_type; 557 __le16 compr_type;
556 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ 558 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
557 __u8 data[]; 559 __u8 data[];
558} __attribute__ ((packed)); 560} __packed;
559 561
560/** 562/**
561 * struct ubifs_trun_node - truncation node. 563 * struct ubifs_trun_node - truncation node.
@@ -575,7 +577,7 @@ struct ubifs_trun_node {
575 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ 577 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
576 __le64 old_size; 578 __le64 old_size;
577 __le64 new_size; 579 __le64 new_size;
578} __attribute__ ((packed)); 580} __packed;
579 581
580/** 582/**
581 * struct ubifs_pad_node - padding node. 583 * struct ubifs_pad_node - padding node.
@@ -586,7 +588,7 @@ struct ubifs_trun_node {
586struct ubifs_pad_node { 588struct ubifs_pad_node {
587 struct ubifs_ch ch; 589 struct ubifs_ch ch;
588 __le32 pad_len; 590 __le32 pad_len;
589} __attribute__ ((packed)); 591} __packed;
590 592
591/** 593/**
592 * struct ubifs_sb_node - superblock node. 594 * struct ubifs_sb_node - superblock node.
@@ -644,7 +646,7 @@ struct ubifs_sb_node {
644 __u8 uuid[16]; 646 __u8 uuid[16];
645 __le32 ro_compat_version; 647 __le32 ro_compat_version;
646 __u8 padding2[3968]; 648 __u8 padding2[3968];
647} __attribute__ ((packed)); 649} __packed;
648 650
649/** 651/**
650 * struct ubifs_mst_node - master node. 652 * struct ubifs_mst_node - master node.
@@ -711,7 +713,7 @@ struct ubifs_mst_node {
711 __le32 idx_lebs; 713 __le32 idx_lebs;
712 __le32 leb_cnt; 714 __le32 leb_cnt;
713 __u8 padding[344]; 715 __u8 padding[344];
714} __attribute__ ((packed)); 716} __packed;
715 717
716/** 718/**
717 * struct ubifs_ref_node - logical eraseblock reference node. 719 * struct ubifs_ref_node - logical eraseblock reference node.
@@ -727,7 +729,7 @@ struct ubifs_ref_node {
727 __le32 offs; 729 __le32 offs;
728 __le32 jhead; 730 __le32 jhead;
729 __u8 padding[28]; 731 __u8 padding[28];
730} __attribute__ ((packed)); 732} __packed;
731 733
732/** 734/**
733 * struct ubifs_branch - key/reference/length branch 735 * struct ubifs_branch - key/reference/length branch
@@ -741,7 +743,7 @@ struct ubifs_branch {
741 __le32 offs; 743 __le32 offs;
742 __le32 len; 744 __le32 len;
743 __u8 key[]; 745 __u8 key[];
744} __attribute__ ((packed)); 746} __packed;
745 747
746/** 748/**
747 * struct ubifs_idx_node - indexing node. 749 * struct ubifs_idx_node - indexing node.
@@ -755,7 +757,7 @@ struct ubifs_idx_node {
755 __le16 child_cnt; 757 __le16 child_cnt;
756 __le16 level; 758 __le16 level;
757 __u8 branches[]; 759 __u8 branches[];
758} __attribute__ ((packed)); 760} __packed;
759 761
760/** 762/**
761 * struct ubifs_cs_node - commit start node. 763 * struct ubifs_cs_node - commit start node.
@@ -765,7 +767,7 @@ struct ubifs_idx_node {
765struct ubifs_cs_node { 767struct ubifs_cs_node {
766 struct ubifs_ch ch; 768 struct ubifs_ch ch;
767 __le64 cmt_no; 769 __le64 cmt_no;
768} __attribute__ ((packed)); 770} __packed;
769 771
770/** 772/**
771 * struct ubifs_orph_node - orphan node. 773 * struct ubifs_orph_node - orphan node.
@@ -777,6 +779,6 @@ struct ubifs_orph_node {
777 struct ubifs_ch ch; 779 struct ubifs_ch ch;
778 __le64 cmt_no; 780 __le64 cmt_no;
779 __le64 inos[]; 781 __le64 inos[];
780} __attribute__ ((packed)); 782} __packed;
781 783
782#endif /* __UBIFS_MEDIA_H__ */ 784#endif /* __UBIFS_MEDIA_H__ */
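Every structure in this header describes on-flash layout, so compiler-inserted padding would corrupt the media format; __packed is simply the kernel's shorthand for __attribute__((packed)). The difference is easy to demonstrate in isolation (GCC/clang assumed; the exact unpacked size is ABI-dependent):

#include <stdint.h>
#include <stdio.h>

#define __packed __attribute__((packed))	/* same expansion the kernel uses */

struct on_media { uint8_t type; uint64_t sqnum; } __packed;	/* 9 bytes */
struct in_memory { uint8_t type; uint64_t sqnum; };		/* typically 16 bytes */

int main(void)
{
	printf("packed=%zu unpacked=%zu\n",
	       sizeof(struct on_media), sizeof(struct in_memory));
	return 0;
}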
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 8c40ad3c6721..93d1412a06f0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb {
389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
391 * make sure @inode->i_size is always changed under @ui_mutex, because it 391 * make sure @inode->i_size is always changed under @ui_mutex, because it
392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock 392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
393 * with 'ubifs_writepage()' (see file.c). All the other inode fields are 393 * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
394 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 394 * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
395 * could consider to rework locking and base it on "shadow" fields. 395 * could consider to rework locking and base it on "shadow" fields.
396 */ 396 */
397struct ubifs_inode { 397struct ubifs_inode {
@@ -937,6 +937,40 @@ struct ubifs_mount_opts {
937 unsigned int compr_type:2; 937 unsigned int compr_type:2;
938}; 938};
939 939
940/**
941 * struct ubifs_budg_info - UBIFS budgeting information.
942 * @idx_growth: amount of bytes budgeted for index growth
943 * @data_growth: amount of bytes budgeted for cached data
944 * @dd_growth: amount of bytes budgeted for cached data that will make
945 * other data dirty
946 * @uncommitted_idx: amount of bytes that were budgeted for growth of the index, but
947 * which still have to be taken into account because the index
948 * has not been committed so far
949 * @old_idx_sz: size of index on flash
950 * @min_idx_lebs: minimum number of LEBs required for the index
951 * @nospace: non-zero if the file-system does not have flash space (used as
952 * optimization)
953 * @nospace_rp: the same as @nospace, but additionally means that even the reserved
954 * pool is full
955 * @page_budget: budget for a page (constant, never changed after mount)
956 * @inode_budget: budget for an inode (constant, never changed after mount)
957 * @dent_budget: budget for a directory entry (constant, never changed after
958 * mount)
959 */
960struct ubifs_budg_info {
961 long long idx_growth;
962 long long data_growth;
963 long long dd_growth;
964 long long uncommitted_idx;
965 unsigned long long old_idx_sz;
966 int min_idx_lebs;
967 unsigned int nospace:1;
968 unsigned int nospace_rp:1;
969 int page_budget;
970 int inode_budget;
971 int dent_budget;
972};
973
940struct ubifs_debug_info; 974struct ubifs_debug_info;
941 975
942/** 976/**
@@ -980,6 +1014,7 @@ struct ubifs_debug_info;
980 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 1014 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
981 * 1015 *
982 * @big_lpt: flag that LPT is too big to write whole during commit 1016 * @big_lpt: flag that LPT is too big to write whole during commit
1017 * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
983 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 1018 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
984 * recovery) 1019 * recovery)
985 * @bulk_read: enable bulk-reads 1020 * @bulk_read: enable bulk-reads
@@ -1057,32 +1092,14 @@ struct ubifs_debug_info;
1057 * @dirty_zn_cnt: number of dirty znodes 1092 * @dirty_zn_cnt: number of dirty znodes
1058 * @clean_zn_cnt: number of clean znodes 1093 * @clean_zn_cnt: number of clean znodes
1059 * 1094 *
1060 * @budg_idx_growth: amount of bytes budgeted for index growth 1095 * @space_lock: protects @bi and @lst
1061 * @budg_data_growth: amount of bytes budgeted for cached data 1096 * @lst: lprops statistics
1062 * @budg_dd_growth: amount of bytes budgeted for cached data that will make 1097 * @bi: budgeting information
1063 * other data dirty
1064 * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
1065 * but which still have to be taken into account because
1066 * the index has not been committed so far
1067 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
1068 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
1069 * @nospace, and @nospace_rp;
1070 * @min_idx_lebs: minimum number of LEBs required for the index
1071 * @old_idx_sz: size of index on flash
1072 * @calc_idx_sz: temporary variable which is used to calculate new index size 1098 * @calc_idx_sz: temporary variable which is used to calculate new index size
1073 * (contains accurate new index size at end of TNC commit start) 1099 * (contains accurate new index size at end of TNC commit start)
1074 * @lst: lprops statistics
1075 * @nospace: non-zero if the file-system does not have flash space (used as
1076 * optimization)
1077 * @nospace_rp: the same as @nospace, but additionally means that even reserved
1078 * pool is full
1079 *
1080 * @page_budget: budget for a page
1081 * @inode_budget: budget for an inode
1082 * @dent_budget: budget for a directory entry
1083 * 1100 *
1084 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash 1101 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
1085 * I/O unit 1102 * I/O unit
1086 * @mst_node_alsz: master node aligned size 1103 * @mst_node_alsz: master node aligned size
1087 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary 1104 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
1088 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 1105 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
@@ -1189,7 +1206,6 @@ struct ubifs_debug_info;
1189 * @replaying: %1 during journal replay 1206 * @replaying: %1 during journal replay
1190 * @mounting: %1 while mounting 1207 * @mounting: %1 while mounting
1191 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode 1208 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
1192 * @replay_tree: temporary tree used during journal replay
1193 * @replay_list: temporary list used during journal replay 1209 * @replay_list: temporary list used during journal replay
1194 * @replay_buds: list of buds to replay 1210 * @replay_buds: list of buds to replay
1195 * @cs_sqnum: sequence number of first node in the log (commit start node) 1211 * @cs_sqnum: sequence number of first node in the log (commit start node)
@@ -1238,6 +1254,7 @@ struct ubifs_info {
1238 wait_queue_head_t cmt_wq; 1254 wait_queue_head_t cmt_wq;
1239 1255
1240 unsigned int big_lpt:1; 1256 unsigned int big_lpt:1;
1257 unsigned int space_fixup:1;
1241 unsigned int no_chk_data_crc:1; 1258 unsigned int no_chk_data_crc:1;
1242 unsigned int bulk_read:1; 1259 unsigned int bulk_read:1;
1243 unsigned int default_compr:2; 1260 unsigned int default_compr:2;
@@ -1308,21 +1325,10 @@ struct ubifs_info {
1308 atomic_long_t dirty_zn_cnt; 1325 atomic_long_t dirty_zn_cnt;
1309 atomic_long_t clean_zn_cnt; 1326 atomic_long_t clean_zn_cnt;
1310 1327
1311 long long budg_idx_growth;
1312 long long budg_data_growth;
1313 long long budg_dd_growth;
1314 long long budg_uncommitted_idx;
1315 spinlock_t space_lock; 1328 spinlock_t space_lock;
1316 int min_idx_lebs;
1317 unsigned long long old_idx_sz;
1318 unsigned long long calc_idx_sz;
1319 struct ubifs_lp_stats lst; 1329 struct ubifs_lp_stats lst;
1320 unsigned int nospace:1; 1330 struct ubifs_budg_info bi;
1321 unsigned int nospace_rp:1; 1331 unsigned long long calc_idx_sz;
1322
1323 int page_budget;
1324 int inode_budget;
1325 int dent_budget;
1326 1332
1327 int ref_node_alsz; 1333 int ref_node_alsz;
1328 int mst_node_alsz; 1334 int mst_node_alsz;
@@ -1430,7 +1436,6 @@ struct ubifs_info {
1430 unsigned int replaying:1; 1436 unsigned int replaying:1;
1431 unsigned int mounting:1; 1437 unsigned int mounting:1;
1432 unsigned int remounting_rw:1; 1438 unsigned int remounting_rw:1;
1433 struct rb_root replay_tree;
1434 struct list_head replay_list; 1439 struct list_head replay_list;
1435 struct list_head replay_buds; 1440 struct list_head replay_buds;
1436 unsigned long long cs_sqnum; 1441 unsigned long long cs_sqnum;
@@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c);
1628int ubifs_read_superblock(struct ubifs_info *c); 1633int ubifs_read_superblock(struct ubifs_info *c);
1629struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); 1634struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
1630int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); 1635int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
1636int ubifs_fixup_free_space(struct ubifs_info *c);
1631 1637
1632/* replay.c */ 1638/* replay.c */
1633int ubifs_validate_entry(struct ubifs_info *c, 1639int ubifs_validate_entry(struct ubifs_info *c,
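Folding the budgeting fields into one struct also simplifies the lock documentation (@space_lock now just "protects @bi and @lst") and lets debug code copy a consistent picture in one go. A hedged sketch of that read side, with hypothetical names and a mutex standing in for the spinlock:

#include <pthread.h>
#include <string.h>

struct budg_info {
	long long idx_growth, data_growth, dd_growth;
};

struct fs_info {
	pthread_mutex_t space_lock;
	struct budg_info bi;
};

/* Snapshot every budgeting counter in one critical section, dump at leisure. */
static void snapshot_budg(struct fs_info *c, struct budg_info *out)
{
	pthread_mutex_lock(&c->space_lock);
	memcpy(out, &c->bi, sizeof(*out));
	pthread_mutex_unlock(&c->space_lock);
}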
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3299f469e712..16f19f55e63f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -80,8 +80,8 @@ enum {
80 SECURITY_XATTR, 80 SECURITY_XATTR,
81}; 81};
82 82
83static const struct inode_operations none_inode_operations; 83static const struct inode_operations empty_iops;
84static const struct file_operations none_file_operations; 84static const struct file_operations empty_fops;
85 85
86/** 86/**
87 * create_xattr - create an extended attribute. 87 * create_xattr - create an extended attribute.
@@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
131 131
132 /* Re-define all operations to be "nothing" */ 132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &empty_aops; 133 inode->i_mapping->a_ops = &empty_aops;
134 inode->i_op = &none_inode_operations; 134 inode->i_op = &empty_iops;
135 inode->i_fop = &none_file_operations; 135 inode->i_fop = &empty_fops;
136 136
137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; 137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
138 ui = ubifs_inode(inode); 138 ui = ubifs_inode(inode);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f1dce848ef96..4d76594c2a8f 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -783,6 +783,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
783 struct fileIdentDesc *fi, cfi; 783 struct fileIdentDesc *fi, cfi;
784 struct kernel_lb_addr tloc; 784 struct kernel_lb_addr tloc;
785 785
786 dentry_unhash(dentry);
787
786 retval = -ENOENT; 788 retval = -ENOENT;
787 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); 789 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
788 if (!fi) 790 if (!fi)
@@ -1081,6 +1083,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1081 struct kernel_lb_addr tloc; 1083 struct kernel_lb_addr tloc;
1082 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1084 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1083 1085
1086 if (new_inode && S_ISDIR(new_inode->i_mode))
1087 dentry_unhash(new_dentry);
1088
1084 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); 1089 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1085 if (ofi) { 1090 if (ofi) {
1086 if (ofibh.sbh != ofibh.ebh) 1091 if (ofibh.sbh != ofibh.ebh)
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 46f7a807bbc1..42694e11c23d 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -424,8 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
424 ufs_cpu_to_data_ptr(sb, p, result); 424 ufs_cpu_to_data_ptr(sb, p, result);
425 *err = 0; 425 *err = 0;
426 UFS_I(inode)->i_lastfrag = 426 UFS_I(inode)->i_lastfrag =
427 max_t(u32, UFS_I(inode)->i_lastfrag, 427 max(UFS_I(inode)->i_lastfrag, fragment + count);
428 fragment + count);
429 ufs_clear_frags(inode, result + oldcount, 428 ufs_clear_frags(inode, result + oldcount,
430 newcount - oldcount, locked_page != NULL); 429 newcount - oldcount, locked_page != NULL);
431 } 430 }
@@ -440,7 +439,8 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
440 result = ufs_add_fragments (inode, tmp, oldcount, newcount, err); 439 result = ufs_add_fragments (inode, tmp, oldcount, newcount, err);
441 if (result) { 440 if (result) {
442 *err = 0; 441 *err = 0;
443 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 442 UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
443 fragment + count);
444 ufs_clear_frags(inode, result + oldcount, newcount - oldcount, 444 ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
445 locked_page != NULL); 445 locked_page != NULL);
446 unlock_super(sb); 446 unlock_super(sb);
@@ -479,7 +479,8 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
479 uspi->s_sbbase + result, locked_page); 479 uspi->s_sbbase + result, locked_page);
480 ufs_cpu_to_data_ptr(sb, p, result); 480 ufs_cpu_to_data_ptr(sb, p, result);
481 *err = 0; 481 *err = 0;
482 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 482 UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
483 fragment + count);
483 unlock_super(sb); 484 unlock_super(sb);
484 if (newcount < request) 485 if (newcount < request)
485 ufs_free_fragments (inode, result + newcount, request - newcount); 486 ufs_free_fragments (inode, result + newcount, request - newcount);
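The max_t(u32, ...) to max() change is a bug fix, not a style cleanup: i_lastfrag is 64 bits wide here, and forcing both operands through u32 silently truncates fragment numbers beyond 2^32. A standalone demonstration of the failure mode (the macros only approximate the kernel's, which additionally type-check their operands):

#include <stdint.h>
#include <stdio.h>

#define max_t(type, a, b) ((type)(a) > (type)(b) ? (type)(a) : (type)(b))
#define max(a, b)         ((a) > (b) ? (a) : (b))

int main(void)
{
	uint64_t lastfrag = 0x100000000ULL;	/* a fragment number above 2^32 */
	uint64_t candidate = 1;

	/* truncated: (u32)0x100000000 == 0, so the "maximum" comes out as 1 */
	printf("max_t(u32): %llu\n",
	       (unsigned long long)max_t(uint32_t, lastfrag, candidate));
	/* correct: 4294967296 */
	printf("max:        %llu\n",
	       (unsigned long long)max(lastfrag, candidate));
	return 0;
}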
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e765743cf9f3..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -409,7 +409,7 @@ out:
409} 409}
410 410
411/** 411/**
412 * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and 412 * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
413 * readpage, writepage and so on 413 * readpage, writepage and so on
414 */ 414 */
415 415
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 29309e25417f..953ebdfc5bf7 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -258,6 +258,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
258 struct inode * inode = dentry->d_inode; 258 struct inode * inode = dentry->d_inode;
259 int err= -ENOTEMPTY; 259 int err= -ENOTEMPTY;
260 260
261 dentry_unhash(dentry);
262
261 lock_ufs(dir->i_sb); 263 lock_ufs(dir->i_sb);
262 if (ufs_empty_dir (inode)) { 264 if (ufs_empty_dir (inode)) {
263 err = ufs_unlink(dir, dentry); 265 err = ufs_unlink(dir, dentry);
@@ -282,6 +284,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
282 struct ufs_dir_entry *old_de; 284 struct ufs_dir_entry *old_de;
283 int err = -ENOENT; 285 int err = -ENOENT;
284 286
287 if (new_inode && S_ISDIR(new_inode->i_mode))
288 dentry_unhash(new_dentry);
289
285 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); 290 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
286 if (!old_de) 291 if (!old_de)
287 goto out; 292 goto out;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 5f821dbc0579..f04f89fbd4d9 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -84,7 +84,7 @@ static int ufs_trunc_direct(struct inode *inode)
84 retry = 0; 84 retry = 0;
85 85
86 frag1 = DIRECT_FRAGMENT; 86 frag1 = DIRECT_FRAGMENT;
87 frag4 = min_t(u32, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag); 87 frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag);
88 frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1); 88 frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1);
89 frag3 = frag4 & ~uspi->s_fpbmask; 89 frag3 = frag4 & ~uspi->s_fpbmask;
90 block1 = block2 = 0; 90 block1 = block2 = 0;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9ef9ed2cfe2e..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
37 36
38#include "xfs_sb.h" 37#include "xfs_sb.h"
39#include "xfs_inum.h" 38#include "xfs_inum.h"
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
709 return bp; 708 return bp;
710} 709}
711 710
711/*
712 * Return a buffer allocated as an empty buffer and associated with external
713 * memory via xfs_buf_associate_memory() back to its empty state.
714 */
715void
716xfs_buf_set_empty(
717 struct xfs_buf *bp,
718 size_t len)
719{
720 if (bp->b_pages)
721 _xfs_buf_free_pages(bp);
722
723 bp->b_pages = NULL;
724 bp->b_page_count = 0;
725 bp->b_addr = NULL;
726 bp->b_file_offset = 0;
727 bp->b_buffer_length = bp->b_count_desired = len;
728 bp->b_bn = XFS_BUF_DADDR_NULL;
729 bp->b_flags &= ~XBF_MAPPED;
730}
731
712static inline struct page * 732static inline struct page *
713mem_to_page( 733mem_to_page(
714 void *addr) 734 void *addr)
@@ -1402,12 +1422,12 @@ restart:
1402int 1422int
1403xfs_buftarg_shrink( 1423xfs_buftarg_shrink(
1404 struct shrinker *shrink, 1424 struct shrinker *shrink,
1405 int nr_to_scan, 1425 struct shrink_control *sc)
1406 gfp_t mask)
1407{ 1426{
1408 struct xfs_buftarg *btp = container_of(shrink, 1427 struct xfs_buftarg *btp = container_of(shrink,
1409 struct xfs_buftarg, bt_shrinker); 1428 struct xfs_buftarg, bt_shrinker);
1410 struct xfs_buf *bp; 1429 struct xfs_buf *bp;
1430 int nr_to_scan = sc->nr_to_scan;
1411 LIST_HEAD(dispose); 1431 LIST_HEAD(dispose);
1412 1432
1413 if (!nr_to_scan) 1433 if (!nr_to_scan)
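This is part of the tree-wide shrinker API change in this kernel cycle (the same conversion appears in xfs_sync.c and xfs_qm.c below): callbacks receive one struct shrink_control instead of separate nr_to_scan/gfp_mask arguments, so future parameters no longer require touching every shrinker. A compilable sketch of the two shapes, with the struct layout approximated from this era and gfp_t reduced to a plain integer:

typedef unsigned int gfp_t;

struct shrink_control {			/* approximation of the kernel struct */
	gfp_t gfp_mask;
	unsigned long nr_to_scan;
};

struct shrinker;			/* opaque for this sketch */

/* Old style: every new parameter was an API break across the whole tree. */
static int shrink_old(struct shrinker *s, int nr_to_scan, gfp_t mask)
{
	(void)s; (void)mask;
	return nr_to_scan ? 0 : 42;	/* 42 stands in for "objects remaining" */
}

/* New style: extensible through the control structure. */
static int shrink_new(struct shrinker *s, struct shrink_control *sc)
{
	return shrink_old(s, (int)sc->nr_to_scan, sc->gfp_mask);
}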
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a9a1c4512645..50a7d5fb3b73 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
178 xfs_buf_flags_t); 178 xfs_buf_flags_t);
179 179
180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
181extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
181extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); 182extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
182extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); 183extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
183extern void xfs_buf_hold(xfs_buf_t *); 184extern void xfs_buf_hold(xfs_buf_t *);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index d61611c88012..244e797dae32 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -191,3 +191,32 @@ xfs_ioc_trim(
191 return -XFS_ERROR(EFAULT); 191 return -XFS_ERROR(EFAULT);
192 return 0; 192 return 0;
193} 193}
194
195int
196xfs_discard_extents(
197 struct xfs_mount *mp,
198 struct list_head *list)
199{
200 struct xfs_busy_extent *busyp;
201 int error = 0;
202
203 list_for_each_entry(busyp, list, list) {
204 trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
205 busyp->length);
206
207 error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
208 XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
209 XFS_FSB_TO_BB(mp, busyp->length),
210 GFP_NOFS, 0);
211 if (error && error != EOPNOTSUPP) {
212 xfs_info(mp,
213 "discard failed for extent [0x%llx,%u], error %d",
214 (unsigned long long)busyp->bno,
215 busyp->length,
216 error);
217 return error;
218 }
219 }
220
221 return 0;
222}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h
index e82b6dd3e127..344879aea646 100644
--- a/fs/xfs/linux-2.6/xfs_discard.h
+++ b/fs/xfs/linux-2.6/xfs_discard.h
@@ -2,7 +2,9 @@
2#define XFS_DISCARD_H 1 2#define XFS_DISCARD_H 1
3 3
4struct fstrim_range; 4struct fstrim_range;
5struct list_head;
5 6
6extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); 7extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
8extern int xfs_discard_extents(struct xfs_mount *, struct list_head *);
7 9
8#endif /* XFS_DISCARD_H */ 10#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b3486dfa5520..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
586 case XFS_IOC_RESVSP_32: 586 case XFS_IOC_RESVSP_32:
587 case XFS_IOC_UNRESVSP_32: 587 case XFS_IOC_UNRESVSP_32:
588 case XFS_IOC_RESVSP64_32: 588 case XFS_IOC_RESVSP64_32:
589 case XFS_IOC_UNRESVSP64_32: { 589 case XFS_IOC_UNRESVSP64_32:
590 case XFS_IOC_ZERO_RANGE_32: {
590 struct xfs_flock64 bf; 591 struct xfs_flock64 bf;
591 592
592 if (xfs_compat_flock64_copyin(&bf, arg)) 593 if (xfs_compat_flock64_copyin(&bf, arg))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 08b605792a99..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) 184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) 185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) 186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
187#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
187 188
188typedef struct compat_xfs_fsop_geom_v1 { 189typedef struct compat_xfs_fsop_geom_v1 {
189 __u32 blocksize; /* filesystem (data) block size */ 190 __u32 blocksize; /* filesystem (data) block size */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 244be9cbfe78..8633521b3b2e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
70#include <linux/ctype.h> 70#include <linux/ctype.h>
71#include <linux/writeback.h> 71#include <linux/writeback.h>
72#include <linux/capability.h> 72#include <linux/capability.h>
73#include <linux/list_sort.h>
73 74
74#include <asm/page.h> 75#include <asm/page.h>
75#include <asm/div64.h> 76#include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 9f76cceb678d..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
41 printk("%sXFS: %pV\n", level, vaf); 41 printk("%sXFS: %pV\n", level, vaf);
42} 42}
43 43
44void xfs_printk(
45 const char *level,
46 const struct xfs_mount *mp,
47 const char *fmt, ...)
48{
49 struct va_format vaf;
50 va_list args;
51
52 va_start(args, fmt);
53
54 vaf.fmt = fmt;
55 vaf.va = &args;
56
57 __xfs_printk(level, mp, &vaf);
58 va_end(args);
59}
60
61#define define_xfs_printk_level(func, kern_level) \ 44#define define_xfs_printk_level(func, kern_level) \
62void func(const struct xfs_mount *mp, const char *fmt, ...) \ 45void func(const struct xfs_mount *mp, const char *fmt, ...) \
63{ \ 46{ \
@@ -95,8 +78,7 @@ xfs_alert_tag(
95 int do_panic = 0; 78 int do_panic = 0;
96 79
97 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { 80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
98 xfs_printk(KERN_ALERT, mp, 81 xfs_alert(mp, "Transforming an alert into a BUG.");
99 "XFS: Transforming an alert into a BUG.");
100 do_panic = 1; 82 do_panic = 1;
101 } 83 }
102 84
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index f1b3fc1b6c4e..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
3 3
4struct xfs_mount; 4struct xfs_mount;
5 5
6extern void xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 6extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3))); 7 __attribute__ ((format (printf, 2, 3)));
11extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 8extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
28extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 25extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3))); 26 __attribute__ ((format (printf, 2, 3)));
30#else 27#else
31static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 28static inline void
29__attribute__ ((format (printf, 2, 3)))
30xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
32{ 31{
33} 32}
34#endif 33#endif
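The point of restructuring the !DEBUG stub is to keep printf-style format checking alive even when the body compiles to nothing: the attribute stays attached to the empty inline, so gcc -Wformat still verifies every xfs_debug() call site. A minimal reproduction of the pattern:

/* Empty body, but -Wformat still checks callers: xdbg(0, "%s", 42) warns. */
static inline void
__attribute__ ((format (printf, 2, 3)))
xdbg(const void *ctx, const char *fmt, ...)
{
	(void)ctx;
	(void)fmt;
}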
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b38e58d02299..98b9c91fcdf1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -110,8 +110,10 @@ mempool_t *xfs_ioend_pool;
110#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ 110#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
111#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ 111#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
112#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ 112#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
113#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ 113#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */
114#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ 114#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */
115#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
116#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
115 117
116/* 118/*
117 * Table driven mount option parser. 119 * Table driven mount option parser.
@@ -355,6 +357,10 @@ xfs_parseargs(
355 mp->m_flags |= XFS_MOUNT_DELAYLOG; 357 mp->m_flags |= XFS_MOUNT_DELAYLOG;
356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 358 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 359 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
360 } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
361 mp->m_flags |= XFS_MOUNT_DISCARD;
362 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
363 mp->m_flags &= ~XFS_MOUNT_DISCARD;
358 } else if (!strcmp(this_char, "ihashsize")) { 364 } else if (!strcmp(this_char, "ihashsize")) {
359 xfs_warn(mp, 365 xfs_warn(mp,
360 "ihashsize no longer used, option is deprecated."); 366 "ihashsize no longer used, option is deprecated.");
@@ -388,6 +394,13 @@ xfs_parseargs(
388 return EINVAL; 394 return EINVAL;
389 } 395 }
390 396
397 if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
398 !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
399 xfs_warn(mp,
400 "the discard option is incompatible with the nodelaylog option");
401 return EINVAL;
402 }
403
391#ifndef CONFIG_XFS_QUOTA 404#ifndef CONFIG_XFS_QUOTA
392 if (XFS_IS_QUOTA_RUNNING(mp)) { 405 if (XFS_IS_QUOTA_RUNNING(mp)) {
393 xfs_warn(mp, "quota support not available in this kernel."); 406 xfs_warn(mp, "quota support not available in this kernel.");
@@ -488,6 +501,7 @@ xfs_showargs(
488 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 501 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
489 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 502 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
490 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, 503 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
504 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
491 { 0, NULL } 505 { 0, NULL }
492 }; 506 };
493 static struct proc_xfs_info xfs_info_unset[] = { 507 static struct proc_xfs_info xfs_info_unset[] = {
@@ -1787,10 +1801,6 @@ init_xfs_fs(void)
1787 if (error) 1801 if (error)
1788 goto out_cleanup_procfs; 1802 goto out_cleanup_procfs;
1789 1803
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1794 vfs_initquota(); 1804 vfs_initquota();
1795 1805
1796 error = register_filesystem(&xfs_fs_type); 1806 error = register_filesystem(&xfs_fs_type);
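The new parse-time check earlier in this file rejects option combinations the implementation cannot honour (online discard depends on the delayed-logging infrastructure) instead of silently ignoring one of them. The shape of the check, reduced to a self-contained sketch with hypothetical flag values:

#include <stdio.h>

#define MOUNT_DELAYLOG	0x1
#define MOUNT_DISCARD	0x2

static int validate_opts(unsigned int flags)
{
	if ((flags & MOUNT_DISCARD) && !(flags & MOUNT_DELAYLOG)) {
		fprintf(stderr, "discard requires delaylog\n");
		return -1;	/* the kernel returns EINVAL here */
	}
	return 0;
}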
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3e898a48122d..8ecad5ff9f9b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
267 267
268 error = xfs_iflush(ip, flags); 268 error = xfs_iflush(ip, flags);
269 269
270 /*
271 * We don't want to try again on non-blocking flushes that can't run
272 * again immediately. If an inode really must be written, then that's
273 * what the SYNC_WAIT flag is for.
274 */
275 if (error == EAGAIN) {
276 ASSERT(!(flags & SYNC_WAIT));
277 error = 0;
278 }
279
270 out_unlock: 280 out_unlock:
271 xfs_iunlock(ip, XFS_ILOCK_SHARED); 281 xfs_iunlock(ip, XFS_ILOCK_SHARED);
272 return error; 282 return error;
@@ -1022,13 +1032,14 @@ xfs_reclaim_inodes(
1022static int 1032static int
1023xfs_reclaim_inode_shrink( 1033xfs_reclaim_inode_shrink(
1024 struct shrinker *shrink, 1034 struct shrinker *shrink,
1025 int nr_to_scan, 1035 struct shrink_control *sc)
1026 gfp_t gfp_mask)
1027{ 1036{
1028 struct xfs_mount *mp; 1037 struct xfs_mount *mp;
1029 struct xfs_perag *pag; 1038 struct xfs_perag *pag;
1030 xfs_agnumber_t ag; 1039 xfs_agnumber_t ag;
1031 int reclaimable; 1040 int reclaimable;
1041 int nr_to_scan = sc->nr_to_scan;
1042 gfp_t gfp_mask = sc->gfp_mask;
1032 1043
1033 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1044 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1034 if (nr_to_scan) { 1045 if (nr_to_scan) {
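The EAGAIN handling added above encodes a simple policy: for non-blocking flushes, "busy, try again later" is not a failure, while blocking (SYNC_WAIT) flushes should never see it at all. The same policy as a standalone helper, with a hypothetical stand-in for the kernel flag:

#include <errno.h>

#define SYNC_WAIT 0x1

static int filter_flush_error(int error, int flags)
{
	if (error == EAGAIN && !(flags & SYNC_WAIT))
		return 0;	/* the next periodic pass will retry */
	return error;
}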
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..d48b7a579ae1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
1151 1151
1152); 1152);
1153 1153
1154#define XFS_BUSY_SYNC \ 1154DECLARE_EVENT_CLASS(xfs_busy_class,
1155 { 0, "async" }, \
1156 { 1, "sync" }
1157
1158TRACE_EVENT(xfs_alloc_busy,
1159 TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
1160 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
1161 TP_ARGS(trans, agno, agbno, len, sync),
1162 TP_STRUCT__entry(
1163 __field(dev_t, dev)
1164 __field(struct xfs_trans *, tp)
1165 __field(int, tid)
1166 __field(xfs_agnumber_t, agno)
1167 __field(xfs_agblock_t, agbno)
1168 __field(xfs_extlen_t, len)
1169 __field(int, sync)
1170 ),
1171 TP_fast_assign(
1172 __entry->dev = trans->t_mountp->m_super->s_dev;
1173 __entry->tp = trans;
1174 __entry->tid = trans->t_ticket->t_tid;
1175 __entry->agno = agno;
1176 __entry->agbno = agbno;
1177 __entry->len = len;
1178 __entry->sync = sync;
1179 ),
1180 TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
1181 MAJOR(__entry->dev), MINOR(__entry->dev),
1182 __entry->tp,
1183 __entry->tid,
1184 __entry->agno,
1185 __entry->agbno,
1186 __entry->len,
1187 __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
1188
1189);
1190
1191TRACE_EVENT(xfs_alloc_unbusy,
1192 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1155 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1193 xfs_agblock_t agbno, xfs_extlen_t len), 1156 xfs_agblock_t agbno, xfs_extlen_t len),
1194 TP_ARGS(mp, agno, agbno, len), 1157 TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
1210 __entry->agbno, 1173 __entry->agbno,
1211 __entry->len) 1174 __entry->len)
1212); 1175);
1176#define DEFINE_BUSY_EVENT(name) \
1177DEFINE_EVENT(xfs_busy_class, name, \
1178 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
1179 xfs_agblock_t agbno, xfs_extlen_t len), \
1180 TP_ARGS(mp, agno, agbno, len))
1181DEFINE_BUSY_EVENT(xfs_alloc_busy);
1182DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
1183DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
1184DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
1185DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
1213 1186
1214#define XFS_BUSY_STATES \ 1187TRACE_EVENT(xfs_alloc_busy_trim,
1215 { 0, "missing" }, \
1216 { 1, "found" }
1217
1218TRACE_EVENT(xfs_alloc_busysearch,
1219 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1188 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1220 xfs_agblock_t agbno, xfs_extlen_t len, int found), 1189 xfs_agblock_t agbno, xfs_extlen_t len,
1221 TP_ARGS(mp, agno, agbno, len, found), 1190 xfs_agblock_t tbno, xfs_extlen_t tlen),
1191 TP_ARGS(mp, agno, agbno, len, tbno, tlen),
1222 TP_STRUCT__entry( 1192 TP_STRUCT__entry(
1223 __field(dev_t, dev) 1193 __field(dev_t, dev)
1224 __field(xfs_agnumber_t, agno) 1194 __field(xfs_agnumber_t, agno)
1225 __field(xfs_agblock_t, agbno) 1195 __field(xfs_agblock_t, agbno)
1226 __field(xfs_extlen_t, len) 1196 __field(xfs_extlen_t, len)
1227 __field(int, found) 1197 __field(xfs_agblock_t, tbno)
1198 __field(xfs_extlen_t, tlen)
1228 ), 1199 ),
1229 TP_fast_assign( 1200 TP_fast_assign(
1230 __entry->dev = mp->m_super->s_dev; 1201 __entry->dev = mp->m_super->s_dev;
1231 __entry->agno = agno; 1202 __entry->agno = agno;
1232 __entry->agbno = agbno; 1203 __entry->agbno = agbno;
1233 __entry->len = len; 1204 __entry->len = len;
1234 __entry->found = found; 1205 __entry->tbno = tbno;
1206 __entry->tlen = tlen;
1235 ), 1207 ),
1236 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1208 TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
1237 MAJOR(__entry->dev), MINOR(__entry->dev), 1209 MAJOR(__entry->dev), MINOR(__entry->dev),
1238 __entry->agno, 1210 __entry->agno,
1239 __entry->agbno, 1211 __entry->agbno,
1240 __entry->len, 1212 __entry->len,
1241 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1213 __entry->tbno,
1214 __entry->tlen)
1242); 1215);
1243 1216
1244TRACE_EVENT(xfs_trans_commit_lsn, 1217TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1418 __entry->wasfromfl, 1391 __entry->wasfromfl,
1419 __entry->isfl, 1392 __entry->isfl,
1420 __entry->userdata, 1393 __entry->userdata,
1421 __entry->firstblock) 1394 (unsigned long long)__entry->firstblock)
1422) 1395)
1423 1396
1424#define DEFINE_ALLOC_EVENT(name) \ 1397#define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1433DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); 1406DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1434DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); 1407DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1435DEFINE_ALLOC_EVENT(xfs_alloc_near_error); 1408DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1409DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
1410DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
1436DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); 1411DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1437DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); 1412DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1438DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); 1413DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1439DEFINE_ALLOC_EVENT(xfs_alloc_size_done); 1414DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1440DEFINE_ALLOC_EVENT(xfs_alloc_size_error); 1415DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1416DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
1441DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); 1417DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1442DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); 1418DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1443DEFINE_ALLOC_EVENT(xfs_alloc_small_done); 1419DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa8605a..b94dace4e785 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
60 60
61STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 61STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
63STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); 63STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
64 64
65static struct shrinker xfs_qm_shaker = { 65static struct shrinker xfs_qm_shaker = {
66 .shrink = xfs_qm_shake, 66 .shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist(
2009STATIC int 2009STATIC int
2010xfs_qm_shake( 2010xfs_qm_shake(
2011 struct shrinker *shrink, 2011 struct shrinker *shrink,
2012 int nr_to_scan, 2012 struct shrink_control *sc)
2013 gfp_t gfp_mask)
2014{ 2013{
2015 int ndqused, nfree, n; 2014 int ndqused, nfree, n;
2015 gfp_t gfp_mask = sc->gfp_mask;
2016 2016
2017 if (!kmem_shake_allow(gfp_mask)) 2017 if (!kmem_shake_allow(gfp_mask))
2018 return 0; 2018 return 0;
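For context, the hunk above tracks the kernel-wide shrinker API change: the scan count and gfp flags now arrive packed in a struct shrink_control instead of as separate arguments. A minimal userspace sketch of the before/after callback shape — all names here are illustrative stand-ins, not the kernel's own types:

#include <stdio.h>

/* Hypothetical stand-in for the kernel's struct shrink_control. */
struct shrink_control {
	unsigned long nr_to_scan;
	unsigned int gfp_mask;
};

/* Old style: scan count and gfp flags passed as separate arguments. */
static int shake_old(void *shrinker, int nr_to_scan, unsigned int gfp_mask)
{
	(void)shrinker; (void)gfp_mask;
	return nr_to_scan;
}

/* New style: the same values are unpacked from the control structure,
 * as the converted xfs_qm_shake() does with sc->gfp_mask. */
static int shake_new(void *shrinker, struct shrink_control *sc)
{
	unsigned int gfp_mask = sc->gfp_mask;

	(void)shrinker; (void)gfp_mask;
	return (int)sc->nr_to_scan;
}

int main(void)
{
	struct shrink_control sc = { .nr_to_scan = 128, .gfp_mask = 0 };

	printf("old=%d new=%d\n", shake_old(0, 128, 0), shake_new(0, &sc));
	return 0;
}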
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..6530769a999b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,9 @@ struct xfs_busy_extent {
187 xfs_agnumber_t agno; 187 xfs_agnumber_t agno;
188 xfs_agblock_t bno; 188 xfs_agblock_t bno;
189 xfs_extlen_t length; 189 xfs_extlen_t length;
190 xlog_tid_t tid; /* transaction that created this */ 190 unsigned int flags;
191#define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */
192#define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */
191}; 193};
192 194
193/* 195/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 27d64d752eab..95862bbff56b 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
41#define XFSA_FIXUP_BNO_OK 1 41#define XFSA_FIXUP_BNO_OK 1
42#define XFSA_FIXUP_CNT_OK 2 42#define XFSA_FIXUP_CNT_OK 2
43 43
44/*
45 * Prototypes for per-ag allocation routines
46 */
47
48STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 44STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
49STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 45STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
50STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 46STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
51STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 47STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
52 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 48 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
53 49STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
54/* 50 xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
55 * Internal functions.
56 */
57 51
58/* 52/*
59 * Lookup the record equal to [bno, len] in the btree given by cur. 53 * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
154 xfs_extlen_t *reslen) /* result length */ 148 xfs_extlen_t *reslen) /* result length */
155{ 149{
156 xfs_agblock_t bno; 150 xfs_agblock_t bno;
157 xfs_extlen_t diff;
158 xfs_extlen_t len; 151 xfs_extlen_t len;
159 152
160 if (args->alignment > 1 && foundlen >= args->minlen) { 153 /* Trim busy sections out of found extent */
161 bno = roundup(foundbno, args->alignment); 154 xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
162 diff = bno - foundbno; 155
163 len = diff >= foundlen ? 0 : foundlen - diff; 156 if (args->alignment > 1 && len >= args->minlen) {
157 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
158 xfs_extlen_t diff = aligned_bno - bno;
159
160 *resbno = aligned_bno;
161 *reslen = diff >= len ? 0 : len - diff;
164 } else { 162 } else {
165 bno = foundbno; 163 *resbno = bno;
166 len = foundlen; 164 *reslen = len;
167 } 165 }
168 *resbno = bno;
169 *reslen = len;
170} 166}
171 167
172/* 168/*
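The reworked xfs_alloc_compute_aligned() above first trims busy blocks out of the found extent and only then applies the alignment round-up. A standalone sketch of just the alignment step, with the busy trim stubbed out and roundup() reimplemented locally:

#include <stdio.h>

/* Round x up to the next multiple of align (align >= 1). */
static unsigned int roundup_u(unsigned int x, unsigned int align)
{
	return ((x + align - 1) / align) * align;
}

/*
 * Mirror of the aligned-extent computation: round the start block up
 * to the alignment and shorten the usable length by the blocks that
 * were skipped. A too-short extent degenerates to length zero.
 */
static void compute_aligned(unsigned int bno, unsigned int len,
			    unsigned int alignment, unsigned int minlen,
			    unsigned int *resbno, unsigned int *reslen)
{
	if (alignment > 1 && len >= minlen) {
		unsigned int aligned_bno = roundup_u(bno, alignment);
		unsigned int diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}
}

int main(void)
{
	unsigned int rbno, rlen;

	compute_aligned(1001, 50, 8, 4, &rbno, &rlen);
	printf("aligned extent: bno=%u len=%u\n", rbno, rlen); /* 1008, 43 */
	return 0;
}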
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
280 return 1; 276 return 1;
281 agf = XFS_BUF_TO_AGF(args->agbp); 277 agf = XFS_BUF_TO_AGF(args->agbp);
282 diff = be32_to_cpu(agf->agf_freeblks) 278 diff = be32_to_cpu(agf->agf_freeblks)
283 + be32_to_cpu(agf->agf_flcount)
284 - args->len - args->minleft; 279 - args->len - args->minleft;
285 if (diff >= 0) 280 if (diff >= 0)
286 return 1; 281 return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
541 if (error) 536 if (error)
542 return error; 537 return error;
543 538
544 /* 539 ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
545 * Search the busylist for these blocks and mark the 540 args->agbno, args->len));
546 * transaction as synchronous if blocks are found. This
547 * avoids the need to block due to a synchronous log
548 * force to ensure correct ordering as the synchronous
549 * transaction will guarantee that for us.
550 */
551 if (xfs_alloc_busy_search(args->mp, args->agno,
552 args->agbno, args->len))
553 xfs_trans_set_sync(args->tp);
554 } 541 }
555 542
556 if (!args->isfl) { 543 if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
577{ 564{
578 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 565 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
579 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 566 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
580 xfs_agblock_t end; /* end of allocated extent */
581 int error; 567 int error;
582 xfs_agblock_t fbno; /* start block of found extent */ 568 xfs_agblock_t fbno; /* start block of found extent */
583 xfs_agblock_t fend; /* end block of found extent */
584 xfs_extlen_t flen; /* length of found extent */ 569 xfs_extlen_t flen; /* length of found extent */
570 xfs_agblock_t tbno; /* start block of trimmed extent */
571 xfs_extlen_t tlen; /* length of trimmed extent */
572 xfs_agblock_t tend; /* end block of trimmed extent */
573 xfs_agblock_t end; /* end of allocated extent */
585 int i; /* success/failure of operation */ 574 int i; /* success/failure of operation */
586 xfs_agblock_t maxend; /* end of maximal extent */
587 xfs_agblock_t minend; /* end of minimal extent */
588 xfs_extlen_t rlen; /* length of returned extent */ 575 xfs_extlen_t rlen; /* length of returned extent */
589 576
590 ASSERT(args->alignment == 1); 577 ASSERT(args->alignment == 1);
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
614 goto error0; 601 goto error0;
615 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 602 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
616 ASSERT(fbno <= args->agbno); 603 ASSERT(fbno <= args->agbno);
617 minend = args->agbno + args->minlen;
618 maxend = args->agbno + args->maxlen;
619 fend = fbno + flen;
620 604
621 /* 605 /*
622 * Give up if the freespace isn't long enough for the minimum request. 606 * Check for overlapping busy extents.
623 */ 607 */
624 if (fend < minend) 608 xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
609
610 /*
611 * Give up if the start of the extent is busy, or the freespace isn't
612 * long enough for the minimum request.
613 */
614 if (tbno > args->agbno)
615 goto not_found;
616 if (tlen < args->minlen)
617 goto not_found;
618 tend = tbno + tlen;
619 if (tend < args->agbno + args->minlen)
625 goto not_found; 620 goto not_found;
626 621
627 /* 622 /*
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
630 * 625 *
631 * Fix the length according to mod and prod if given. 626 * Fix the length according to mod and prod if given.
632 */ 627 */
633 end = XFS_AGBLOCK_MIN(fend, maxend); 628 end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
634 args->len = end - args->agbno; 629 args->len = end - args->agbno;
635 xfs_alloc_fix_len(args); 630 xfs_alloc_fix_len(args);
636 if (!xfs_alloc_fix_minleft(args)) 631 if (!xfs_alloc_fix_minleft(args))
637 goto not_found; 632 goto not_found;
638 633
639 rlen = args->len; 634 rlen = args->len;
640 ASSERT(args->agbno + rlen <= fend); 635 ASSERT(args->agbno + rlen <= tend);
641 end = args->agbno + rlen; 636 end = args->agbno + rlen;
642 637
643 /* 638 /*
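The rewritten exact allocator above trims the found extent against busy blocks and then rejects it when the trimmed range no longer covers the requested start or minimum length. A sketch of those feasibility checks in isolation — all names are local to the example:

#include <stdbool.h>
#include <stdio.h>

/*
 * An exact allocation at [agbno, agbno + minlen) is only possible if the
 * trimmed free extent [tbno, tbno + tlen) still starts at or before agbno
 * and reaches at least agbno + minlen, mirroring the checks added above.
 */
static bool exact_alloc_possible(unsigned int agbno, unsigned int minlen,
				 unsigned int tbno, unsigned int tlen)
{
	if (tbno > agbno)			/* start of the extent is busy */
		return false;
	if (tlen < minlen)			/* trimmed extent too short */
		return false;
	if (tbno + tlen < agbno + minlen)	/* doesn't reach far enough */
		return false;
	return true;
}

int main(void)
{
	/* Busy trim moved the start past the requested block: must fail. */
	printf("%d\n", exact_alloc_possible(100, 10, 104, 50)); /* 0 */
	/* Trimmed extent still covers [100, 110): succeeds. */
	printf("%d\n", exact_alloc_possible(100, 10, 96, 20));  /* 1 */
	return 0;
}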
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
686 struct xfs_btree_cur **scur, /* searching cursor */ 681 struct xfs_btree_cur **scur, /* searching cursor */
687 xfs_agblock_t gdiff, /* difference for search comparison */ 682 xfs_agblock_t gdiff, /* difference for search comparison */
688 xfs_agblock_t *sbno, /* extent found by search */ 683 xfs_agblock_t *sbno, /* extent found by search */
689 xfs_extlen_t *slen, 684 xfs_extlen_t *slen, /* extent length */
690 xfs_extlen_t *slena, /* aligned length */ 685 xfs_agblock_t *sbnoa, /* aligned extent found by search */
686 xfs_extlen_t *slena, /* aligned extent length */
691 int dir) /* 0 = search right, 1 = search left */ 687 int dir) /* 0 = search right, 1 = search left */
692{ 688{
693 xfs_agblock_t bno;
694 xfs_agblock_t new; 689 xfs_agblock_t new;
695 xfs_agblock_t sdiff; 690 xfs_agblock_t sdiff;
696 int error; 691 int error;
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
708 if (error) 703 if (error)
709 goto error0; 704 goto error0;
710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 705 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); 706 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
712 707
713 /* 708 /*
714 * The good extent is closer than this one. 709 * The good extent is closer than this one.
715 */ 710 */
716 if (!dir) { 711 if (!dir) {
717 if (bno >= args->agbno + gdiff) 712 if (*sbnoa >= args->agbno + gdiff)
718 goto out_use_good; 713 goto out_use_good;
719 } else { 714 } else {
720 if (bno <= args->agbno - gdiff) 715 if (*sbnoa <= args->agbno - gdiff)
721 goto out_use_good; 716 goto out_use_good;
722 } 717 }
723 718
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
729 xfs_alloc_fix_len(args); 724 xfs_alloc_fix_len(args);
730 725
731 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 726 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
732 args->alignment, *sbno, 727 args->alignment, *sbnoa,
733 *slen, &new); 728 *slena, &new);
734 729
735 /* 730 /*
736 * Choose closer size and invalidate other cursor. 731 * Choose closer size and invalidate other cursor.
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
780 xfs_agblock_t gtbnoa; /* aligned ... */ 775 xfs_agblock_t gtbnoa; /* aligned ... */
781 xfs_extlen_t gtdiff; /* difference to right side entry */ 776 xfs_extlen_t gtdiff; /* difference to right side entry */
782 xfs_extlen_t gtlen; /* length of right side entry */ 777 xfs_extlen_t gtlen; /* length of right side entry */
783 xfs_extlen_t gtlena = 0; /* aligned ... */ 778 xfs_extlen_t gtlena; /* aligned ... */
784 xfs_agblock_t gtnew; /* useful start bno of right side */ 779 xfs_agblock_t gtnew; /* useful start bno of right side */
785 int error; /* error code */ 780 int error; /* error code */
786 int i; /* result code, temporary */ 781 int i; /* result code, temporary */
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
789 xfs_agblock_t ltbnoa; /* aligned ... */ 784 xfs_agblock_t ltbnoa; /* aligned ... */
790 xfs_extlen_t ltdiff; /* difference to left side entry */ 785 xfs_extlen_t ltdiff; /* difference to left side entry */
791 xfs_extlen_t ltlen; /* length of left side entry */ 786 xfs_extlen_t ltlen; /* length of left side entry */
792 xfs_extlen_t ltlena = 0; /* aligned ... */ 787 xfs_extlen_t ltlena; /* aligned ... */
793 xfs_agblock_t ltnew; /* useful start bno of left side */ 788 xfs_agblock_t ltnew; /* useful start bno of left side */
794 xfs_extlen_t rlen; /* length of returned extent */ 789 xfs_extlen_t rlen; /* length of returned extent */
790 int forced = 0;
795#if defined(DEBUG) && defined(__KERNEL__) 791#if defined(DEBUG) && defined(__KERNEL__)
796 /* 792 /*
797 * Randomly don't execute the first algorithm. 793 * Randomly don't execute the first algorithm.
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
800 796
801 dofirst = random32() & 1; 797 dofirst = random32() & 1;
802#endif 798#endif
799
800restart:
801 bno_cur_lt = NULL;
802 bno_cur_gt = NULL;
803 ltlen = 0;
804 gtlena = 0;
805 ltlena = 0;
806
803 /* 807 /*
804 * Get a cursor for the by-size btree. 808 * Get a cursor for the by-size btree.
805 */ 809 */
806 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 810 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
807 args->agno, XFS_BTNUM_CNT); 811 args->agno, XFS_BTNUM_CNT);
808 ltlen = 0; 812
809 bno_cur_lt = bno_cur_gt = NULL;
810 /* 813 /*
811 * See if there are any free extents as big as maxlen. 814 * See if there are any free extents as big as maxlen.
812 */ 815 */
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
822 goto error0; 825 goto error0;
823 if (i == 0 || ltlen == 0) { 826 if (i == 0 || ltlen == 0) {
824 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 827 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
828 trace_xfs_alloc_near_noentry(args);
825 return 0; 829 return 0;
826 } 830 }
827 ASSERT(i == 1); 831 ASSERT(i == 1);
828 } 832 }
829 args->wasfromfl = 0; 833 args->wasfromfl = 0;
834
830 /* 835 /*
831 * First algorithm. 836 * First algorithm.
832 * If the requested extent is large wrt the freespaces available 837 * If the requested extent is large wrt the freespaces available
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
890 if (args->len < blen) 895 if (args->len < blen)
891 continue; 896 continue;
892 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 897 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
893 args->alignment, ltbno, ltlen, &ltnew); 898 args->alignment, ltbnoa, ltlena, &ltnew);
894 if (ltnew != NULLAGBLOCK && 899 if (ltnew != NULLAGBLOCK &&
895 (args->len > blen || ltdiff < bdiff)) { 900 (args->len > blen || ltdiff < bdiff)) {
896 bdiff = ltdiff; 901 bdiff = ltdiff;
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
1042 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1047 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1043 xfs_alloc_fix_len(args); 1048 xfs_alloc_fix_len(args);
1044 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1049 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1045 args->alignment, ltbno, ltlen, &ltnew); 1050 args->alignment, ltbnoa, ltlena, &ltnew);
1046 1051
1047 error = xfs_alloc_find_best_extent(args, 1052 error = xfs_alloc_find_best_extent(args,
1048 &bno_cur_lt, &bno_cur_gt, 1053 &bno_cur_lt, &bno_cur_gt,
1049 ltdiff, &gtbno, &gtlen, &gtlena, 1054 ltdiff, &gtbno, &gtlen,
1055 &gtbnoa, &gtlena,
1050 0 /* search right */); 1056 0 /* search right */);
1051 } else { 1057 } else {
1052 ASSERT(gtlena >= args->minlen); 1058 ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
1057 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1063 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1058 xfs_alloc_fix_len(args); 1064 xfs_alloc_fix_len(args);
1059 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1065 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1060 args->alignment, gtbno, gtlen, &gtnew); 1066 args->alignment, gtbnoa, gtlena, &gtnew);
1061 1067
1062 error = xfs_alloc_find_best_extent(args, 1068 error = xfs_alloc_find_best_extent(args,
1063 &bno_cur_gt, &bno_cur_lt, 1069 &bno_cur_gt, &bno_cur_lt,
1064 gtdiff, &ltbno, &ltlen, &ltlena, 1070 gtdiff, &ltbno, &ltlen,
1071 &ltbnoa, &ltlena,
1065 1 /* search left */); 1072 1 /* search left */);
1066 } 1073 }
1067 1074
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
1073 * If we couldn't get anything, give up. 1080 * If we couldn't get anything, give up.
1074 */ 1081 */
1075 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1082 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1083 if (!forced++) {
1084 trace_xfs_alloc_near_busy(args);
1085 xfs_log_force(args->mp, XFS_LOG_SYNC);
1086 goto restart;
1087 }
1088
1076 trace_xfs_alloc_size_neither(args); 1089 trace_xfs_alloc_size_neither(args);
1077 args->agbno = NULLAGBLOCK; 1090 args->agbno = NULLAGBLOCK;
1078 return 0; 1091 return 0;
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
1107 return 0; 1120 return 0;
1108 } 1121 }
1109 rlen = args->len; 1122 rlen = args->len;
1110 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1123 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1111 ltlen, &ltnew); 1124 ltbnoa, ltlena, &ltnew);
1112 ASSERT(ltnew >= ltbno); 1125 ASSERT(ltnew >= ltbno);
1113 ASSERT(ltnew + rlen <= ltbno + ltlen); 1126 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1114 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1127 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1115 args->agbno = ltnew; 1128 args->agbno = ltnew;
1129
1116 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1130 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1117 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1131 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1118 goto error0; 1132 goto error0;
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
1155 int i; /* temp status variable */ 1169 int i; /* temp status variable */
1156 xfs_agblock_t rbno; /* returned block number */ 1170 xfs_agblock_t rbno; /* returned block number */
1157 xfs_extlen_t rlen; /* length of returned extent */ 1171 xfs_extlen_t rlen; /* length of returned extent */
1172 int forced = 0;
1158 1173
1174restart:
1159 /* 1175 /*
1160 * Allocate and initialize a cursor for the by-size btree. 1176 * Allocate and initialize a cursor for the by-size btree.
1161 */ 1177 */
1162 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1178 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1163 args->agno, XFS_BTNUM_CNT); 1179 args->agno, XFS_BTNUM_CNT);
1164 bno_cur = NULL; 1180 bno_cur = NULL;
1181
1165 /* 1182 /*
1166 * Look for an entry >= maxlen+alignment-1 blocks. 1183 * Look for an entry >= maxlen+alignment-1 blocks.
1167 */ 1184 */
1168 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1185 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1169 args->maxlen + args->alignment - 1, &i))) 1186 args->maxlen + args->alignment - 1, &i)))
1170 goto error0; 1187 goto error0;
1188
1171 /* 1189 /*
1172 * If none, then pick up the last entry in the tree unless the 1190 * If none or we have busy extents that we cannot allocate from, then
1173 * tree is empty. 1191 * we have to settle for a smaller extent. In the case that there are
1192 * no large extents, this will return the last entry in the tree unless
1193 * the tree is empty. In the case that there are only busy large
1194 * extents, this will return the largest small extent unless there
1195 * are no smaller extents available.
1174 */ 1196 */
1175 if (!i) { 1197 if (!i || forced > 1) {
1176 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, 1198 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1177 &flen, &i))) 1199 &fbno, &flen, &i);
1200 if (error)
1178 goto error0; 1201 goto error0;
1179 if (i == 0 || flen == 0) { 1202 if (i == 0 || flen == 0) {
1180 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1203 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
1182 return 0; 1205 return 0;
1183 } 1206 }
1184 ASSERT(i == 1); 1207 ASSERT(i == 1);
1208 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1209 } else {
1210 /*
1211 * Search for a non-busy extent that is large enough.
1212 * If we are at low space, don't check, or if we fall off
1213 * the end of the btree, turn off the busy check and
1214 * restart.
1215 */
1216 for (;;) {
1217 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1218 if (error)
1219 goto error0;
1220 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1221
1222 xfs_alloc_compute_aligned(args, fbno, flen,
1223 &rbno, &rlen);
1224
1225 if (rlen >= args->maxlen)
1226 break;
1227
1228 error = xfs_btree_increment(cnt_cur, 0, &i);
1229 if (error)
1230 goto error0;
1231 if (i == 0) {
1232 /*
1233 * Our only valid extents must have been busy.
1234 * Make it unbusy by forcing the log out and
1235 * retrying. If we've been here before, forcing
1236 * the log isn't making the extents available,
1237 * which means they have probably been freed in
1238 * this transaction. In that case, we have to
1239 * give up on them and we'll attempt a minlen
1240 * allocation the next time around.
1241 */
1242 xfs_btree_del_cursor(cnt_cur,
1243 XFS_BTREE_NOERROR);
1244 trace_xfs_alloc_size_busy(args);
1245 if (!forced++)
1246 xfs_log_force(args->mp, XFS_LOG_SYNC);
1247 goto restart;
1248 }
1249 }
1185 } 1250 }
1186 /* 1251
1187 * There's a freespace as big as maxlen+alignment-1, get it.
1188 */
1189 else {
1190 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1191 goto error0;
1192 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1193 }
1194 /* 1252 /*
1195 * In the first case above, we got the last entry in the 1253 * In the first case above, we got the last entry in the
1196 * by-size btree. Now we check to see if the space hits maxlen 1254 * by-size btree. Now we check to see if the space hits maxlen
1197 * once aligned; if not, we search left for something better. 1255 * once aligned; if not, we search left for something better.
1198 * This can't happen in the second case above. 1256 * This can't happen in the second case above.
1199 */ 1257 */
1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1258 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1259 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1260 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
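Both allocators now share the retry discipline described in the comment above: on the first failure caused by busy extents they force the log and restart, and only on a repeat failure do they fall back (to a minlen allocation, or give up entirely). A schematic of that bounded-retry pattern, with the allocation attempt and log force reduced to stubs:

#include <stdbool.h>
#include <stdio.h>

static int attempts;

/* Stand-in for an allocation try that only succeeds once the log
 * force has cleared the busy extents (here: on the second attempt). */
static bool try_alloc(void)
{
	return ++attempts > 1;
}

static void force_log(void)
{
	printf("forcing log to clear busy extents\n");
}

/*
 * Mirror of the "if (!forced++) { force log; goto restart; }" pattern:
 * at most one log force, then either success or a hard fallback.
 */
static bool alloc_with_retry(void)
{
	int forced = 0;

restart:
	if (try_alloc())
		return true;
	if (!forced++) {
		force_log();
		goto restart;
	}
	return false;	/* fall back to a minlen allocation / give up */
}

int main(void)
{
	printf("allocated: %d\n", alloc_with_retry());
	return 0;
}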
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
1251 * Fix up the length. 1308 * Fix up the length.
1252 */ 1309 */
1253 args->len = rlen; 1310 args->len = rlen;
1254 xfs_alloc_fix_len(args); 1311 if (rlen < args->minlen) {
1255 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1312 if (!forced++) {
1256 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1313 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1257 trace_xfs_alloc_size_nominleft(args); 1314 trace_xfs_alloc_size_busy(args);
1258 args->agbno = NULLAGBLOCK; 1315 xfs_log_force(args->mp, XFS_LOG_SYNC);
1259 return 0; 1316 goto restart;
1317 }
1318 goto out_nominleft;
1260 } 1319 }
1320 xfs_alloc_fix_len(args);
1321
1322 if (!xfs_alloc_fix_minleft(args))
1323 goto out_nominleft;
1261 rlen = args->len; 1324 rlen = args->len;
1262 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); 1325 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1263 /* 1326 /*
@@ -1287,6 +1350,12 @@ error0:
1287 if (bno_cur) 1350 if (bno_cur)
1288 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1351 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1289 return error; 1352 return error;
1353
1354out_nominleft:
1355 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1356 trace_xfs_alloc_size_nominleft(args);
1357 args->agbno = NULLAGBLOCK;
1358 return 0;
1290} 1359}
1291 1360
1292/* 1361/*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
1326 if (error) 1395 if (error)
1327 goto error0; 1396 goto error0;
1328 if (fbno != NULLAGBLOCK) { 1397 if (fbno != NULLAGBLOCK) {
1398 xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
1399 args->userdata);
1400
1329 if (args->userdata) { 1401 if (args->userdata) {
1330 xfs_buf_t *bp; 1402 xfs_buf_t *bp;
1331 1403
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
1617 1689
1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1690 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1619 1691
1620 /*
1621 * Since blocks move to the free list without the coordination
1622 * used in xfs_bmap_finish, we can't allow block to be available
1623 * for reallocation and non-transaction writing (user data)
1624 * until we know that the transaction that moved it to the free
1625 * list is permanently on disk. We track the blocks by declaring
1626 * these blocks as "busy"; the busy list is maintained on a per-ag
1627 * basis and each transaction records which entries should be removed
1628 * when the iclog commits to disk. If a busy block is allocated,
1629 * the iclog is pushed up to the LSN that freed the block.
1630 */
1631 xfs_alloc_busy_insert(tp, agno, bno, len);
1632 return 0; 1692 return 0;
1633 1693
1634 error0: 1694 error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
1923 xfs_alloc_log_agf(tp, agbp, logflags); 1983 xfs_alloc_log_agf(tp, agbp, logflags);
1924 *bnop = bno; 1984 *bnop = bno;
1925 1985
1926 /*
1927 * As blocks are freed, they are added to the per-ag busy list and
1928 * remain there until the freeing transaction is committed to disk.
1929 * Now that we have allocated blocks, this list must be searched to see
1930 * if a block is being reused. If one is, then the freeing transaction
1931 * must be pushed to disk before this transaction.
1932 *
1933 * We do this by setting the current transaction to a sync transaction
1934 * which guarantees that the freeing transaction is on disk before this
1935 * transaction. This is done instead of a synchronous log force here so
1936 * that we don't sit and wait with the AGF locked in the transaction
1937 * during the log force.
1938 */
1939 if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
1940 xfs_trans_set_sync(tp);
1941 return 0; 1986 return 0;
1942} 1987}
1943 1988
@@ -2423,119 +2468,26 @@ xfs_free_extent(
2423 } 2468 }
2424 2469
2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2470 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2471 if (!error)
2472 xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0);
2426error0: 2473error0:
2427 xfs_perag_put(args.pag); 2474 xfs_perag_put(args.pag);
2428 return error; 2475 return error;
2429} 2476}
2430 2477
2431
2432/*
2433 * AG Busy list management
2434 * The busy list contains block ranges that have been freed but whose
2435 * transactions have not yet hit disk. If any block listed in a busy
2436 * list is reused, the transaction that freed it must be forced to disk
2437 * before continuing to use the block.
2438 *
2439 * xfs_alloc_busy_insert - add to the per-ag busy list
2440 * xfs_alloc_busy_clear - remove an item from the per-ag busy list
2441 * xfs_alloc_busy_search - search for a busy extent
2442 */
2443
2444/*
2445 * Insert a new extent into the busy tree.
2446 *
2447 * The busy extent tree is indexed by the start block of the busy extent.
2448 * there can be multiple overlapping ranges in the busy extent tree but only
2449 * ever one entry at a given start block. The reason for this is that
2450 * multi-block extents can be freed, then smaller chunks of that extent
2451 * allocated and freed again before the first transaction commit is on disk.
2452 * If the exact same start block is freed a second time, we have to wait for
2453 * that busy extent to pass out of the tree before the new extent is inserted.
2454 * There are two main cases we have to handle here.
2455 *
2456 * The first case is a transaction that triggers a "free - allocate - free"
2457 * cycle. This can occur during btree manipulations as a btree block is freed
2458 * to the freelist, then allocated from the free list, then freed again. In
 2459 * this case, the second extent free is what triggers the duplicate and as
2460 * such the transaction IDs should match. Because the extent was allocated in
2461 * this transaction, the transaction must be marked as synchronous. This is
2462 * true for all cases where the free/alloc/free occurs in the one transaction,
2463 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
2464 * This serves to catch violations of the second case quite effectively.
2465 *
2466 * The second case is where the free/alloc/free occur in different
2467 * transactions. In this case, the thread freeing the extent the second time
2468 * can't mark the extent busy immediately because it is already tracked in a
2469 * transaction that may be committing. When the log commit for the existing
2470 * busy extent completes, the busy extent will be removed from the tree. If we
2471 * allow the second busy insert to continue using that busy extent structure,
2472 * it can be freed before this transaction is safely in the log. Hence our
2473 * only option in this case is to force the log to remove the existing busy
2474 * extent from the list before we insert the new one with the current
2475 * transaction ID.
2476 *
2477 * The problem we are trying to avoid in the free-alloc-free in separate
2478 * transactions is most easily described with a timeline:
2479 *
2480 * Thread 1 Thread 2 Thread 3 xfslogd
2481 * xact alloc
2482 * free X
2483 * mark busy
2484 * commit xact
2485 * free xact
2486 * xact alloc
2487 * alloc X
2488 * busy search
2489 * mark xact sync
2490 * commit xact
2491 * free xact
2492 * force log
2493 * checkpoint starts
2494 * ....
2495 * xact alloc
2496 * free X
2497 * mark busy
2498 * finds match
2499 * *** KABOOM! ***
2500 * ....
2501 * log IO completes
2502 * unbusy X
2503 * checkpoint completes
2504 *
2505 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
2506 * the checkpoint completes, and the busy extent it matched will have been
2507 * removed from the tree when it is woken. Hence it can then continue safely.
2508 *
2509 * However, to ensure this matching process is robust, we need to use the
 2510 * transaction ID for identifying the transaction, as delayed logging results in
2511 * the busy extent and transaction lifecycles being different. i.e. the busy
2512 * extent is active for a lot longer than the transaction. Hence the
 2513 * transaction structure can be freed and reallocated, then used to mark the same
2514 * extent busy again in the new transaction. In this case the new transaction
2515 * will have a different tid but can have the same address, and hence we need
2516 * to check against the tid.
2517 *
2518 * Future: for delayed logging, we could avoid the log force if the extent was
2519 * first freed in the current checkpoint sequence. This, however, requires the
2520 * ability to pin the current checkpoint in memory until this transaction
2521 * commits to ensure that both the original free and the current one combine
2522 * logically into the one checkpoint. If the checkpoint sequences are
2523 * different, however, we still need to wait on a log force.
2524 */
2525void 2478void
2526xfs_alloc_busy_insert( 2479xfs_alloc_busy_insert(
2527 struct xfs_trans *tp, 2480 struct xfs_trans *tp,
2528 xfs_agnumber_t agno, 2481 xfs_agnumber_t agno,
2529 xfs_agblock_t bno, 2482 xfs_agblock_t bno,
2530 xfs_extlen_t len) 2483 xfs_extlen_t len,
2484 unsigned int flags)
2531{ 2485{
2532 struct xfs_busy_extent *new; 2486 struct xfs_busy_extent *new;
2533 struct xfs_busy_extent *busyp; 2487 struct xfs_busy_extent *busyp;
2534 struct xfs_perag *pag; 2488 struct xfs_perag *pag;
2535 struct rb_node **rbp; 2489 struct rb_node **rbp;
2536 struct rb_node *parent; 2490 struct rb_node *parent = NULL;
2537 int match;
2538
2539 2491
2540 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); 2492 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2541 if (!new) { 2493 if (!new) {
@@ -2544,7 +2496,7 @@ xfs_alloc_busy_insert(
2544 * block, make this a synchronous transaction to insure that 2496 * block, make this a synchronous transaction to insure that
2545 * the block is not reused before this transaction commits. 2497 * the block is not reused before this transaction commits.
2546 */ 2498 */
2547 trace_xfs_alloc_busy(tp, agno, bno, len, 1); 2499 trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
2548 xfs_trans_set_sync(tp); 2500 xfs_trans_set_sync(tp);
2549 return; 2501 return;
2550 } 2502 }
@@ -2552,66 +2504,29 @@ xfs_alloc_busy_insert(
2552 new->agno = agno; 2504 new->agno = agno;
2553 new->bno = bno; 2505 new->bno = bno;
2554 new->length = len; 2506 new->length = len;
2555 new->tid = xfs_log_get_trans_ident(tp);
2556
2557 INIT_LIST_HEAD(&new->list); 2507 INIT_LIST_HEAD(&new->list);
2508 new->flags = flags;
2558 2509
2559 /* trace before insert to be able to see failed inserts */ 2510 /* trace before insert to be able to see failed inserts */
2560 trace_xfs_alloc_busy(tp, agno, bno, len, 0); 2511 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
2561 2512
2562 pag = xfs_perag_get(tp->t_mountp, new->agno); 2513 pag = xfs_perag_get(tp->t_mountp, new->agno);
2563restart:
2564 spin_lock(&pag->pagb_lock); 2514 spin_lock(&pag->pagb_lock);
2565 rbp = &pag->pagb_tree.rb_node; 2515 rbp = &pag->pagb_tree.rb_node;
2566 parent = NULL; 2516 while (*rbp) {
2567 busyp = NULL;
2568 match = 0;
2569 while (*rbp && match >= 0) {
2570 parent = *rbp; 2517 parent = *rbp;
2571 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); 2518 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
2572 2519
2573 if (new->bno < busyp->bno) { 2520 if (new->bno < busyp->bno) {
2574 /* may overlap, but exact start block is lower */
2575 rbp = &(*rbp)->rb_left; 2521 rbp = &(*rbp)->rb_left;
2576 if (new->bno + new->length > busyp->bno) 2522 ASSERT(new->bno + new->length <= busyp->bno);
2577 match = busyp->tid == new->tid ? 1 : -1;
2578 } else if (new->bno > busyp->bno) { 2523 } else if (new->bno > busyp->bno) {
2579 /* may overlap, but exact start block is higher */
2580 rbp = &(*rbp)->rb_right; 2524 rbp = &(*rbp)->rb_right;
2581 if (bno < busyp->bno + busyp->length) 2525 ASSERT(bno >= busyp->bno + busyp->length);
2582 match = busyp->tid == new->tid ? 1 : -1;
2583 } else { 2526 } else {
2584 match = busyp->tid == new->tid ? 1 : -1; 2527 ASSERT(0);
2585 break;
2586 } 2528 }
2587 } 2529 }
2588 if (match < 0) {
2589 /* overlap marked busy in different transaction */
2590 spin_unlock(&pag->pagb_lock);
2591 xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
2592 goto restart;
2593 }
2594 if (match > 0) {
2595 /*
2596 * overlap marked busy in same transaction. Update if exact
2597 * start block match, otherwise combine the busy extents into
2598 * a single range.
2599 */
2600 if (busyp->bno == new->bno) {
2601 busyp->length = max(busyp->length, new->length);
2602 spin_unlock(&pag->pagb_lock);
2603 ASSERT(tp->t_flags & XFS_TRANS_SYNC);
2604 xfs_perag_put(pag);
2605 kmem_free(new);
2606 return;
2607 }
2608 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2609 new->length = max(busyp->bno + busyp->length,
2610 new->bno + new->length) -
2611 min(busyp->bno, new->bno);
2612 new->bno = min(busyp->bno, new->bno);
2613 } else
2614 busyp = NULL;
2615 2530
2616 rb_link_node(&new->rb_node, parent, rbp); 2531 rb_link_node(&new->rb_node, parent, rbp);
2617 rb_insert_color(&new->rb_node, &pag->pagb_tree); 2532 rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2534,6 @@ restart:
2619 list_add(&new->list, &tp->t_busy); 2534 list_add(&new->list, &tp->t_busy);
2620 spin_unlock(&pag->pagb_lock); 2535 spin_unlock(&pag->pagb_lock);
2621 xfs_perag_put(pag); 2536 xfs_perag_put(pag);
2622 kmem_free(busyp);
2623} 2537}
2624 2538
2625/* 2539/*
@@ -2668,31 +2582,466 @@ xfs_alloc_busy_search(
2668 } 2582 }
2669 } 2583 }
2670 spin_unlock(&pag->pagb_lock); 2584 spin_unlock(&pag->pagb_lock);
2671 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
2672 xfs_perag_put(pag); 2585 xfs_perag_put(pag);
2673 return match; 2586 return match;
2674} 2587}
2675 2588
2589/*
2590 * The found free extent [fbno, fend] overlaps part or all of the given busy
2591 * extent. If the overlap covers the beginning, the end, or all of the busy
2592 * extent, the overlapping portion can be made unbusy and used for the
2593 * allocation. We can't split a busy extent because we can't modify a
 2594 * transaction/CIL context busy list, but we can update an entry's block
2595 * number or length.
2596 *
2597 * Returns true if the extent can safely be reused, or false if the search
2598 * needs to be restarted.
2599 */
2600STATIC bool
2601xfs_alloc_busy_update_extent(
2602 struct xfs_mount *mp,
2603 struct xfs_perag *pag,
2604 struct xfs_busy_extent *busyp,
2605 xfs_agblock_t fbno,
2606 xfs_extlen_t flen,
2607 bool userdata)
2608{
2609 xfs_agblock_t fend = fbno + flen;
2610 xfs_agblock_t bbno = busyp->bno;
2611 xfs_agblock_t bend = bbno + busyp->length;
2612
2613 /*
2614 * This extent is currently being discarded. Give the thread
2615 * performing the discard a chance to mark the extent unbusy
2616 * and retry.
2617 */
2618 if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
2619 spin_unlock(&pag->pagb_lock);
2620 delay(1);
2621 spin_lock(&pag->pagb_lock);
2622 return false;
2623 }
2624
2625 /*
2626 * If there is a busy extent overlapping a user allocation, we have
2627 * no choice but to force the log and retry the search.
2628 *
2629 * Fortunately this does not happen during normal operation, but
2630 * only if the filesystem is very low on space and has to dip into
2631 * the AGFL for normal allocations.
2632 */
2633 if (userdata)
2634 goto out_force_log;
2635
2636 if (bbno < fbno && bend > fend) {
2637 /*
2638 * Case 1:
2639 * bbno bend
2640 * +BBBBBBBBBBBBBBBBB+
2641 * +---------+
2642 * fbno fend
2643 */
2644
2645 /*
2646 * We would have to split the busy extent to be able to track
 2647 * it correctly, which we cannot do because we would have to
2648 * modify the list of busy extents attached to the transaction
2649 * or CIL context, which is immutable.
2650 *
2651 * Force out the log to clear the busy extent and retry the
2652 * search.
2653 */
2654 goto out_force_log;
2655 } else if (bbno >= fbno && bend <= fend) {
2656 /*
2657 * Case 2:
2658 * bbno bend
2659 * +BBBBBBBBBBBBBBBBB+
2660 * +-----------------+
2661 * fbno fend
2662 *
2663 * Case 3:
2664 * bbno bend
2665 * +BBBBBBBBBBBBBBBBB+
2666 * +--------------------------+
2667 * fbno fend
2668 *
2669 * Case 4:
2670 * bbno bend
2671 * +BBBBBBBBBBBBBBBBB+
2672 * +--------------------------+
2673 * fbno fend
2674 *
2675 * Case 5:
2676 * bbno bend
2677 * +BBBBBBBBBBBBBBBBB+
2678 * +-----------------------------------+
2679 * fbno fend
2680 *
2681 */
2682
2683 /*
2684 * The busy extent is fully covered by the extent we are
2685 * allocating, and can simply be removed from the rbtree.
 2686 * However, we cannot remove it from the immutable list
2687 * tracking busy extents in the transaction or CIL context,
2688 * so set the length to zero to mark it invalid.
2689 *
2690 * We also need to restart the busy extent search from the
2691 * tree root, because erasing the node can rearrange the
2692 * tree topology.
2693 */
2694 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2695 busyp->length = 0;
2696 return false;
2697 } else if (fend < bend) {
2698 /*
2699 * Case 6:
2700 * bbno bend
2701 * +BBBBBBBBBBBBBBBBB+
2702 * +---------+
2703 * fbno fend
2704 *
2705 * Case 7:
2706 * bbno bend
2707 * +BBBBBBBBBBBBBBBBB+
2708 * +------------------+
2709 * fbno fend
2710 *
2711 */
2712 busyp->bno = fend;
2713 } else if (bbno < fbno) {
2714 /*
2715 * Case 8:
2716 * bbno bend
2717 * +BBBBBBBBBBBBBBBBB+
2718 * +-------------+
2719 * fbno fend
2720 *
2721 * Case 9:
2722 * bbno bend
2723 * +BBBBBBBBBBBBBBBBB+
2724 * +----------------------+
2725 * fbno fend
2726 */
2727 busyp->length = fbno - busyp->bno;
2728 } else {
2729 ASSERT(0);
2730 }
2731
2732 trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
2733 return true;
2734
2735out_force_log:
2736 spin_unlock(&pag->pagb_lock);
2737 xfs_log_force(mp, XFS_LOG_SYNC);
2738 trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
2739 spin_lock(&pag->pagb_lock);
2740 return false;
2741}
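The case analysis in xfs_alloc_busy_update_extent() above reduces to four outcomes once the busy extent [bbno, bend) is compared against the allocated range [fbno, fend): an interior allocation would need a split (impossible, so force the log), full coverage drops the busy extent, a front overlap advances its start, and a tail overlap shortens its length. A compact sketch of that classification — the caller is assumed to have already established that the two ranges overlap:

#include <stdio.h>

enum overlap_action {
	FORCE_LOG,	/* would need a split: case 1 */
	DROP_BUSY,	/* busy extent fully covered: cases 2-5 */
	TRIM_FRONT,	/* allocation eats the front: cases 6-7, bno = fend */
	TRIM_TAIL,	/* allocation eats the tail: cases 8-9, length = fbno - bbno */
};

static enum overlap_action classify(unsigned int bbno, unsigned int bend,
				    unsigned int fbno, unsigned int fend)
{
	if (bbno < fbno && bend > fend)
		return FORCE_LOG;
	if (bbno >= fbno && bend <= fend)
		return DROP_BUSY;
	if (fend < bend)
		return TRIM_FRONT;
	return TRIM_TAIL;	/* only bbno < fbno remains possible */
}

int main(void)
{
	/* Busy [10,50) vs allocation [20,30): strictly inside the busy range. */
	printf("%d\n", classify(10, 50, 20, 30)); /* FORCE_LOG */
	/* Busy [10,50) vs allocation [0,60): busy extent fully covered. */
	printf("%d\n", classify(10, 50, 0, 60));  /* DROP_BUSY */
	return 0;
}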
2742
2743
2744/*
2745 * For a given extent [fbno, flen], make sure we can reuse it safely.
2746 */
2676void 2747void
2677xfs_alloc_busy_clear( 2748xfs_alloc_busy_reuse(
2678 struct xfs_mount *mp, 2749 struct xfs_mount *mp,
2679 struct xfs_busy_extent *busyp) 2750 xfs_agnumber_t agno,
2751 xfs_agblock_t fbno,
2752 xfs_extlen_t flen,
2753 bool userdata)
2680{ 2754{
2681 struct xfs_perag *pag; 2755 struct xfs_perag *pag;
2756 struct rb_node *rbp;
2682 2757
2683 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, 2758 ASSERT(flen > 0);
2684 busyp->length);
2685 2759
2686 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, 2760 pag = xfs_perag_get(mp, agno);
2687 busyp->length) == 1); 2761 spin_lock(&pag->pagb_lock);
2762restart:
2763 rbp = pag->pagb_tree.rb_node;
2764 while (rbp) {
2765 struct xfs_busy_extent *busyp =
2766 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2767 xfs_agblock_t bbno = busyp->bno;
2768 xfs_agblock_t bend = bbno + busyp->length;
2688 2769
2689 list_del_init(&busyp->list); 2770 if (fbno + flen <= bbno) {
2771 rbp = rbp->rb_left;
2772 continue;
2773 } else if (fbno >= bend) {
2774 rbp = rbp->rb_right;
2775 continue;
2776 }
2690 2777
2691 pag = xfs_perag_get(mp, busyp->agno); 2778 if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
2692 spin_lock(&pag->pagb_lock); 2779 userdata))
2693 rb_erase(&busyp->rb_node, &pag->pagb_tree); 2780 goto restart;
2781 }
2694 spin_unlock(&pag->pagb_lock); 2782 spin_unlock(&pag->pagb_lock);
2695 xfs_perag_put(pag); 2783 xfs_perag_put(pag);
2784}
2785
2786/*
2787 * For a given extent [fbno, flen], search the busy extent list to find a
2788 * subset of the extent that is not busy. If *rlen is smaller than
 2789 * args->minlen, no suitable extent could be found, and the higher level
2790 * code needs to force out the log and retry the allocation.
2791 */
2792STATIC void
2793xfs_alloc_busy_trim(
2794 struct xfs_alloc_arg *args,
2795 xfs_agblock_t bno,
2796 xfs_extlen_t len,
2797 xfs_agblock_t *rbno,
2798 xfs_extlen_t *rlen)
2799{
2800 xfs_agblock_t fbno;
2801 xfs_extlen_t flen;
2802 struct rb_node *rbp;
2803
2804 ASSERT(len > 0);
2805
2806 spin_lock(&args->pag->pagb_lock);
2807restart:
2808 fbno = bno;
2809 flen = len;
2810 rbp = args->pag->pagb_tree.rb_node;
2811 while (rbp && flen >= args->minlen) {
2812 struct xfs_busy_extent *busyp =
2813 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2814 xfs_agblock_t fend = fbno + flen;
2815 xfs_agblock_t bbno = busyp->bno;
2816 xfs_agblock_t bend = bbno + busyp->length;
2817
2818 if (fend <= bbno) {
2819 rbp = rbp->rb_left;
2820 continue;
2821 } else if (fbno >= bend) {
2822 rbp = rbp->rb_right;
2823 continue;
2824 }
2825
2826 /*
2827 * If this is a metadata allocation, try to reuse the busy
2828 * extent instead of trimming the allocation.
2829 */
2830 if (!args->userdata &&
2831 !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
2832 if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
2833 busyp, fbno, flen,
2834 false))
2835 goto restart;
2836 continue;
2837 }
2838
2839 if (bbno <= fbno) {
2840 /* start overlap */
2841
2842 /*
2843 * Case 1:
2844 * bbno bend
2845 * +BBBBBBBBBBBBBBBBB+
2846 * +---------+
2847 * fbno fend
2848 *
2849 * Case 2:
2850 * bbno bend
2851 * +BBBBBBBBBBBBBBBBB+
2852 * +-------------+
2853 * fbno fend
2854 *
2855 * Case 3:
2856 * bbno bend
2857 * +BBBBBBBBBBBBBBBBB+
2858 * +-------------+
2859 * fbno fend
2860 *
2861 * Case 4:
2862 * bbno bend
2863 * +BBBBBBBBBBBBBBBBB+
2864 * +-----------------+
2865 * fbno fend
2866 *
2867 * No unbusy region in extent, return failure.
2868 */
2869 if (fend <= bend)
2870 goto fail;
2871
2872 /*
2873 * Case 5:
2874 * bbno bend
2875 * +BBBBBBBBBBBBBBBBB+
2876 * +----------------------+
2877 * fbno fend
2878 *
2879 * Case 6:
2880 * bbno bend
2881 * +BBBBBBBBBBBBBBBBB+
2882 * +--------------------------+
2883 * fbno fend
2884 *
2885 * Needs to be trimmed to:
2886 * +-------+
2887 * fbno fend
2888 */
2889 fbno = bend;
2890 } else if (bend >= fend) {
2891 /* end overlap */
2892
2893 /*
2894 * Case 7:
2895 * bbno bend
2896 * +BBBBBBBBBBBBBBBBB+
2897 * +------------------+
2898 * fbno fend
2899 *
2900 * Case 8:
2901 * bbno bend
2902 * +BBBBBBBBBBBBBBBBB+
2903 * +--------------------------+
2904 * fbno fend
2905 *
2906 * Needs to be trimmed to:
2907 * +-------+
2908 * fbno fend
2909 */
2910 fend = bbno;
2911 } else {
2912 /* middle overlap */
2913
2914 /*
2915 * Case 9:
2916 * bbno bend
2917 * +BBBBBBBBBBBBBBBBB+
2918 * +-----------------------------------+
2919 * fbno fend
2920 *
2921 * Can be trimmed to:
2922 * +-------+ OR +-------+
2923 * fbno fend fbno fend
2924 *
2925 * Backward allocation leads to significant
2926 * fragmentation of directories, which degrades
2927 * directory performance, therefore we always want to
2928 * choose the option that produces forward allocation
2929 * patterns.
2930 * Preferring the lower bno extent will make the next
2931 * request use "fend" as the start of the next
2932 * allocation; if the segment is no longer busy at
2933 * that point, we'll get a contiguous allocation, but
2934 * even if it is still busy, we will get a forward
2935 * allocation.
2936 * We try to avoid choosing the segment at "bend",
2937 * because that can lead to the next allocation
2938 * taking the segment at "fbno", which would be a
2939 * backward allocation. We only use the segment at
2940 * "fbno" if it is much larger than the current
2941 * requested size, because in that case there's a
2942 * good chance subsequent allocations will be
2943 * contiguous.
2944 */
2945 if (bbno - fbno >= args->maxlen) {
 2946 /* left candidate fits perfectly */
2947 fend = bbno;
2948 } else if (fend - bend >= args->maxlen * 4) {
2949 /* right candidate has enough free space */
2950 fbno = bend;
2951 } else if (bbno - fbno >= args->minlen) {
2952 /* left candidate fits minimum requirement */
2953 fend = bbno;
2954 } else {
2955 goto fail;
2956 }
2957 }
2958
2959 flen = fend - fbno;
2960 }
2961 spin_unlock(&args->pag->pagb_lock);
2962
2963 if (fbno != bno || flen != len) {
2964 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
2965 fbno, flen);
2966 }
2967 *rbno = fbno;
2968 *rlen = flen;
2969 return;
2970fail:
2971 /*
 2972 * Return a zero extent length as a failure indication. All callers
2973 * re-check if the trimmed extent satisfies the minlen requirement.
2974 */
2975 spin_unlock(&args->pag->pagb_lock);
2976 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
2977 *rbno = fbno;
2978 *rlen = 0;
2979}
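The middle-overlap branch of xfs_alloc_busy_trim() above picks between the free segments on either side of the busy range so that allocations keep moving forward through the AG, per the long comment in the patch. The selection rule in isolation, as a sketch — the busy range is assumed to sit strictly inside the free extent:

#include <stdbool.h>
#include <stdio.h>

/*
 * Busy range [bbno, bend) sits strictly inside the free extent
 * [fbno, fend). Choose the left segment [fbno, bbno) or the right
 * segment [bend, fend) with the same preferences as the patch:
 * left if it satisfies maxlen, right only if it is much larger,
 * otherwise left if it at least satisfies minlen.
 */
static bool choose_segment(unsigned int fbno, unsigned int fend,
			   unsigned int bbno, unsigned int bend,
			   unsigned int minlen, unsigned int maxlen,
			   unsigned int *rbno, unsigned int *rlen)
{
	if (bbno - fbno >= maxlen) {		/* left fits the request */
		*rbno = fbno;
		*rlen = bbno - fbno;
	} else if (fend - bend >= maxlen * 4) {	/* right has ample space */
		*rbno = bend;
		*rlen = fend - bend;
	} else if (bbno - fbno >= minlen) {	/* left meets the minimum */
		*rbno = fbno;
		*rlen = bbno - fbno;
	} else {
		return false;			/* no usable segment */
	}
	return true;
}

int main(void)
{
	unsigned int rbno, rlen;

	if (choose_segment(0, 100, 30, 40, 8, 16, &rbno, &rlen))
		printf("use [%u, %u)\n", rbno, rbno + rlen); /* left: [0, 30) */
	return 0;
}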
2980
2981static void
2982xfs_alloc_busy_clear_one(
2983 struct xfs_mount *mp,
2984 struct xfs_perag *pag,
2985 struct xfs_busy_extent *busyp)
2986{
2987 if (busyp->length) {
2988 trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
2989 busyp->length);
2990 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2991 }
2696 2992
2993 list_del_init(&busyp->list);
2697 kmem_free(busyp); 2994 kmem_free(busyp);
2698} 2995}
2996
2997/*
2998 * Remove all extents on the passed in list from the busy extents tree.
2999 * If do_discard is set skip extents that need to be discarded, and mark
3000 * these as undergoing a discard operation instead.
3001 */
3002void
3003xfs_alloc_busy_clear(
3004 struct xfs_mount *mp,
3005 struct list_head *list,
3006 bool do_discard)
3007{
3008 struct xfs_busy_extent *busyp, *n;
3009 struct xfs_perag *pag = NULL;
3010 xfs_agnumber_t agno = NULLAGNUMBER;
3011
3012 list_for_each_entry_safe(busyp, n, list, list) {
3013 if (busyp->agno != agno) {
3014 if (pag) {
3015 spin_unlock(&pag->pagb_lock);
3016 xfs_perag_put(pag);
3017 }
3018 pag = xfs_perag_get(mp, busyp->agno);
3019 spin_lock(&pag->pagb_lock);
3020 agno = busyp->agno;
3021 }
3022
3023 if (do_discard && busyp->length &&
3024 !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
3025 busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
3026 else
3027 xfs_alloc_busy_clear_one(mp, pag, busyp);
3028 }
3029
3030 if (pag) {
3031 spin_unlock(&pag->pagb_lock);
3032 xfs_perag_put(pag);
3033 }
3034}
3035
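xfs_alloc_busy_clear() above walks a list that has been pre-sorted by AG, so it can hold one per-AG lock across a run of entries instead of re-taking it for every extent. The batching shape, with locks and extents reduced to stubs:

#include <stdio.h>

#define NULLAG (~0u)

struct busy { unsigned int agno; };

static void lock_ag(unsigned int agno)   { printf("lock ag %u\n", agno); }
static void unlock_ag(unsigned int agno) { printf("unlock ag %u\n", agno); }

/*
 * Walk an AG-sorted array, switching locks only when the AG changes,
 * mirroring the list_for_each_entry_safe() loop in the patch.
 */
static void clear_all(struct busy *v, int n)
{
	unsigned int agno = NULLAG;
	int i;

	for (i = 0; i < n; i++) {
		if (v[i].agno != agno) {
			if (agno != NULLAG)
				unlock_ag(agno);
			agno = v[i].agno;
			lock_ag(agno);
		}
		/* clear one busy extent here, under the AG lock */
	}
	if (agno != NULLAG)
		unlock_ag(agno);
}

int main(void)
{
	struct busy v[] = { {0}, {0}, {1}, {2}, {2} };

	clear_all(v, 5);
	return 0;
}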
3036/*
3037 * Callback for list_sort to sort busy extents by the AG they reside in.
3038 */
3039int
3040xfs_busy_extent_ag_cmp(
3041 void *priv,
3042 struct list_head *a,
3043 struct list_head *b)
3044{
3045 return container_of(a, struct xfs_busy_extent, list)->agno -
3046 container_of(b, struct xfs_busy_extent, list)->agno;
3047}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..2f52b924be79 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -137,14 +137,28 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
137#ifdef __KERNEL__ 137#ifdef __KERNEL__
138void 138void
139xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, 139xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
140 xfs_agblock_t bno, xfs_extlen_t len); 140 xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
141 141
142void 142void
143xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); 143xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
144 bool do_discard);
144 145
145int 146int
146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, 147xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
147 xfs_agblock_t bno, xfs_extlen_t len); 148 xfs_agblock_t bno, xfs_extlen_t len);
149
150void
151xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
152 xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
153
154int
155xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
156
157static inline void xfs_alloc_busy_sort(struct list_head *list)
158{
159 list_sort(NULL, list, xfs_busy_extent_ag_cmp);
160}
161
148#endif /* __KERNEL__ */ 162#endif /* __KERNEL__ */
149 163
150/* 164/*
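The new xfs_alloc_busy_sort() inline above exists so callers can sort a transaction's busy list by AG before clearing it, which is what makes the single-lock batching in xfs_alloc_busy_clear() safe. A userspace analogue of the comparator using qsort — the struct and names are local to the example:

#include <stdio.h>
#include <stdlib.h>

struct busy { unsigned int agno; };

/* Same ordering rule as xfs_busy_extent_ag_cmp: ascending AG number. */
static int busy_ag_cmp(const void *a, const void *b)
{
	const struct busy *ba = a, *bb = b;

	return (int)ba->agno - (int)bb->agno;
}

int main(void)
{
	struct busy v[] = { {2}, {0}, {1}, {0} };
	int i;

	qsort(v, 4, sizeof(v[0]), busy_ag_cmp);
	for (i = 0; i < 4; i++)
		printf("%u ", v[i].agno); /* 0 0 1 2 */
	printf("\n");
	return 0;
}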
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3916925e2584..2b3518826a69 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
95 return 0; 95 return 0;
96 } 96 }
97 97
98 xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
99
98 xfs_trans_agbtree_delta(cur->bc_tp, 1); 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
99 new->s = cpu_to_be32(bno); 101 new->s = cpu_to_be32(bno);
100 102
@@ -118,18 +120,8 @@ xfs_allocbt_free_block(
118 if (error) 120 if (error)
119 return error; 121 return error;
120 122
121 /* 123 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
122 * Since blocks move to the free list without the coordination used in 124 XFS_ALLOC_BUSY_SKIP_DISCARD);
123 * xfs_bmap_finish, we can't allow block to be available for
124 * reallocation and non-transaction writing (user data) until we know
125 * that the transaction that moved it to the free list is permanently
126 * on disk. We track the blocks by declaring these blocks as "busy";
127 * the busy list is maintained on a per-ag basis and each transaction
128 * records which entries should be removed when the iclog commits to
129 * disk. If a busy block is allocated, the iclog is pushed up to the
130 * LSN that freed the block.
131 */
132 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
133 xfs_trans_agbtree_delta(cur->bc_tp, -1); 125 xfs_trans_agbtree_delta(cur->bc_tp, -1);
134 return 0; 126 return 0;
135} 127}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index fa00788de2f5..e546a33214c9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -89,36 +89,19 @@ xfs_bmap_add_attrfork_local(
89 int *flags); /* inode logging flags */ 89 int *flags); /* inode logging flags */
90 90
91/* 91/*
92 * Called by xfs_bmapi to update file extent records and the btree
93 * after allocating space (or doing a delayed allocation).
94 */
95STATIC int /* error */
96xfs_bmap_add_extent(
97 xfs_inode_t *ip, /* incore inode pointer */
98 xfs_extnum_t idx, /* extent number to update/insert */
99 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
100 xfs_bmbt_irec_t *new, /* new data to add to file extents */
101 xfs_fsblock_t *first, /* pointer to firstblock variable */
102 xfs_bmap_free_t *flist, /* list of extents to be freed */
103 int *logflagsp, /* inode logging flags */
104 int whichfork, /* data or attr fork */
105 int rsvd); /* OK to allocate reserved blocks */
106
107/*
108 * Called by xfs_bmap_add_extent to handle cases converting a delayed 92 * Called by xfs_bmap_add_extent to handle cases converting a delayed
109 * allocation to a real allocation. 93 * allocation to a real allocation.
110 */ 94 */
111STATIC int /* error */ 95STATIC int /* error */
112xfs_bmap_add_extent_delay_real( 96xfs_bmap_add_extent_delay_real(
113 xfs_inode_t *ip, /* incore inode pointer */ 97 xfs_inode_t *ip, /* incore inode pointer */
114 xfs_extnum_t idx, /* extent number to update/insert */ 98 xfs_extnum_t *idx, /* extent number to update/insert */
115 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 99 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
116 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 100 xfs_bmbt_irec_t *new, /* new data to add to file extents */
117 xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ 101 xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */
118 xfs_fsblock_t *first, /* pointer to firstblock variable */ 102 xfs_fsblock_t *first, /* pointer to firstblock variable */
119 xfs_bmap_free_t *flist, /* list of extents to be freed */ 103 xfs_bmap_free_t *flist, /* list of extents to be freed */
120 int *logflagsp, /* inode logging flags */ 104 int *logflagsp); /* inode logging flags */
121 int rsvd); /* OK to allocate reserved blocks */
122 105
123/* 106/*
124 * Called by xfs_bmap_add_extent to handle cases converting a hole 107 * Called by xfs_bmap_add_extent to handle cases converting a hole
@@ -127,10 +110,9 @@ xfs_bmap_add_extent_delay_real(
127STATIC int /* error */ 110STATIC int /* error */
128xfs_bmap_add_extent_hole_delay( 111xfs_bmap_add_extent_hole_delay(
129 xfs_inode_t *ip, /* incore inode pointer */ 112 xfs_inode_t *ip, /* incore inode pointer */
130 xfs_extnum_t idx, /* extent number to update/insert */ 113 xfs_extnum_t *idx, /* extent number to update/insert */
131 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 114 xfs_bmbt_irec_t *new, /* new data to add to file extents */
132 int *logflagsp,/* inode logging flags */ 115 int *logflagsp); /* inode logging flags */
133 int rsvd); /* OK to allocate reserved blocks */
134 116
135/* 117/*
136 * Called by xfs_bmap_add_extent to handle cases converting a hole 118 * Called by xfs_bmap_add_extent to handle cases converting a hole
@@ -139,7 +121,7 @@ xfs_bmap_add_extent_hole_delay(
139STATIC int /* error */ 121STATIC int /* error */
140xfs_bmap_add_extent_hole_real( 122xfs_bmap_add_extent_hole_real(
141 xfs_inode_t *ip, /* incore inode pointer */ 123 xfs_inode_t *ip, /* incore inode pointer */
142 xfs_extnum_t idx, /* extent number to update/insert */ 124 xfs_extnum_t *idx, /* extent number to update/insert */
143 xfs_btree_cur_t *cur, /* if null, not a btree */ 125 xfs_btree_cur_t *cur, /* if null, not a btree */
144 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 126 xfs_bmbt_irec_t *new, /* new data to add to file extents */
145 int *logflagsp, /* inode logging flags */ 127 int *logflagsp, /* inode logging flags */
@@ -152,7 +134,7 @@ xfs_bmap_add_extent_hole_real(
152STATIC int /* error */ 134STATIC int /* error */
153xfs_bmap_add_extent_unwritten_real( 135xfs_bmap_add_extent_unwritten_real(
154 xfs_inode_t *ip, /* incore inode pointer */ 136 xfs_inode_t *ip, /* incore inode pointer */
155 xfs_extnum_t idx, /* extent number to update/insert */ 137 xfs_extnum_t *idx, /* extent number to update/insert */
156 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 138 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
157 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 139 xfs_bmbt_irec_t *new, /* new data to add to file extents */
158 int *logflagsp); /* inode logging flags */ 140 int *logflagsp); /* inode logging flags */
@@ -180,22 +162,6 @@ xfs_bmap_btree_to_extents(
 	int			whichfork); /* data or attr fork */
 
 /*
- * Called by xfs_bmapi to update file extent records and the btree
- * after removing space (or undoing a delayed allocation).
- */
-STATIC int				/* error */
-xfs_bmap_del_extent(
-	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_trans_t		*tp,	/* current trans pointer */
-	xfs_extnum_t		idx,	/* extent number to update/insert */
-	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
-	xfs_btree_cur_t		*cur,	/* if null, not a btree */
-	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
-	int			*logflagsp,/* inode logging flags */
-	int			whichfork, /* data or attr fork */
-	int			rsvd);	/* OK to allocate reserved blocks */
-
-/*
  * Remove the entry "free" from the free item list. Prev points to the
  * previous entry, unless "free" is the head of the list.
  */
@@ -474,14 +440,13 @@ xfs_bmap_add_attrfork_local(
 STATIC int				/* error */
 xfs_bmap_add_extent(
 	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	int			*logflagsp, /* inode logging flags */
-	int			whichfork, /* data or attr fork */
-	int			rsvd)	/* OK to use reserved data blocks */
+	int			whichfork) /* data or attr fork */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor or null */
 	xfs_filblks_t		da_new; /* new count del alloc blocks used */
@@ -492,23 +457,27 @@ xfs_bmap_add_extent(
 	xfs_extnum_t		nextents; /* number of extents in file now */
 
 	XFS_STATS_INC(xs_add_exlist);
+
 	cur = *curp;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ASSERT(idx <= nextents);
 	da_old = da_new = 0;
 	error = 0;
+
+	ASSERT(*idx >= 0);
+	ASSERT(*idx <= nextents);
+
 	/*
 	 * This is the first extent added to a new/empty file.
 	 * Special case this one, so other routines get to assume there are
 	 * already extents in the list.
 	 */
 	if (nextents == 0) {
-		xfs_iext_insert(ip, 0, 1, new,
+		xfs_iext_insert(ip, *idx, 1, new,
 			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
 
 		ASSERT(cur == NULL);
-		ifp->if_lastex = 0;
+
 		if (!isnullstartblock(new->br_startblock)) {
 			XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 			logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
@@ -522,27 +491,25 @@ xfs_bmap_add_extent(
 		if (cur)
 			ASSERT((cur->bc_private.b.flags &
 				XFS_BTCUR_BPRV_WASDEL) == 0);
-		if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
-				&logflags, rsvd)))
-			goto done;
+		error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
+						       &logflags);
 	}
 	/*
 	 * Real allocation off the end of the file.
 	 */
-	else if (idx == nextents) {
+	else if (*idx == nextents) {
 		if (cur)
 			ASSERT((cur->bc_private.b.flags &
 				XFS_BTCUR_BPRV_WASDEL) == 0);
-		if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
-				&logflags, whichfork)))
-			goto done;
+		error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
+						      &logflags, whichfork);
 	} else {
 		xfs_bmbt_irec_t	prev;	/* old extent at offset idx */
 
 		/*
 		 * Get the record referred to by idx.
 		 */
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev);
 		/*
 		 * If it's a real allocation record, and the new allocation ends
 		 * after the start of the referred to record, then we're filling
@@ -557,22 +524,18 @@ xfs_bmap_add_extent(
 			if (cur)
 				ASSERT(cur->bc_private.b.flags &
 					XFS_BTCUR_BPRV_WASDEL);
-			if ((error = xfs_bmap_add_extent_delay_real(ip,
-					idx, &cur, new, &da_new, first, flist,
-					&logflags, rsvd)))
-				goto done;
-		} else if (new->br_state == XFS_EXT_NORM) {
-			ASSERT(new->br_state == XFS_EXT_NORM);
-			if ((error = xfs_bmap_add_extent_unwritten_real(
-				ip, idx, &cur, new, &logflags)))
-				goto done;
+			error = xfs_bmap_add_extent_delay_real(ip,
+					idx, &cur, new, &da_new,
+					first, flist, &logflags);
 		} else {
-			ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
-			if ((error = xfs_bmap_add_extent_unwritten_real(
-				ip, idx, &cur, new, &logflags)))
+			ASSERT(new->br_state == XFS_EXT_NORM ||
+			       new->br_state == XFS_EXT_UNWRITTEN);
+
+			error = xfs_bmap_add_extent_unwritten_real(ip,
+					idx, &cur, new, &logflags);
+			if (error)
 				goto done;
 		}
-		ASSERT(*curp == cur || *curp == NULL);
 	}
 	/*
 	 * Otherwise we're filling in a hole with an allocation.
@@ -581,13 +544,15 @@ xfs_bmap_add_extent(
 		if (cur)
 			ASSERT((cur->bc_private.b.flags &
 				XFS_BTCUR_BPRV_WASDEL) == 0);
-		if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
-				new, &logflags, whichfork)))
-			goto done;
+		error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
+				new, &logflags, whichfork);
 		}
 	}
 
+	if (error)
+		goto done;
 	ASSERT(*curp == cur || *curp == NULL);
+
 	/*
 	 * Convert to a btree if necessary.
 	 */
@@ -615,7 +580,7 @@ xfs_bmap_add_extent(
 		ASSERT(nblks <= da_old);
 		if (nblks < da_old)
 			xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-				(int64_t)(da_old - nblks), rsvd);
+				(int64_t)(da_old - nblks), 0);
 	}
 	/*
 	 * Clear out the allocated field, done with it now in any case.
@@ -640,14 +605,13 @@ done:
 STATIC int				/* error */
 xfs_bmap_add_extent_delay_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_filblks_t		*dnew,	/* new delayed-alloc indirect blocks */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
-	int			*logflagsp, /* inode logging flags */
-	int			rsvd)	/* OK to use reserved data block allocation */
+	int			*logflagsp) /* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	int			diff;	/* temp value */
@@ -673,7 +637,7 @@ xfs_bmap_add_extent_delay_real(
 	 */
 	cur = *curp;
 	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	ep = xfs_iext_get_ext(ifp, idx);
+	ep = xfs_iext_get_ext(ifp, *idx);
 	xfs_bmbt_get_all(ep, &PREV);
 	new_endoff = new->br_startoff + new->br_blockcount;
 	ASSERT(PREV.br_startoff <= new->br_startoff);
@@ -692,9 +656,9 @@ xfs_bmap_add_extent_delay_real(
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (idx > 0) {
+	if (*idx > 0) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
 
 		if (isnullstartblock(LEFT.br_startblock))
 			state |= BMAP_LEFT_DELAY;
@@ -712,9 +676,9 @@ xfs_bmap_add_extent_delay_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
 
 		if (isnullstartblock(RIGHT.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
@@ -745,14 +709,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		--*idx;
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		xfs_iext_remove(ip, idx, 2, state);
-		ip->i_df.if_lastex = idx - 1;
+		xfs_iext_remove(ip, *idx + 1, 2, state);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -784,13 +748,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		--*idx;
+
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ip->i_df.if_lastex = idx - 1;
-		xfs_iext_remove(ip, idx, 1, state);
+		xfs_iext_remove(ip, *idx + 1, 1, state);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -814,14 +779,13 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ip->i_df.if_lastex = idx;
-		xfs_iext_remove(ip, idx + 1, 1, state);
+		xfs_iext_remove(ip, *idx + 1, 1, state);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -837,6 +801,7 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_blockcount, PREV.br_state)))
 				goto done;
 		}
+
 		*dnew = 0;
 		break;
 
@@ -846,11 +811,10 @@ xfs_bmap_add_extent_delay_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -866,6 +830,7 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
+
 		*dnew = 0;
 		break;
 
@@ -874,17 +839,16 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
 
 		temp = PREV.br_blockcount - new->br_blockcount;
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
-		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -904,7 +868,9 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			startblockval(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		--*idx;
 		*dnew = temp;
 		break;
 
@@ -913,12 +879,11 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is not contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		temp = PREV.br_blockcount - new->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_iext_insert(ip, idx, 1, new, state);
-		ip->i_df.if_lastex = idx;
+		xfs_iext_insert(ip, *idx, 1, new, state);
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -946,9 +911,10 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			startblockval(PREV.br_startblock) -
 			(cur ? cur->bc_private.b.allocated : 0));
-		ep = xfs_iext_get_ext(ifp, idx + 1);
+		ep = xfs_iext_get_ext(ifp, *idx + 1);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_);
+
 		*dnew = temp;
 		break;
 
@@ -958,15 +924,13 @@ xfs_bmap_add_extent_delay_real(
 		 * The right neighbor is contiguous with the new allocation.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
-		trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
+		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount,
 			RIGHT.br_state);
-		trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
-		ip->i_df.if_lastex = idx + 1;
+		trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -983,10 +947,14 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_state)))
 				goto done;
 		}
+
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			startblockval(PREV.br_startblock));
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		++*idx;
 		*dnew = temp;
 		break;
 
@@ -996,10 +964,9 @@ xfs_bmap_add_extent_delay_real(
 		 * The right neighbor is not contiguous.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_iext_insert(ip, idx + 1, 1, new, state);
-		ip->i_df.if_lastex = idx + 1;
+		xfs_iext_insert(ip, *idx + 1, 1, new, state);
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1027,9 +994,11 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			startblockval(PREV.br_startblock) -
 			(cur ? cur->bc_private.b.allocated : 0));
-		ep = xfs_iext_get_ext(ifp, idx);
+		ep = xfs_iext_get_ext(ifp, *idx);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		++*idx;
 		*dnew = temp;
 		break;
 
@@ -1056,7 +1025,7 @@ xfs_bmap_add_extent_delay_real(
 		 */
 		temp = new->br_startoff - PREV.br_startoff;
 		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
-		trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);	/* truncate PREV */
 		LEFT = *new;
 		RIGHT.br_state = PREV.br_state;
@@ -1065,8 +1034,7 @@ xfs_bmap_add_extent_delay_real(
 		RIGHT.br_startoff = new_endoff;
 		RIGHT.br_blockcount = temp2;
 		/* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
-		xfs_iext_insert(ip, idx + 1, 2, &LEFT, state);
-		ip->i_df.if_lastex = idx + 1;
+		xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state);
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1097,7 +1065,7 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		if (diff > 0 &&
 		    xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-					     -((int64_t)diff), rsvd)) {
+					     -((int64_t)diff), 0)) {
 			/*
 			 * Ick gross gag me with a spoon.
 			 */
@@ -1109,7 +1077,7 @@ xfs_bmap_add_extent_delay_real(
 				if (!diff ||
 				    !xfs_icsb_modify_counters(ip->i_mount,
 					    XFS_SBS_FDBLOCKS,
-					    -((int64_t)diff), rsvd))
+					    -((int64_t)diff), 0))
 					break;
 			}
 			if (temp2) {
@@ -1118,18 +1086,20 @@ xfs_bmap_add_extent_delay_real(
 				if (!diff ||
 				    !xfs_icsb_modify_counters(ip->i_mount,
 					    XFS_SBS_FDBLOCKS,
-					    -((int64_t)diff), rsvd))
+					    -((int64_t)diff), 0))
 					break;
 				}
 			}
 		}
-		ep = xfs_iext_get_ext(ifp, idx);
+		ep = xfs_iext_get_ext(ifp, *idx);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-		trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_);
+		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2),
 			nullstartblock((int)temp2));
-		trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_);
+
+		++*idx;
 		*dnew = temp + temp2;
 		break;
 
@@ -1161,7 +1131,7 @@ done:
 STATIC int				/* error */
 xfs_bmap_add_extent_unwritten_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp) /* inode logging flags */
@@ -1188,7 +1158,7 @@ xfs_bmap_add_extent_unwritten_real(
 	error = 0;
 	cur = *curp;
 	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	ep = xfs_iext_get_ext(ifp, idx);
+	ep = xfs_iext_get_ext(ifp, *idx);
 	xfs_bmbt_get_all(ep, &PREV);
 	newext = new->br_state;
 	oldext = (newext == XFS_EXT_UNWRITTEN) ?
@@ -1211,9 +1181,9 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (idx > 0) {
+	if (*idx > 0) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
 
 		if (isnullstartblock(LEFT.br_startblock))
 			state |= BMAP_LEFT_DELAY;
@@ -1231,9 +1201,9 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
 		if (isnullstartblock(RIGHT.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -1262,14 +1232,15 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		--*idx;
+
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		xfs_iext_remove(ip, idx, 2, state);
-		ip->i_df.if_lastex = idx - 1;
+		xfs_iext_remove(ip, *idx + 1, 2, state);
 		ip->i_d.di_nextents -= 2;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1305,13 +1276,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		--*idx;
+
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ip->i_df.if_lastex = idx - 1;
-		xfs_iext_remove(ip, idx, 1, state);
+		xfs_iext_remove(ip, *idx + 1, 1, state);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1341,13 +1313,12 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
 		xfs_bmbt_set_state(ep, newext);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-		ip->i_df.if_lastex = idx;
-		xfs_iext_remove(ip, idx + 1, 1, state);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_remove(ip, *idx + 1, 1, state);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1378,11 +1349,10 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_state(ep, newext);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ip->i_df.if_lastex = idx;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -1404,21 +1374,22 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
 
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		--*idx;
 
-		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -1449,17 +1420,16 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is not contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		xfs_iext_insert(ip, idx, 1, new, state);
-		ip->i_df.if_lastex = idx;
+		xfs_iext_insert(ip, *idx, 1, new, state);
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1488,17 +1458,19 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
-		trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		++*idx;
+
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount, newext);
-		trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -1528,13 +1500,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is not contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		++*idx;
+		xfs_iext_insert(ip, *idx, 1, new, state);
 
-		xfs_iext_insert(ip, idx + 1, 1, new, state);
-		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1568,10 +1541,10 @@ xfs_bmap_add_extent_unwritten_real(
 		 * newext. Contiguity is impossible here.
 		 * One extent becomes three extents.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			new->br_startoff - PREV.br_startoff);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
 		r[0] = *new;
 		r[1].br_startoff = new_endoff;
@@ -1579,8 +1552,10 @@ xfs_bmap_add_extent_unwritten_real(
 			PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_startblock = new->br_startblock + new->br_blockcount;
 		r[1].br_state = oldext;
-		xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
-		ip->i_df.if_lastex = idx + 1;
+
+		++*idx;
+		xfs_iext_insert(ip, *idx, 2, &r[0], state);
+
 		ip->i_d.di_nextents += 2;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1650,12 +1625,10 @@ done:
 STATIC int				/* error */
 xfs_bmap_add_extent_hole_delay(
 	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
-	int			*logflagsp, /* inode logging flags */
-	int			rsvd)	/* OK to allocate reserved blocks */
+	int			*logflagsp) /* inode logging flags */
 {
-	xfs_bmbt_rec_host_t	*ep;	/* extent record for idx */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_filblks_t		newlen=0;	/* new indirect size */
@@ -1665,16 +1638,15 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_filblks_t		temp=0;	/* temp for indirect calculations */
 
 	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	ep = xfs_iext_get_ext(ifp, idx);
 	state = 0;
 	ASSERT(isnullstartblock(new->br_startblock));
 
 	/*
 	 * Check and set flags if this segment has a left neighbor
 	 */
-	if (idx > 0) {
+	if (*idx > 0) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
 
 		if (isnullstartblock(left.br_startblock))
 			state |= BMAP_LEFT_DELAY;
@@ -1684,9 +1656,9 @@ xfs_bmap_add_extent_hole_delay(
 	 * Check and set flags if the current (right) segment exists.
 	 * If it doesn't exist, we're converting the hole at end-of-file.
 	 */
-	if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(ep, &right);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 
 		if (isnullstartblock(right.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
@@ -1719,21 +1691,21 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the left and on the right.
 		 * Merge all three into a single extent record.
 		 */
+		--*idx;
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;
 
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
+		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
 			nullstartblock((int)newlen));
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		xfs_iext_remove(ip, idx, 1, state);
-		ip->i_df.if_lastex = idx - 1;
+		xfs_iext_remove(ip, *idx + 1, 1, state);
 		break;
 
 	case BMAP_LEFT_CONTIG:
@@ -1742,17 +1714,17 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
+		--*idx;
 		temp = left.br_blockcount + new->br_blockcount;
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
+
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
+		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
 			nullstartblock((int)newlen));
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
-
-		ip->i_df.if_lastex = idx - 1;
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		break;
 
 	case BMAP_RIGHT_CONTIG:
@@ -1761,16 +1733,15 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
-		xfs_bmbt_set_allf(ep, new->br_startoff,
+		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+			new->br_startoff,
 			nullstartblock((int)newlen), temp, right.br_state);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
-
-		ip->i_df.if_lastex = idx;
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		break;
 
 	case 0:
@@ -1780,14 +1751,13 @@ xfs_bmap_add_extent_hole_delay(
 		 * Insert a new entry.
 		 */
 		oldlen = newlen = 0;
-		xfs_iext_insert(ip, idx, 1, new, state);
-		ip->i_df.if_lastex = idx;
+		xfs_iext_insert(ip, *idx, 1, new, state);
 		break;
 	}
 	if (oldlen != newlen) {
 		ASSERT(oldlen > newlen);
 		xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-			(int64_t)(oldlen - newlen), rsvd);
+			(int64_t)(oldlen - newlen), 0);
 		/*
 		 * Nothing to do for disk quota accounting here.
 		 */
@@ -1803,13 +1773,12 @@ xfs_bmap_add_extent_hole_delay(
 STATIC int				/* error */
 xfs_bmap_add_extent_hole_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
-	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_extnum_t		*idx,	/* extent number to update/insert */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork) /* data or attr fork */
 {
-	xfs_bmbt_rec_host_t	*ep;	/* pointer to extent entry ins. point */
 	int			error;	/* error return value */
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
@@ -1819,8 +1788,7 @@ xfs_bmap_add_extent_hole_real(
 	int			state;	/* state bits, accessed thru macros */
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
-	ep = xfs_iext_get_ext(ifp, idx);
+	ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
 	state = 0;
 
 	if (whichfork == XFS_ATTR_FORK)
@@ -1829,9 +1797,9 @@ xfs_bmap_add_extent_hole_real(
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 */
-	if (idx > 0) {
+	if (*idx > 0) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
 		if (isnullstartblock(left.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -1840,9 +1808,9 @@ xfs_bmap_add_extent_hole_real(
 	 * Check and set flags if this segment has a current value.
 	 * Not true if we're inserting into the "hole" at eof.
 	 */
-	if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+	if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(ep, &right);
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 		if (isnullstartblock(right.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -1879,14 +1847,15 @@ xfs_bmap_add_extent_hole_real(
 		 * left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		--*idx;
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
 			left.br_blockcount + new->br_blockcount +
 			right.br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+		xfs_iext_remove(ip, *idx + 1, 1, state);
 
-		xfs_iext_remove(ip, idx, 1, state);
-		ifp->if_lastex = idx - 1;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		if (cur == NULL) {
@@ -1921,12 +1890,12 @@ xfs_bmap_add_extent_hole_real(
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
+		--*idx;
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
 			left.br_blockcount + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ifp->if_lastex = idx - 1;
 		if (cur == NULL) {
 			rval = xfs_ilog_fext(whichfork);
 		} else {
@@ -1952,13 +1921,13 @@ xfs_bmap_add_extent_hole_real(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
-		xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+			new->br_startoff, new->br_startblock,
 			new->br_blockcount + right.br_blockcount,
 			right.br_state);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
-		ifp->if_lastex = idx;
 		if (cur == NULL) {
 			rval = xfs_ilog_fext(whichfork);
 		} else {
@@ -1984,8 +1953,7 @@ xfs_bmap_add_extent_hole_real(
 		 * real allocation.
 		 * Insert a new entry.
 		 */
-		xfs_iext_insert(ip, idx, 1, new, state);
-		ifp->if_lastex = idx;
+		xfs_iext_insert(ip, *idx, 1, new, state);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL) {
@@ -2833,13 +2801,12 @@ STATIC int /* error */
 xfs_bmap_del_extent(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_trans_t		*tp,	/* current transaction pointer */
-	xfs_extnum_t		idx,	/* extent number to update/delete */
+	xfs_extnum_t		*idx,	/* extent number to update/delete */
 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
 	int			*logflagsp, /* inode logging flags */
-	int			whichfork, /* data or attr fork */
-	int			rsvd)	/* OK to allocate reserved blocks */
+	int			whichfork) /* data or attr fork */
 {
 	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
 	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
@@ -2870,10 +2837,10 @@ xfs_bmap_del_extent(
 
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT((idx >= 0) && (idx < ifp->if_bytes /
+	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
 		(uint)sizeof(xfs_bmbt_rec_t)));
 	ASSERT(del->br_blockcount > 0);
-	ep = xfs_iext_get_ext(ifp, idx);
+	ep = xfs_iext_get_ext(ifp, *idx);
 	xfs_bmbt_get_all(ep, &got);
 	ASSERT(got.br_startoff <= del->br_startoff);
 	del_endoff = del->br_startoff + del->br_blockcount;
@@ -2947,11 +2914,12 @@ xfs_bmap_del_extent(
 		/*
 		 * Matches the whole extent. Delete the entry.
 		 */
-		xfs_iext_remove(ip, idx, 1,
+		xfs_iext_remove(ip, *idx, 1,
 			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-		ifp->if_lastex = idx;
+		--*idx;
 		if (delay)
 			break;
+
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		flags |= XFS_ILOG_CORE;
@@ -2968,21 +2936,20 @@ xfs_bmap_del_extent(
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		xfs_bmbt_set_startoff(ep, del_endoff);
 		temp = got.br_blockcount - del->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
-		ifp->if_lastex = idx;
 		if (delay) {
 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 				da_old);
 			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 			da_new = temp;
 			break;
 		}
 		xfs_bmbt_set_startblock(ep, del_endblock);
-		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		if (!cur) {
 			flags |= xfs_ilog_fext(whichfork);
 			break;
@@ -2998,18 +2965,17 @@ xfs_bmap_del_extent(
2998 * Deleting the last part of the extent. 2965 * Deleting the last part of the extent.
2999 */ 2966 */
3000 temp = got.br_blockcount - del->br_blockcount; 2967 temp = got.br_blockcount - del->br_blockcount;
3001 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); 2968 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
3002 xfs_bmbt_set_blockcount(ep, temp); 2969 xfs_bmbt_set_blockcount(ep, temp);
3003 ifp->if_lastex = idx;
3004 if (delay) { 2970 if (delay) {
3005 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2971 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3006 da_old); 2972 da_old);
3007 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 2973 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3008 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 2974 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
3009 da_new = temp; 2975 da_new = temp;
3010 break; 2976 break;
3011 } 2977 }
3012 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 2978 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
3013 if (!cur) { 2979 if (!cur) {
3014 flags |= xfs_ilog_fext(whichfork); 2980 flags |= xfs_ilog_fext(whichfork);
3015 break; 2981 break;
@@ -3026,7 +2992,7 @@ xfs_bmap_del_extent(
3026 * Deleting the middle of the extent. 2992 * Deleting the middle of the extent.
3027 */ 2993 */
3028 temp = del->br_startoff - got.br_startoff; 2994 temp = del->br_startoff - got.br_startoff;
3029 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); 2995 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
3030 xfs_bmbt_set_blockcount(ep, temp); 2996 xfs_bmbt_set_blockcount(ep, temp);
3031 new.br_startoff = del_endoff; 2997 new.br_startoff = del_endoff;
3032 temp2 = got_endoff - del_endoff; 2998 temp2 = got_endoff - del_endoff;
@@ -3113,9 +3079,9 @@ xfs_bmap_del_extent(
3113 } 3079 }
3114 } 3080 }
3115 } 3081 }
3116 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); 3082 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
3117 xfs_iext_insert(ip, idx + 1, 1, &new, state); 3083 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
3118 ifp->if_lastex = idx + 1; 3084 ++*idx;
3119 break; 3085 break;
3120 } 3086 }
3121 /* 3087 /*
@@ -3142,7 +3108,7 @@ xfs_bmap_del_extent(
3142 ASSERT(da_old >= da_new); 3108 ASSERT(da_old >= da_new);
3143 if (da_old > da_new) { 3109 if (da_old > da_new) {
3144 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 3110 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
3145 (int64_t)(da_old - da_new), rsvd); 3111 (int64_t)(da_old - da_new), 0);
3146 } 3112 }
3147done: 3113done:
3148 *logflagsp = flags; 3114 *logflagsp = flags;
@@ -4562,29 +4528,24 @@ xfs_bmapi(
4562 if (rt) { 4528 if (rt) {
4563 error = xfs_mod_incore_sb(mp, 4529 error = xfs_mod_incore_sb(mp,
4564 XFS_SBS_FREXTENTS, 4530 XFS_SBS_FREXTENTS,
4565 -((int64_t)extsz), (flags & 4531 -((int64_t)extsz), 0);
4566 XFS_BMAPI_RSVBLOCKS));
4567 } else { 4532 } else {
4568 error = xfs_icsb_modify_counters(mp, 4533 error = xfs_icsb_modify_counters(mp,
4569 XFS_SBS_FDBLOCKS, 4534 XFS_SBS_FDBLOCKS,
4570 -((int64_t)alen), (flags & 4535 -((int64_t)alen), 0);
4571 XFS_BMAPI_RSVBLOCKS));
4572 } 4536 }
4573 if (!error) { 4537 if (!error) {
4574 error = xfs_icsb_modify_counters(mp, 4538 error = xfs_icsb_modify_counters(mp,
4575 XFS_SBS_FDBLOCKS, 4539 XFS_SBS_FDBLOCKS,
4576 -((int64_t)indlen), (flags & 4540 -((int64_t)indlen), 0);
4577 XFS_BMAPI_RSVBLOCKS));
4578 if (error && rt) 4541 if (error && rt)
4579 xfs_mod_incore_sb(mp, 4542 xfs_mod_incore_sb(mp,
4580 XFS_SBS_FREXTENTS, 4543 XFS_SBS_FREXTENTS,
4581 (int64_t)extsz, (flags & 4544 (int64_t)extsz, 0);
4582 XFS_BMAPI_RSVBLOCKS));
4583 else if (error) 4545 else if (error)
4584 xfs_icsb_modify_counters(mp, 4546 xfs_icsb_modify_counters(mp,
4585 XFS_SBS_FDBLOCKS, 4547 XFS_SBS_FDBLOCKS,
4586 (int64_t)alen, (flags & 4548 (int64_t)alen, 0);
4587 XFS_BMAPI_RSVBLOCKS));
4588 } 4549 }
4589 4550
4590 if (error) { 4551 if (error) {
@@ -4701,13 +4662,12 @@ xfs_bmapi(
4701 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) 4662 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
4702 got.br_state = XFS_EXT_UNWRITTEN; 4663 got.br_state = XFS_EXT_UNWRITTEN;
4703 } 4664 }
4704 error = xfs_bmap_add_extent(ip, lastx, &cur, &got, 4665 error = xfs_bmap_add_extent(ip, &lastx, &cur, &got,
4705 firstblock, flist, &tmp_logflags, 4666 firstblock, flist, &tmp_logflags,
4706 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4667 whichfork);
4707 logflags |= tmp_logflags; 4668 logflags |= tmp_logflags;
4708 if (error) 4669 if (error)
4709 goto error0; 4670 goto error0;
4710 lastx = ifp->if_lastex;
4711 ep = xfs_iext_get_ext(ifp, lastx); 4671 ep = xfs_iext_get_ext(ifp, lastx);
4712 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4672 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4713 xfs_bmbt_get_all(ep, &got); 4673 xfs_bmbt_get_all(ep, &got);
@@ -4803,13 +4763,12 @@ xfs_bmapi(
4803 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4763 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4804 ? XFS_EXT_NORM 4764 ? XFS_EXT_NORM
4805 : XFS_EXT_UNWRITTEN; 4765 : XFS_EXT_UNWRITTEN;
4806 error = xfs_bmap_add_extent(ip, lastx, &cur, mval, 4766 error = xfs_bmap_add_extent(ip, &lastx, &cur, mval,
4807 firstblock, flist, &tmp_logflags, 4767 firstblock, flist, &tmp_logflags,
4808 whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); 4768 whichfork);
4809 logflags |= tmp_logflags; 4769 logflags |= tmp_logflags;
4810 if (error) 4770 if (error)
4811 goto error0; 4771 goto error0;
4812 lastx = ifp->if_lastex;
4813 ep = xfs_iext_get_ext(ifp, lastx); 4772 ep = xfs_iext_get_ext(ifp, lastx);
4814 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4773 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4815 xfs_bmbt_get_all(ep, &got); 4774 xfs_bmbt_get_all(ep, &got);
@@ -4868,14 +4827,14 @@ xfs_bmapi(
4868 /* 4827 /*
4869 * Else go on to the next record. 4828 * Else go on to the next record.
4870 */ 4829 */
4871 ep = xfs_iext_get_ext(ifp, ++lastx);
4872 prev = got; 4830 prev = got;
4873 if (lastx >= nextents) 4831 if (++lastx < nextents) {
4874 eof = 1; 4832 ep = xfs_iext_get_ext(ifp, lastx);
4875 else
4876 xfs_bmbt_get_all(ep, &got); 4833 xfs_bmbt_get_all(ep, &got);
4834 } else {
4835 eof = 1;
4836 }
4877 } 4837 }
4878 ifp->if_lastex = lastx;
4879 *nmap = n; 4838 *nmap = n;
4880 /* 4839 /*
4881 * Transform from btree to extents, give it cur. 4840 * Transform from btree to extents, give it cur.
@@ -4984,7 +4943,6 @@ xfs_bmapi_single(
4984 ASSERT(!isnullstartblock(got.br_startblock)); 4943 ASSERT(!isnullstartblock(got.br_startblock));
4985 ASSERT(bno < got.br_startoff + got.br_blockcount); 4944 ASSERT(bno < got.br_startoff + got.br_blockcount);
4986 *fsb = got.br_startblock + (bno - got.br_startoff); 4945 *fsb = got.br_startblock + (bno - got.br_startoff);
4987 ifp->if_lastex = lastx;
4988 return 0; 4946 return 0;
4989} 4947}
4990 4948
@@ -5026,7 +4984,6 @@ xfs_bunmapi(
5026 int tmp_logflags; /* partial logging flags */ 4984 int tmp_logflags; /* partial logging flags */
5027 int wasdel; /* was a delayed alloc extent */ 4985 int wasdel; /* was a delayed alloc extent */
5028 int whichfork; /* data or attribute fork */ 4986 int whichfork; /* data or attribute fork */
5029 int rsvd; /* OK to allocate reserved blocks */
5030 xfs_fsblock_t sum; 4987 xfs_fsblock_t sum;
5031 4988
5032 trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); 4989 trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
@@ -5044,7 +5001,7 @@ xfs_bunmapi(
5044 mp = ip->i_mount; 5001 mp = ip->i_mount;
5045 if (XFS_FORCED_SHUTDOWN(mp)) 5002 if (XFS_FORCED_SHUTDOWN(mp))
5046 return XFS_ERROR(EIO); 5003 return XFS_ERROR(EIO);
5047 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; 5004
5048 ASSERT(len > 0); 5005 ASSERT(len > 0);
5049 ASSERT(nexts >= 0); 5006 ASSERT(nexts >= 0);
5050 ASSERT(ifp->if_ext_max == 5007 ASSERT(ifp->if_ext_max ==
@@ -5160,9 +5117,9 @@ xfs_bunmapi(
5160 del.br_blockcount = mod; 5117 del.br_blockcount = mod;
5161 } 5118 }
5162 del.br_state = XFS_EXT_UNWRITTEN; 5119 del.br_state = XFS_EXT_UNWRITTEN;
5163 error = xfs_bmap_add_extent(ip, lastx, &cur, &del, 5120 error = xfs_bmap_add_extent(ip, &lastx, &cur, &del,
5164 firstblock, flist, &logflags, 5121 firstblock, flist, &logflags,
5165 XFS_DATA_FORK, 0); 5122 XFS_DATA_FORK);
5166 if (error) 5123 if (error)
5167 goto error0; 5124 goto error0;
5168 goto nodelete; 5125 goto nodelete;
@@ -5188,9 +5145,12 @@ xfs_bunmapi(
5188 */ 5145 */
5189 ASSERT(bno >= del.br_blockcount); 5146 ASSERT(bno >= del.br_blockcount);
5190 bno -= del.br_blockcount; 5147 bno -= del.br_blockcount;
5191 if (bno < got.br_startoff) { 5148 if (got.br_startoff > bno) {
5192 if (--lastx >= 0) 5149 if (--lastx >= 0) {
5193 xfs_bmbt_get_all(--ep, &got); 5150 ep = xfs_iext_get_ext(ifp,
5151 lastx);
5152 xfs_bmbt_get_all(ep, &got);
5153 }
5194 } 5154 }
5195 continue; 5155 continue;
5196 } else if (del.br_state == XFS_EXT_UNWRITTEN) { 5156 } else if (del.br_state == XFS_EXT_UNWRITTEN) {
@@ -5214,18 +5174,19 @@ xfs_bunmapi(
5214 prev.br_startoff = start; 5174 prev.br_startoff = start;
5215 } 5175 }
5216 prev.br_state = XFS_EXT_UNWRITTEN; 5176 prev.br_state = XFS_EXT_UNWRITTEN;
5217 error = xfs_bmap_add_extent(ip, lastx - 1, &cur, 5177 lastx--;
5178 error = xfs_bmap_add_extent(ip, &lastx, &cur,
5218 &prev, firstblock, flist, &logflags, 5179 &prev, firstblock, flist, &logflags,
5219 XFS_DATA_FORK, 0); 5180 XFS_DATA_FORK);
5220 if (error) 5181 if (error)
5221 goto error0; 5182 goto error0;
5222 goto nodelete; 5183 goto nodelete;
5223 } else { 5184 } else {
5224 ASSERT(del.br_state == XFS_EXT_NORM); 5185 ASSERT(del.br_state == XFS_EXT_NORM);
5225 del.br_state = XFS_EXT_UNWRITTEN; 5186 del.br_state = XFS_EXT_UNWRITTEN;
5226 error = xfs_bmap_add_extent(ip, lastx, &cur, 5187 error = xfs_bmap_add_extent(ip, &lastx, &cur,
5227 &del, firstblock, flist, &logflags, 5188 &del, firstblock, flist, &logflags,
5228 XFS_DATA_FORK, 0); 5189 XFS_DATA_FORK);
5229 if (error) 5190 if (error)
5230 goto error0; 5191 goto error0;
5231 goto nodelete; 5192 goto nodelete;
@@ -5240,13 +5201,13 @@ xfs_bunmapi(
5240 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); 5201 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5241 do_div(rtexts, mp->m_sb.sb_rextsize); 5202 do_div(rtexts, mp->m_sb.sb_rextsize);
5242 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5203 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
5243 (int64_t)rtexts, rsvd); 5204 (int64_t)rtexts, 0);
5244 (void)xfs_trans_reserve_quota_nblks(NULL, 5205 (void)xfs_trans_reserve_quota_nblks(NULL,
5245 ip, -((long)del.br_blockcount), 0, 5206 ip, -((long)del.br_blockcount), 0,
5246 XFS_QMOPT_RES_RTBLKS); 5207 XFS_QMOPT_RES_RTBLKS);
5247 } else { 5208 } else {
5248 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 5209 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
5249 (int64_t)del.br_blockcount, rsvd); 5210 (int64_t)del.br_blockcount, 0);
5250 (void)xfs_trans_reserve_quota_nblks(NULL, 5211 (void)xfs_trans_reserve_quota_nblks(NULL,
5251 ip, -((long)del.br_blockcount), 0, 5212 ip, -((long)del.br_blockcount), 0,
5252 XFS_QMOPT_RES_REGBLKS); 5213 XFS_QMOPT_RES_REGBLKS);
@@ -5277,31 +5238,29 @@ xfs_bunmapi(
5277 error = XFS_ERROR(ENOSPC); 5238 error = XFS_ERROR(ENOSPC);
5278 goto error0; 5239 goto error0;
5279 } 5240 }
5280 error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, 5241 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
5281 &tmp_logflags, whichfork, rsvd); 5242 &tmp_logflags, whichfork);
5282 logflags |= tmp_logflags; 5243 logflags |= tmp_logflags;
5283 if (error) 5244 if (error)
5284 goto error0; 5245 goto error0;
5285 bno = del.br_startoff - 1; 5246 bno = del.br_startoff - 1;
5286nodelete: 5247nodelete:
5287 lastx = ifp->if_lastex;
5288 /* 5248 /*
5289 * If not done go on to the next (previous) record. 5249 * If not done go on to the next (previous) record.
5290 * Reset ep in case the extents array was re-alloced.
5291 */ 5250 */
5292 ep = xfs_iext_get_ext(ifp, lastx);
5293 if (bno != (xfs_fileoff_t)-1 && bno >= start) { 5251 if (bno != (xfs_fileoff_t)-1 && bno >= start) {
5294 if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || 5252 if (lastx >= 0) {
5295 xfs_bmbt_get_startoff(ep) > bno) { 5253 ep = xfs_iext_get_ext(ifp, lastx);
5296 if (--lastx >= 0) 5254 if (xfs_bmbt_get_startoff(ep) > bno) {
5297 ep = xfs_iext_get_ext(ifp, lastx); 5255 if (--lastx >= 0)
5298 } 5256 ep = xfs_iext_get_ext(ifp,
5299 if (lastx >= 0) 5257 lastx);
5258 }
5300 xfs_bmbt_get_all(ep, &got); 5259 xfs_bmbt_get_all(ep, &got);
5260 }
5301 extno++; 5261 extno++;
5302 } 5262 }
5303 } 5263 }
5304 ifp->if_lastex = lastx;
5305 *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; 5264 *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
5306 ASSERT(ifp->if_ext_max == 5265 ASSERT(ifp->if_ext_max ==
5307 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 5266 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
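The xfs_bmap.c hunks above all follow one pattern: the cached per-fork cursor ifp->if_lastex is gone, and the current extent index now travels through the call chain by pointer (xfs_extnum_t *idx), so a helper that inserts or removes records can move the caller's cursor itself (++*idx, --*idx) instead of writing it back into the fork. That is also why the "lastx = ifp->if_lastex" reloads in xfs_bmapi() and xfs_bunmapi() disappear: the index is already current when the callee returns. A minimal user-space sketch of the convention, with plain ints and invented names standing in for the XFS types:

    /*
     * Sketch only: a caller-owned cursor passed by pointer, modelled on
     * the new xfs_bmap_del_extent(ip, tp, &lastx, ...) signature.
     */
    #include <assert.h>
    #include <stdio.h>

    static int extents[8] = { 10, 20, 30, 40 };
    static int nextents = 4;

    /* Hypothetical stand-in for deleting a whole record: shift the
     * array left, then back the caller's cursor up, as "--*idx" does
     * in the patched xfs_bmap_del_extent(). */
    static void del_extent(int *idx)
    {
        int i;

        assert(*idx >= 0 && *idx < nextents);
        for (i = *idx; i < nextents - 1; i++)
            extents[i] = extents[i + 1];
        nextents--;
        --*idx;
    }

    int main(void)
    {
        int lastx = 2;  /* caller-owned cursor, like lastx in xfs_bmapi() */

        del_extent(&lastx);
        printf("cursor now %d, next record %d\n", lastx, extents[lastx + 1]);
        return 0;
    }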
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 3651191daea1..c62234bde053 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -69,7 +69,6 @@ typedef struct xfs_bmap_free
69#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ 69#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
70#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ 70#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
71#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ 71#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
72#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */
73#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ 72#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
74#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ 73#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
75 /* combine contig. space */ 74 /* combine contig. space */
@@ -87,7 +86,6 @@ typedef struct xfs_bmap_free
87 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 86 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
88 { XFS_BMAPI_METADATA, "METADATA" }, \ 87 { XFS_BMAPI_METADATA, "METADATA" }, \
89 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ 88 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
90 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
91 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 89 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
92 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 90 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
93 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 91 { XFS_BMAPI_CONTIG, "CONTIG" }, \
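xfs_bmap.h drops XFS_BMAPI_RSVBLOCKS and its trace-flag string because nothing sets the flag any more; as the xfs_bmap.c hunks show, every xfs_mod_incore_sb()/xfs_icsb_modify_counters() call that used to forward it now passes a literal 0 for the rsvd argument. A loose model of what that argument gates (simplified counters, not the kernel's actual accounting):

    #include <stdio.h>

    static long fdblocks = 4;   /* free-block counter */
    static long resblocks = 64; /* reserved pool */

    /* Loose model of a counter update taking an rsvd argument: with
     * rsvd == 0, a decrement that would go negative fails with ENOSPC
     * instead of borrowing from the reserved pool. */
    static int mod_fdblocks(long delta, int rsvd)
    {
        if (fdblocks + delta >= 0) {
            fdblocks += delta;
            return 0;
        }
        if (rsvd && resblocks + fdblocks + delta >= 0) {
            resblocks += fdblocks + delta;
            fdblocks = 0;
            return 0;
        }
        return -1;  /* ENOSPC */
    }

    int main(void)
    {
        printf("rsvd=0 -> %d\n", mod_fdblocks(-8, 0)); /* refused */
        printf("rsvd=1 -> %d\n", mod_fdblocks(-8, 1)); /* borrows */
        return 0;
    }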
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index be628677c288..9a84a85c03b1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
202 xfs_inode_t *tip, /* tmp inode */ 202 xfs_inode_t *tip, /* tmp inode */
203 xfs_swapext_t *sxp) 203 xfs_swapext_t *sxp)
204{ 204{
205 xfs_mount_t *mp; 205 xfs_mount_t *mp = ip->i_mount;
206 xfs_trans_t *tp; 206 xfs_trans_t *tp;
207 xfs_bstat_t *sbp = &sxp->sx_stat; 207 xfs_bstat_t *sbp = &sxp->sx_stat;
208 xfs_ifork_t *tempifp, *ifp, *tifp; 208 xfs_ifork_t *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
212 int taforkblks = 0; 212 int taforkblks = 0;
213 __uint64_t tmp; 213 __uint64_t tmp;
214 214
215 mp = ip->i_mount;
216
217 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 215 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
218 if (!tempifp) { 216 if (!tempifp) {
219 error = XFS_ERROR(ENOMEM); 217 error = XFS_ERROR(ENOMEM);
220 goto out; 218 goto out;
221 } 219 }
222 220
223 sbp = &sxp->sx_stat;
224
225 /* 221 /*
226 * we have to do two separate lock calls here to keep lockdep 222 * we have to do two separate lock calls here to keep lockdep
227 * happy. If we try to get all the locks in one call, lock will 223 * happy. If we try to get all the locks in one call, lock will
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a37480a6e023..a098a20ca63e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -920,7 +920,6 @@ xfs_iread_extents(
920 /* 920 /*
921 * We know that the size is valid (it's checked in iformat_btree) 921 * We know that the size is valid (it's checked in iformat_btree)
922 */ 922 */
923 ifp->if_lastex = NULLEXTNUM;
924 ifp->if_bytes = ifp->if_real_bytes = 0; 923 ifp->if_bytes = ifp->if_real_bytes = 0;
925 ifp->if_flags |= XFS_IFEXTENTS; 924 ifp->if_flags |= XFS_IFEXTENTS;
926 xfs_iext_add(ifp, 0, nextents); 925 xfs_iext_add(ifp, 0, nextents);
@@ -1354,7 +1353,7 @@ xfs_itruncate_start(
1354 return 0; 1353 return 0;
1355 } 1354 }
1356 last_byte = xfs_file_last_byte(ip); 1355 last_byte = xfs_file_last_byte(ip);
1357 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); 1356 trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
1358 if (last_byte > toss_start) { 1357 if (last_byte > toss_start) {
1359 if (flags & XFS_ITRUNC_DEFINITE) { 1358 if (flags & XFS_ITRUNC_DEFINITE) {
1360 xfs_tosspages(ip, toss_start, 1359 xfs_tosspages(ip, toss_start,
@@ -1470,7 +1469,7 @@ xfs_itruncate_finish(
1470 * file but the log buffers containing the free and reallocation 1469 * file but the log buffers containing the free and reallocation
1471 * don't, then we'd end up with garbage in the blocks being freed. 1470 * don't, then we'd end up with garbage in the blocks being freed.
1472 * As long as we make the new_size permanent before actually 1471 * As long as we make the new_size permanent before actually
1473 * freeing any blocks it doesn't matter if they get writtten to. 1472 * freeing any blocks it doesn't matter if they get written to.
1474 * 1473 *
1475 * The callers must signal into us whether or not the size 1474 * The callers must signal into us whether or not the size
1476 * setting here must be synchronous. There are a few cases 1475 * setting here must be synchronous. There are a few cases
@@ -2558,12 +2557,9 @@ xfs_iflush_fork(
2558 case XFS_DINODE_FMT_EXTENTS: 2557 case XFS_DINODE_FMT_EXTENTS:
2559 ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2558 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2560 !(iip->ili_format.ilf_fields & extflag[whichfork])); 2559 !(iip->ili_format.ilf_fields & extflag[whichfork]));
2561 ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
2562 (ifp->if_bytes == 0));
2563 ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
2564 (ifp->if_bytes > 0));
2565 if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 2560 if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
2566 (ifp->if_bytes > 0)) { 2561 (ifp->if_bytes > 0)) {
2562 ASSERT(xfs_iext_get_ext(ifp, 0));
2567 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 2563 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
2568 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 2564 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
2569 whichfork); 2565 whichfork);
@@ -3112,6 +3108,8 @@ xfs_iext_get_ext(
3112 xfs_extnum_t idx) /* index of target extent */ 3108 xfs_extnum_t idx) /* index of target extent */
3113{ 3109{
3114 ASSERT(idx >= 0); 3110 ASSERT(idx >= 0);
3111 ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
3112
3115 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 3113 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
3116 return ifp->if_u1.if_ext_irec->er_extbuf; 3114 return ifp->if_u1.if_ext_irec->er_extbuf;
3117 } else if (ifp->if_flags & XFS_IFEXTIREC) { 3115 } else if (ifp->if_flags & XFS_IFEXTIREC) {
@@ -3191,7 +3189,6 @@ xfs_iext_add(
3191 } 3189 }
3192 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 3190 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
3193 ifp->if_real_bytes = 0; 3191 ifp->if_real_bytes = 0;
3194 ifp->if_lastex = nextents + ext_diff;
3195 } 3192 }
3196 /* 3193 /*
3197 * Otherwise use a linear (direct) extent list. 3194 * Otherwise use a linear (direct) extent list.
@@ -3886,8 +3883,10 @@ xfs_iext_idx_to_irec(
3886 xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 3883 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
3887 3884
3888 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3885 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3889 ASSERT(page_idx >= 0 && page_idx <= 3886 ASSERT(page_idx >= 0);
3890 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 3887 ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
3888 ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
3889
3891 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 3890 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3892 erp_idx = 0; 3891 erp_idx = 0;
3893 low = 0; 3892 low = 0;
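In xfs_inode.c, dropping if_lastex also removes the stale-index escape hatch, so the extent lookup helpers can check their arguments harder: xfs_iext_get_ext() now asserts the index lies inside the in-use record range, and xfs_iext_idx_to_irec() allows an index one past the end only when the caller is growing the list (realloc). Roughly, in plain C:

    #include <assert.h>
    #include <stddef.h>

    struct rec { unsigned long long l0, l1; }; /* stand-in for xfs_bmbt_rec_t */

    /* Sketch of the tightened bounds rule in xfs_iext_get_ext(). */
    static struct rec *get_ext(struct rec *base, size_t bytes, long idx)
    {
        assert(idx >= 0);
        assert(idx < (long)(bytes / sizeof(struct rec)));
        return &base[idx];
    }

    int main(void)
    {
        struct rec recs[4] = { { 0, 0 } };

        get_ext(recs, sizeof(recs), 3); /* last in-use record: fine */
        /* get_ext(recs, sizeof(recs), 4) would trip the assert; only
         * the realloc path in xfs_iext_idx_to_irec() may address one
         * past the end. */
        return 0;
    }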
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ff4e2a30227d..3ae6d58e5473 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -67,7 +67,6 @@ typedef struct xfs_ifork {
67 short if_broot_bytes; /* bytes allocated for root */ 67 short if_broot_bytes; /* bytes allocated for root */
68 unsigned char if_flags; /* per-fork flags */ 68 unsigned char if_flags; /* per-fork flags */
69 unsigned char if_ext_max; /* max # of extent records */ 69 unsigned char if_ext_max; /* max # of extent records */
70 xfs_extnum_t if_lastex; /* last if_extents used */
71 union { 70 union {
72 xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ 71 xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
73 xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ 72 xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 576fdfe81d60..09983a3344a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
970{ 970{
971 xfs_inode_log_item_t *iip = ip->i_itemp; 971 xfs_inode_log_item_t *iip = ip->i_itemp;
972 972
973 iip = ip->i_itemp;
974 if (iip) { 973 if (iip) {
975 struct xfs_ail *ailp = iip->ili_item.li_ailp; 974 struct xfs_ail *ailp = iip->ili_item.li_ailp;
976 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 975 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..211930246f20 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
1449 1449
1450 xlog_cil_destroy(log); 1450 xlog_cil_destroy(log);
1451 1451
1452 /*
1453 * always need to ensure that the extra buffer does not point to memory
1454 * owned by another log buffer before we free it.
1455 */
1456 xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
1457 xfs_buf_free(log->l_xbuf);
1458
1452 iclog = log->l_iclog; 1459 iclog = log->l_iclog;
1453 for (i=0; i<log->l_iclog_bufs; i++) { 1460 for (i=0; i<log->l_iclog_bufs; i++) {
1454 xfs_buf_free(iclog->ic_bp); 1461 xfs_buf_free(iclog->ic_bp);
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
1458 } 1465 }
1459 spinlock_destroy(&log->l_icloglock); 1466 spinlock_destroy(&log->l_icloglock);
1460 1467
1461 xfs_buf_free(log->l_xbuf);
1462 log->l_mp->m_log = NULL; 1468 log->l_mp->m_log = NULL;
1463 kmem_free(log); 1469 kmem_free(log);
1464} /* xlog_dealloc_log */ 1470} /* xlog_dealloc_log */
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
3248 return ticket; 3254 return ticket;
3249} 3255}
3250 3256
3251xlog_tid_t
3252xfs_log_get_trans_ident(
3253 struct xfs_trans *tp)
3254{
3255 return tp->t_ticket->t_tid;
3256}
3257
3258/* 3257/*
3259 * Allocate and initialise a new log ticket. 3258 * Allocate and initialise a new log ticket.
3260 */ 3259 */
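Two independent xfs_log.c changes here: the unused xfs_log_get_trans_ident() helper is deleted, and xlog_dealloc_log() now empties l_xbuf before freeing it. The latter closes an aliasing hazard: during a split log write l_xbuf is temporarily pointed at memory owned by an iclog buffer, so tearing it down without resetting the pointer could free memory that still belongs to another buffer. A user-space outline of the hazard (not the xfs_buf API):

    #include <stdlib.h>

    struct buf {
        void   *mem;
        size_t  len;
        int     owns_mem;
    };

    /* Stand-in for xfs_buf_set_empty(): forget any borrowed pointer
     * so teardown cannot free memory the buffer does not own. */
    static void buf_set_empty(struct buf *bp, size_t len)
    {
        bp->mem = NULL;
        bp->len = len;
        bp->owns_mem = 0;
    }

    static void buf_free(struct buf *bp)
    {
        if (bp->owns_mem)
            free(bp->mem);
        free(bp);
    }

    int main(void)
    {
        struct buf *iclog_bp = malloc(sizeof(*iclog_bp));
        struct buf *xbuf = malloc(sizeof(*xbuf));

        iclog_bp->mem = malloc(4096);
        iclog_bp->len = 4096;
        iclog_bp->owns_mem = 1;

        *xbuf = *iclog_bp;         /* xbuf now aliases the iclog's memory */

        buf_set_empty(xbuf, 4096); /* without this: double free below */
        buf_free(xbuf);
        buf_free(iclog_bp);
        return 0;
    }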
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
190void xfs_log_ticket_put(struct xlog_ticket *ticket); 190void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 191
192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
193
194void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 192void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
195 struct xfs_log_vec *log_vector, 193 struct xfs_log_vec *log_vector,
196 xfs_lsn_t *commit_lsn, int flags); 194 xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9ca59be08977..c7755d5a5fbe 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -29,6 +29,7 @@
29#include "xfs_mount.h" 29#include "xfs_mount.h"
30#include "xfs_error.h" 30#include "xfs_error.h"
31#include "xfs_alloc.h" 31#include "xfs_alloc.h"
32#include "xfs_discard.h"
32 33
33/* 34/*
34 * Perform initial CIL structure initialisation. If the CIL is not 35 * Perform initial CIL structure initialisation. If the CIL is not
@@ -361,19 +362,28 @@ xlog_cil_committed(
361 int abort) 362 int abort)
362{ 363{
363 struct xfs_cil_ctx *ctx = args; 364 struct xfs_cil_ctx *ctx = args;
364 struct xfs_busy_extent *busyp, *n; 365 struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
365 366
366 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, 367 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
367 ctx->start_lsn, abort); 368 ctx->start_lsn, abort);
368 369
369 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) 370 xfs_alloc_busy_sort(&ctx->busy_extents);
370 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); 371 xfs_alloc_busy_clear(mp, &ctx->busy_extents,
372 (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
371 373
372 spin_lock(&ctx->cil->xc_cil_lock); 374 spin_lock(&ctx->cil->xc_cil_lock);
373 list_del(&ctx->committing); 375 list_del(&ctx->committing);
374 spin_unlock(&ctx->cil->xc_cil_lock); 376 spin_unlock(&ctx->cil->xc_cil_lock);
375 377
376 xlog_cil_free_logvec(ctx->lv_chain); 378 xlog_cil_free_logvec(ctx->lv_chain);
379
380 if (!list_empty(&ctx->busy_extents)) {
381 ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
382
383 xfs_discard_extents(mp, &ctx->busy_extents);
384 xfs_alloc_busy_clear(mp, &ctx->busy_extents, false);
385 }
386
377 kmem_free(ctx); 387 kmem_free(ctx);
378} 388}
379 389
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ 147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
148 148
149typedef __uint32_t xlog_tid_t;
150
149#ifdef __KERNEL__ 151#ifdef __KERNEL__
150/* 152/*
151 * Below are states for covering allocation transactions. 153 * Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5cc464a17c93..04142caedb2b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -205,6 +205,35 @@ xlog_bread(
205} 205}
206 206
207/* 207/*
208 * Read at an offset into the buffer. Returns with the buffer in its original
209 * state regardless of the result of the read.
210 */
211STATIC int
212xlog_bread_offset(
213 xlog_t *log,
214 xfs_daddr_t blk_no, /* block to read from */
215 int nbblks, /* blocks to read */
216 xfs_buf_t *bp,
217 xfs_caddr_t offset)
218{
219 xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
220 int orig_len = bp->b_buffer_length;
221 int error, error2;
222
223 error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
224 if (error)
225 return error;
226
227 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
228
229 /* must reset buffer pointer even on error */
230 error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
231 if (error)
232 return error;
233 return error2;
234}
235
236/*
208 * Write out the buffer at the given block for the given number of blocks. 237 * Write out the buffer at the given block for the given number of blocks.
209 * The buffer is kept locked across the write and is returned locked. 238 * The buffer is kept locked across the write and is returned locked.
210 * This can only be used for synchronous log writes. 239 * This can only be used for synchronous log writes.
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
1229 */ 1258 */
1230 ealign = round_down(end_block, sectbb); 1259 ealign = round_down(end_block, sectbb);
1231 if (j == 0 && (start_block + endcount > ealign)) { 1260 if (j == 0 && (start_block + endcount > ealign)) {
1232 offset = XFS_BUF_PTR(bp); 1261 offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
1233 balign = BBTOB(ealign - start_block); 1262 error = xlog_bread_offset(log, ealign, sectbb,
1234 error = XFS_BUF_SET_PTR(bp, offset + balign, 1263 bp, offset);
1235 BBTOB(sectbb));
1236 if (error) 1264 if (error)
1237 break; 1265 break;
1238 1266
1239 error = xlog_bread_noalign(log, ealign, sectbb, bp);
1240 if (error)
1241 break;
1242
1243 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1244 if (error)
1245 break;
1246 } 1267 }
1247 1268
1248 offset = xlog_align(log, start_block, endcount, bp); 1269 offset = xlog_align(log, start_block, endcount, bp);
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
3448 * - order is important. 3469 * - order is important.
3449 */ 3470 */
3450 wrapped_hblks = hblks - split_hblks; 3471 wrapped_hblks = hblks - split_hblks;
3451 error = XFS_BUF_SET_PTR(hbp, 3472 error = xlog_bread_offset(log, 0,
3452 offset + BBTOB(split_hblks), 3473 wrapped_hblks, hbp,
3453 BBTOB(hblks - split_hblks)); 3474 offset + BBTOB(split_hblks));
3454 if (error)
3455 goto bread_err2;
3456
3457 error = xlog_bread_noalign(log, 0,
3458 wrapped_hblks, hbp);
3459 if (error)
3460 goto bread_err2;
3461
3462 error = XFS_BUF_SET_PTR(hbp, offset,
3463 BBTOB(hblks));
3464 if (error) 3475 if (error)
3465 goto bread_err2; 3476 goto bread_err2;
3466 } 3477 }
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
3511 * _first_, then the log start (LR header end) 3522 * _first_, then the log start (LR header end)
3512 * - order is important. 3523 * - order is important.
3513 */ 3524 */
3514 error = XFS_BUF_SET_PTR(dbp, 3525 error = xlog_bread_offset(log, wrapped_hblks,
3515 offset + BBTOB(split_bblks), 3526 bblks - split_bblks, dbp,
3516 BBTOB(bblks - split_bblks)); 3527 offset + BBTOB(split_bblks));
3517 if (error)
3518 goto bread_err2;
3519
3520 error = xlog_bread_noalign(log, wrapped_hblks,
3521 bblks - split_bblks,
3522 dbp);
3523 if (error)
3524 goto bread_err2;
3525
3526 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3527 if (error) 3528 if (error)
3528 goto bread_err2; 3529 goto bread_err2;
3529 } 3530 }
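The log-recovery hunks replace three copies of the same set-pointer / read / restore-pointer dance with the new xlog_bread_offset() helper, which restores the buffer's original pointer and length even when the read fails, so the error paths can no longer leak a redirected buffer. The pattern, as a self-contained sketch:

    #include <string.h>

    struct buf { char *ptr; int len; };

    /* Stand-in for xlog_bread_noalign(): pretend to fill the buffer. */
    static int bread(struct buf *bp, int blk, int nblks)
    {
        memset(bp->ptr, 0, (size_t)bp->len);
        return 0;
    }

    /* Sketch of xlog_bread_offset(): redirect the buffer into the middle
     * of its own mapping, read, then restore the original pointer and
     * length whether or not the read succeeded. */
    static int bread_offset(struct buf *bp, int blk, int nblks,
                            char *offset, int nbytes)
    {
        char *orig_ptr = bp->ptr;
        int   orig_len = bp->len;
        int   error;

        bp->ptr = offset;
        bp->len = nbytes;
        error = bread(bp, blk, nblks);
        bp->ptr = orig_ptr; /* must reset even on error */
        bp->len = orig_len;
        return error;
    }

    int main(void)
    {
        char mem[1024];
        struct buf b = { mem, sizeof(mem) };

        return bread_offset(&b, 0, 1, mem + 512, 512);
    }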
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb3f9a7b24ed..b49b82363d20 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
1900 uint nmsb, 1900 uint nmsb,
1901 int rsvd) 1901 int rsvd)
1902{ 1902{
1903 xfs_mod_sb_t *msbp = &msb[0]; 1903 xfs_mod_sb_t *msbp;
1904 int error = 0; 1904 int error = 0;
1905 1905
1906 /* 1906 /*
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
1910 * changes will be atomic. 1910 * changes will be atomic.
1911 */ 1911 */
1912 spin_lock(&mp->m_sb_lock); 1912 spin_lock(&mp->m_sb_lock);
1913 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1913 for (msbp = msb; msbp < (msb + nmsb); msbp++) {
1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
1915 msbp->msb_field > XFS_SBS_FDBLOCKS); 1915 msbp->msb_field > XFS_SBS_FDBLOCKS);
1916 1916
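The xfs_mod_incore_sb_batch() change is purely for readability: the old loop re-initialised its cursor from itself ("for (msbp = &msbp[0]; ...)"), which only happened to work because of the initialiser at the declaration; initialising from the array in the loop header makes the iteration self-explanatory:

    #include <stdio.h>

    int main(void)
    {
        int msb[4] = { 1, 2, 3, 4 };
        int *msbp, sum = 0;

        /* new form: the cursor is initialised from the array, inside
         * the loop header, rather than via a self-referential
         * "msbp = &msbp[0]" that depended on the declaration above */
        for (msbp = msb; msbp < msb + 4; msbp++)
            sum += *msbp;
        printf("sum = %d\n", sum);
        return 0;
    }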
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 19af0ab0d0c6..3d68bb267c5f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -224,6 +224,7 @@ typedef struct xfs_mount {
224#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 224#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
225 operations, typically for 225 operations, typically for
226 disk errors in metadata */ 226 disk errors in metadata */
227#define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */
227#define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to 228#define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to
228 user */ 229 user */
229#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment 230#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 76922793f64f..7c7bc2b786bd 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
608xfs_trans_free( 608xfs_trans_free(
609 struct xfs_trans *tp) 609 struct xfs_trans *tp)
610{ 610{
611 struct xfs_busy_extent *busyp, *n; 611 xfs_alloc_busy_sort(&tp->t_busy);
612 612 xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false);
613 list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
614 xfs_alloc_busy_clear(tp->t_mountp, busyp);
615 613
616 atomic_dec(&tp->t_mountp->m_active_trans); 614 atomic_dec(&tp->t_mountp->m_active_trans);
617 xfs_trans_free_dqinfo(tp); 615 xfs_trans_free_dqinfo(tp);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
75 75
76typedef __uint32_t xlog_tid_t; /* transaction ID type */
77
78/* 76/*
79 * These types are 64 bits on disk but are either 32 or 64 bits in memory. 77 * These types are 64 bits on disk but are either 32 or 64 bits in memory.
80 * Disk based types: 78 * Disk based types: